Skip to content

Commit

Permalink
Docstrings API examples (#648)
Browse files Browse the repository at this point in the history
* Example for `FormulaicTransformer`

* Example for `IdentityTransformer`

* Example for `PandasTypeSelector`

* Example for `InformationFilter`
 
* Example for `RepeatingBasisFunction`

* Fix `Examples` keyword in docstring

---------

Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
  • Loading branch information
anopsy and FBruzzesi authored Apr 10, 2024
1 parent 5e53190 commit 35dd279
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 4 deletions.
4 changes: 2 additions & 2 deletions sklego/preprocessing/dictmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class DictMapper(TransformerMixin, BaseEstimator):
dim_ : int
Deprecated, please use `n_features_in_` instead.
Example
-------
Examples
--------
```py
import pandas as pd
from sklego.preprocessing.dictmapper import DictMapper
Expand Down
27 changes: 27 additions & 0 deletions sklego/preprocessing/formulaictransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,33 @@ class FormulaicTransformer(TransformerMixin, BaseEstimator):
The parsed model specification.
n_features_in_ : int
Number of features seen during `fit`.
Examples
--------
```py
import formulaic
import pandas as pd
import numpy as np
from sklego.preprocessing import FormulaicTransformer
df = pd.DataFrame({
'a': ['A', 'B', 'C'],
'b': [0.3, 0.1, 0.2],
})
# By default the transformed design matrix is returned as a numpy array
FormulaicTransformer("a + b + a:b").fit_transform(df)
# array([[1. , 0. , 0. , 0.3, 0. , 0. ],
# [1. , 1. , 0. , 0.1, 0.1, 0. ],
# [1. , 0. , 1. , 0.2, 0. , 0.2]])
# Passing "pandas" as the return type yields a DataFrame with named columns
FormulaicTransformer("a + b + a:b", "pandas").fit_transform(df)
# Intercept a[T.B] a[T.C] b a[T.B]:b a[T.C]:b
#0 1.0 0 0 0.3 0.0 0.0
#1 1.0 1 0 0.1 0.1 0.0
#2 1.0 0 1 0.2 0.0 0.2
```
"""

def __init__(self, formula, return_type="numpy"):
Expand Down
25 changes: 25 additions & 0 deletions sklego/preprocessing/identitytransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,31 @@ class IdentityTransformer(BaseEstimator, TransformerMixin):
The number of features seen during `fit`.
shape_ : tuple[int, int]
Deprecated, please use `n_samples_` and `n_features_in_` instead.
Examples
--------
```py
import pandas as pd
from sklego.preprocessing import IdentityTransformer
df = pd.DataFrame({
"name": ["Swen", "Victor", "Alex"],
"length": [1.82, 1.85, 1.80],
"shoesize": [42, 44, 45]
})
IdentityTransformer().fit_transform(df)
# name length shoesize
# 0 Swen 1.82 42
# 1 Victor 1.85 44
# 2 Alex 1.80 45
# With check_X=True, `X` is validated to be a non-empty 2D array of finite values and is cast to float where possible
IdentityTransformer(check_X=True).fit_transform(df.drop(columns="name"))
# array([[ 1.82, 42. ],
# [ 1.85, 44. ],
# [ 1.8 , 45. ]])
```
"""

def __init__(self, check_X: bool = False):
Expand Down
4 changes: 2 additions & 2 deletions sklego/preprocessing/outlier_remover.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ class OutlierRemover(TrainOnlyTransformerMixin, BaseEstimator):
estimator_ : object
The fitted outlier detector.
Example
-------
Examples
--------
```py
import numpy as np
Expand Down
27 changes: 27 additions & 0 deletions sklego/preprocessing/pandastransformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,33 @@ class PandasTypeSelector(BaseEstimator, TransformerMixin):
!!! warning
Raises a `TypeError` if input provided is not a DataFrame.
Examples
--------
```py
import pandas as pd
from sklego.preprocessing import PandasTypeSelector
df = pd.DataFrame({
"name": ["Swen", "Victor", "Alex"],
"length": [1.82, 1.85, 1.80],
"shoesize": [42, 44, 45]
})
# Excluding a single dtype (drops the int64 column "shoesize")
PandasTypeSelector(exclude="int64").fit_transform(df)
# name length
#0 Swen 1.82
#1 Victor 1.85
#2 Alex 1.80
# Including multiple dtypes (keeps the object and int64 columns)
PandasTypeSelector(include=["int64", "object"]).fit_transform(df)
# name shoesize
#0 Swen 42
#1 Victor 44
#2 Alex 45
```
"""

def __init__(self, include=None, exclude=None):
Expand Down
18 changes: 18 additions & 0 deletions sklego/preprocessing/projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,24 @@ class InformationFilter(BaseEstimator, TransformerMixin):
The projection matrix that can be used to filter information out of a dataset.
col_ids_ : List[int] of length `len(columns)`
The list of column ids of the sensitive columns.
Examples
--------
```py
import pandas as pd
from sklego.preprocessing import InformationFilter
df = pd.DataFrame({
"user_id": [101, 102, 103],
"length": [1.82, 1.85, 1.80],
"age": [21, 37, 45]
})
InformationFilter(columns=["length"], alpha=0.5).fit_transform(df)
# array([[50.10152483, 3.87905643],
# [50.26253897, 19.59684308],
# [52.66084873, 28.06719867]])
```
"""

def __init__(self, columns, alpha=1):
Expand Down
16 changes: 16 additions & 0 deletions sklego/preprocessing/repeatingbasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ class RepeatingBasisFunction(TransformerMixin, BaseEstimator):
----------
pipeline_ : ColumnTransformer
Fitted `ColumnTransformer` object used to transform data with repeating basis functions.
Examples
--------
```py
import pandas as pd
from sklego.preprocessing import RepeatingBasisFunction
df = pd.DataFrame({
"user_id": [101, 102, 103],
"created_day": [5, 1, 7]
})
RepeatingBasisFunction(column="created_day", n_periods=5, input_range=(1,7)).fit_transform(df)
# array([[0.06217652, 0.00432024, 0.16901332, 0.89483932, 0.64118039],
# [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944],
# [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944]])
```
"""

def __init__(self, column=0, remainder="drop", n_periods=12, input_range=None, width=1.0):
Expand Down

0 comments on commit 35dd279

Please sign in to comment.