Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

153 add type hints to functions #157

Merged
merged 8 commits into from
Jan 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ syntax: glob
env/*
venv/*
ENV/*
.idea/*
.vscode/*
.idea
.vscode
.DS_Store
dython.egg*/*
*__pycache__*
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Change Log

## 0.7.5 _(dev)_
* Adding type hints to all functions (issue [#153](https://github.com/shakedzy/dython/issues/153))
* Dropping the dependency on `scikit-plot`, as it is no longer maintained (issue [#156](https://github.com/shakedzy/dython/issues/156))

## 0.7.4
* Handling running plotting functions with `plot=False` in Jupyter and truly avoid plotting (issue [#147](https://github.com/shakedzy/dython/issues/147))

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ If you wish to install from source:
pip install git+https://github.com/shakedzy/dython.git
```

**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`, `scikit-plot`
**Dependencies:** `numpy`, `pandas`, `seaborn`, `scipy`, `matplotlib`, `sklearn`

## Contributing:
Contributions are always welcome - if you found something you can fix, or have an idea for a new feature, feel free to write it and open a pull request. Please make sure to go over the [contribution guidelines](https://github.com/shakedzy/dython/blob/master/CONTRIBUTING.md).
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7.4
0.7.5.dev
30 changes: 22 additions & 8 deletions dython/_private.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,34 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.typing import NDArray
from typing import Optional, Any, Tuple, Union, List, Literal
from .typing import Number, OneDimArray

IS_JUPYTER = None

IS_JUPYTER: bool = False

def set_is_jupyter(force_to=None):

def set_is_jupyter(force_to: Optional[bool] = None) -> None:
global IS_JUPYTER
if force_to is not None:
IS_JUPYTER = force_to
else:
IS_JUPYTER = "ipykernel_launcher.py" in sys.argv[0]


def plot_or_not(plot):
def plot_or_not(plot: bool) -> None:
if plot:
plt.show()
elif not plot and IS_JUPYTER:
plt.close()


def convert(data, to, copy=True):
def convert(
data: Union[List[Number], NDArray, pd.DataFrame],
to: Literal["array", "list", "dataframe"],
copy: bool = True,
) -> Union[List[Number], NDArray, pd.DataFrame]:
converted = None
if to == "array":
if isinstance(data, np.ndarray):
Expand All @@ -31,7 +39,7 @@ def convert(data, to, copy=True):
elif isinstance(data, list):
converted = np.array(data)
elif isinstance(data, pd.DataFrame):
converted = data.values()
converted = data.values() # type: ignore
elif to == "list":
if isinstance(data, list):
converted = data.copy() if copy else data
Expand All @@ -53,10 +61,12 @@ def convert(data, to, copy=True):
)
)
else:
return converted
return converted # type: ignore


def remove_incomplete_samples(x, y):
def remove_incomplete_samples(
x: Union[List[Any], OneDimArray], y: Union[List[Any], OneDimArray]
) -> Tuple[Union[List[Any], OneDimArray], Union[List[Any], OneDimArray]]:
x = [v if v is not None else np.nan for v in x]
y = [v if v is not None else np.nan for v in y]
arr = np.array([x, y]).transpose()
Expand All @@ -67,7 +77,11 @@ def remove_incomplete_samples(x, y):
return arr[0], arr[1]


def replace_nan_with_value(x, y, value):
def replace_nan_with_value(
x: Union[List[Any], OneDimArray],
y: Union[List[Any], OneDimArray],
value: Any,
) -> Tuple[NDArray, NDArray]:
x = np.array(
[v if v == v and v is not None else value for v in x]
) # NaN != NaN
Expand Down
53 changes: 31 additions & 22 deletions dython/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional, Tuple, List, Any, Union
from numpy.typing import NDArray
from .typing import Number, TwoDimArray
from ._private import convert, plot_or_not


Expand All @@ -12,7 +15,10 @@
]


def one_hot_encode(arr, classes=None):
def one_hot_encode(
array: Union[List[Union[Number, str]], NDArray],
classes: Optional[int] = None,
) -> NDArray:
"""
One-hot encode a 1D array.
Based on this StackOverflow answer: https://stackoverflow.com/a/29831596/5863503
Expand All @@ -35,31 +41,31 @@ def one_hot_encode(arr, classes=None):
[1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1.]])
"""
arr = convert(arr, "array").astype(int)
arr: NDArray = convert(array, "array").astype(int) # type: ignore
if not len(arr.shape) == 1:
raise ValueError(
f"array must have only one dimension, but has shape: {arr.shape}"
)
if arr.min() < 0:
raise ValueError("array cannot contain negative values")
classes = classes if classes is not None else arr.max() + 1
h = np.zeros((arr.size, classes))
h = np.zeros((arr.size, classes)) # type: ignore
h[np.arange(arr.size), arr] = 1
return h


def split_hist(
dataset,
values,
split_by,
title="",
xlabel="",
ylabel=None,
figsize=None,
legend="best",
plot=True,
dataset: pd.DataFrame,
values: str,
split_by: str,
title: Optional[str] = "",
xlabel: Optional[str] = "",
ylabel: Optional[str] = None,
figsize: Optional[Tuple[int, int]] = None,
legend: Optional[str] = "best",
plot: bool = True,
**hist_kwargs,
):
) -> plt.Axes:
"""
Plot a histogram of values from a given dataset, split by the values of a chosen column

Expand Down Expand Up @@ -88,7 +94,7 @@ def split_hist(

Returns:
--------
A Matplotlib `Axe`
A Matplotlib `Axes`

Example:
--------
Expand All @@ -111,13 +117,16 @@ def split_hist(
if title == "":
title = values + " by " + split_by
plt.title(title)
plt.ylabel(ylabel)
if ylabel:
plt.ylabel(ylabel)
ax = plt.gca()
plot_or_not(plot)
return ax


def identify_columns_by_type(dataset, include):
def identify_columns_by_type(
dataset: TwoDimArray, include: List[str]
) -> List[Any]:
"""
Given a dataset, identify columns of the types requested.

Expand All @@ -138,12 +147,12 @@ def identify_columns_by_type(dataset, include):
['col2', 'col3']

"""
dataset = convert(dataset, "dataframe")
columns = list(dataset.select_dtypes(include=include).columns)
df: pd.DataFrame = convert(dataset, "dataframe") # type: ignore
columns = list(df.select_dtypes(include=include).columns)
return columns


def identify_columns_with_na(dataset):
def identify_columns_with_na(dataset: TwoDimArray) -> pd.DataFrame:
"""
Return columns names having NA values, sorted in descending order by their number of NAs

Expand All @@ -164,10 +173,10 @@ def identify_columns_with_na(dataset):
1 col2 2
0 col1 1
"""
dataset = convert(dataset, "dataframe")
na_count = [sum(dataset[cc].isnull()) for cc in dataset.columns]
df: pd.DataFrame = convert(dataset, "dataframe") # type: ignore
na_count = [sum(df[cc].isnull()) for cc in df.columns]
return (
pd.DataFrame({"column": dataset.columns, "na_count": na_count})
pd.DataFrame({"column": df.columns, "na_count": na_count})
.query("na_count > 0")
.sort_values("na_count", ascending=False)
)
8 changes: 6 additions & 2 deletions dython/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def roc_graph_example():
y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

# Plot ROC graphs
return metric_graph(y_test, y_score, "roc", class_names=iris.target_names)
return metric_graph(
y_test, y_score, "roc", class_names_list=iris.target_names
)


def pr_graph_example():
Expand Down Expand Up @@ -73,7 +75,9 @@ def pr_graph_example():
y_score = classifier.fit(X_train, y_train).predict_proba(X_test)

# Plot PR graphs
return metric_graph(y_test, y_score, "pr", class_names=iris.target_names)
return metric_graph(
y_test, y_score, "pr", class_names_list=iris.target_names
)


def associations_iris_example():
Expand Down
Loading
Loading