Skip to content

Commit

Permalink
REFACTOR-#7242: Add type hints for modin/core/dataframe/algebra/ (#7243)
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored May 13, 2024
1 parent 599da50 commit 0559fa2
Show file tree
Hide file tree
Showing 11 changed files with 283 additions and 150 deletions.
74 changes: 49 additions & 25 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@

"""Module houses builder class for Binary operator."""

from __future__ import annotations

import warnings
from typing import Optional
from typing import TYPE_CHECKING, Any, Callable, Optional, Union

import numpy as np
import pandas
Expand All @@ -24,13 +26,20 @@

from .operator import Operator

if TYPE_CHECKING:
from pandas._typing import DtypeObj

from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


def maybe_compute_dtypes_common_cast(
first,
second,
trigger_computations=False,
axis=0,
func=None,
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, dict, list, tuple, np.ndarray, str, DtypeObj],
trigger_computations: bool = False,
axis: int = 0,
func: Optional[
Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
] = None,
) -> Optional[pandas.Series]:
"""
Precompute data types for binary operations by finding common type between operands.
Expand All @@ -39,7 +48,7 @@ def maybe_compute_dtypes_common_cast(
----------
first : PandasQueryCompiler
First operand for which the binary operation would be performed later.
second : PandasQueryCompiler, list-like or scalar
second : PandasQueryCompiler, dict, list, tuple, np.ndarray, str or DtypeObj
Second operand for which the binary operation would be performed later.
trigger_computations : bool, default: False
Whether to trigger computation of the lazy metadata for `first` and `second`.
Expand Down Expand Up @@ -155,7 +164,7 @@ def maybe_compute_dtypes_common_cast(
],
index=common_columns,
)
dtypes = pandas.concat(
dtypes: pandas.Series = pandas.concat(
[
dtypes,
pandas.Series(
Expand All @@ -168,7 +177,10 @@ def maybe_compute_dtypes_common_cast(


def maybe_build_dtypes_series(
first, second, dtype, trigger_computations=False
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, Any],
dtype: DtypeObj,
trigger_computations: bool = False,
) -> Optional[pandas.Series]:
"""
Build a ``pandas.Series`` describing dtypes of the result of a binary operation.
Expand All @@ -179,7 +191,7 @@ def maybe_build_dtypes_series(
First operand for which the binary operation would be performed later.
second : PandasQueryCompiler, list-like or scalar
Second operand for which the binary operation would be performed later.
dtype : np.dtype
dtype : DtypeObj
Dtype of the result.
trigger_computations : bool, default: False
Whether to trigger computation of the lazy metadata for `first` and `second`.
Expand Down Expand Up @@ -217,8 +229,15 @@ def maybe_build_dtypes_series(


def try_compute_new_dtypes(
first, second, infer_dtypes=None, result_dtype=None, axis=0, func=None
):
first: PandasQueryCompiler,
second: Union[PandasQueryCompiler, Any],
infer_dtypes: Optional[str] = None,
result_dtype: Optional[Union[DtypeObj, str]] = None,
axis: int = 0,
func: Optional[
Callable[[pandas.DataFrame, pandas.DataFrame], pandas.DataFrame]
] = None,
) -> Optional[pandas.Series]:
"""
Precompute resulting dtypes of the binary operation if possible.
Expand Down Expand Up @@ -285,11 +304,11 @@ class Binary(Operator):
@classmethod
def register(
cls,
func,
join_type="outer",
labels="replace",
infer_dtypes=None,
):
func: Callable[..., pandas.DataFrame],
join_type: str = "outer",
labels: str = "replace",
infer_dtypes: Optional[str] = None,
) -> Callable[..., PandasQueryCompiler]:
"""
Build template binary operator.
Expand Down Expand Up @@ -318,34 +337,39 @@ def register(
"""

def caller(
query_compiler, other, broadcast=False, *args, dtypes=None, **kwargs
):
query_compiler: PandasQueryCompiler,
other: Union[PandasQueryCompiler, Any],
broadcast: bool = False,
*args: tuple,
dtypes: Optional[Union[DtypeObj, str]] = None,
**kwargs: dict,
) -> PandasQueryCompiler:
"""
Apply binary `func` to passed operands.
Parameters
----------
query_compiler : QueryCompiler
query_compiler : PandasQueryCompiler
Left operand of `func`.
other : QueryCompiler, list-like object or scalar
other : PandasQueryCompiler, list-like object or scalar
Right operand of `func`.
broadcast : bool, default: False
If `other` is a one-column query compiler, indicates whether it is a Series or not.
Frames and Series have to be processed differently, however we can't distinguish them
at the query compiler level, so this parameter is a hint that is passed from a high-level API.
*args : args,
*args : tuple
Arguments that will be passed to `func`.
dtypes : "copy", scalar dtype or None, default: None
Dtypes of the result. "copy" to keep old dtypes and None to compute them on demand.
**kwargs : kwargs,
**kwargs : dict
Arguments that will be passed to `func`.
Returns
-------
QueryCompiler
PandasQueryCompiler
Result of binary function.
"""
axis = kwargs.get("axis", 0)
axis: int = kwargs.get("axis", 0)
if isinstance(other, type(query_compiler)) and broadcast:
assert (
len(other.columns) == 1
Expand Down
32 changes: 24 additions & 8 deletions modin/core/dataframe/algebra/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,31 @@

"""Module houses builder class for Fold operator."""

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Optional

from .operator import Operator

if TYPE_CHECKING:
import pandas

from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler


class Fold(Operator):
"""Builder class for Fold functions."""

@classmethod
def register(cls, fold_function):
def register(
cls, fold_function: Callable[..., pandas.DataFrame]
) -> Callable[..., PandasQueryCompiler]:
"""
Build Fold operator that will be performed across rows/columns.
Parameters
----------
fold_function : callable(pandas.DataFrame) -> pandas.DataFrame
fold_function : callable(pandas.DataFrame, *args, **kwargs) -> pandas.DataFrame
Function to apply across rows/columns.
Returns
Expand All @@ -35,25 +46,30 @@ def register(cls, fold_function):
Function that takes query compiler and executes Fold function.
"""

def caller(query_compiler, fold_axis=None, *args, **kwargs):
def caller(
query_compiler: PandasQueryCompiler,
fold_axis: Optional[int] = None,
*args: tuple,
**kwargs: dict,
) -> PandasQueryCompiler:
"""
Execute Fold function against passed query compiler.
Parameters
----------
query_compiler : BaseQueryCompiler
query_compiler : PandasQueryCompiler
The query compiler to execute the function on.
fold_axis : int, optional
0 or None means apply across full column partitions. 1 means
apply across full row partitions.
*args : iterable
Additional arguments passed to fold_function.
*args : tuple
Additional arguments passed to `fold_function`.
**kwargs : dict
Additional keyword arguments passed to fold_function.
Additional keyword arguments passed to `fold_function`.
Returns
-------
BaseQueryCompiler
PandasQueryCompiler
A new query compiler representing the result of executing the
function.
"""
Expand Down
Loading

0 comments on commit 0559fa2

Please sign in to comment.