Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed dfply documentation on verbs, created long forms of those verbs, and improved dfply import. #110

Merged
merged 8 commits into from
Jun 30, 2021
76 changes: 2 additions & 74 deletions grama/__init__.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,9 @@
from .tools import *
from .core import *

# Integrate dfply tools
# Import dfply
# --------------------------------------------------
from .dfply import Intention, dfdelegate, make_symbolic, convert_type
from .dfply import var_in, is_nan, not_nan
from .dfply import starts_with, ends_with, contains, matches, everything
from .dfply import num_range, one_of, columns_between, columns_from, columns_to

# group.py
from .dfply import group_by as tf_group_by
from .dfply import ungroup as tf_ungroup

# join.py
from .dfply import inner_join as tf_inner_join
from .dfply import full_join as tf_full_join
from .dfply import outer_join as tf_outer_join
from .dfply import left_join as tf_left_join
from .dfply import right_join as tf_right_join
from .dfply import semi_join as tf_semi_join
from .dfply import anti_join as tf_anti_join
from .dfply import bind_rows as tf_bind_rows
from .dfply import bind_cols as tf_bind_cols

# reshape.py
from .dfply import arrange as tf_arrange
from .dfply import rename as tf_rename
from .dfply import separate as tf_separate
from .dfply import unite as tf_unite
from .dfply import gather as tf_gather
from .dfply import spread as tf_spread
from .dfply import explode as tf_explode

# select.py
from .dfply import select as tf_select
from .dfply import select_if as tf_select_if
from .dfply import drop as tf_drop
from .dfply import drop_if as tf_drop_if

# set_ops.py
from .dfply import union as tf_union
from .dfply import intersect as tf_intersect
from .dfply import set_diff as tf_set_diff

# subset.py
from .dfply import head as tf_head
from .dfply import tail as tf_tail
from .dfply import sample as tf_sample
from .dfply import distinct as tf_distinct
from .dfply import row_slice as tf_row_slice
from .dfply import mask as tf_filter
from .dfply import top_n as tf_top_n
from .dfply import pull as tf_pull
from .dfply import dropna as tf_dropna

# summarize.py
from .dfply import summarize as tf_summarize
from .dfply import summarize_each as tf_summarize_each

# summary_functions.py
from .dfply import mean, first, last, nth, n, n_distinct, IQR, quant
from .dfply import colmin, colmax, colsum, median, var, sd, binomial_ci
from .dfply import mse, rmse, ndme, rsq
from .dfply import corr

# transform.py
from .dfply import mutate as tf_mutate
from .dfply import mutate_if as tf_mutate_if
from .dfply import transmute as tf_transmute

# vector.py
from .dfply import order_series_by, desc, coalesce, case_when, if_else, na_if

# window_functions.py
from .dfply import lead, lag, between, dense_rank, min_rank
from .dfply import cumsum, cummean, cummax, cummin, cumprod, cumany, cumall
from .dfply import percent_rank, row_number
from .dfply import *
zdelrosario marked this conversation as resolved.
Show resolved Hide resolved

# Add functionality to dfply
from .string_helpers import *
Expand Down
5 changes: 3 additions & 2 deletions grama/comp_building.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
"cp_marginals",
]

from collections import ChainMap
import grama as gr
from collections import ChainMap
from grama import add_pipe, pipe
from toolz import curry
from pandas import concat, DataFrame
from toolz import curry


## Model Building Interface (MBI) tools
##################################################
Expand Down
1 change: 1 addition & 0 deletions grama/comp_metamodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from grama import add_pipe, pipe
from toolz import curry


## Fit a metamodel
# --------------------------------------------------
@curry
Expand Down
35 changes: 11 additions & 24 deletions grama/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,26 @@
"NaN",
]

from abc import ABC, abstractmethod
import copy

from numpy import (
ones,
zeros,
triu_indices,
eye,
array,
Inf,
NaN,
sqrt,
dot,
diag,
isfinite,
)
import networkx as nx
import warnings
import grama as gr
from grama import pipe, valid_dist, param_dist
from abc import ABC, abstractmethod
from itertools import chain
from numpy import ones, zeros, triu_indices, eye, array, Inf, NaN, sqrt, \
dot, diag, isfinite
from numpy import min as npmin
from numpy import max as npmax
from numpy.linalg import cholesky
from numpy.random import random, multivariate_normal
from numpy.random import seed as set_seed
from pandas import DataFrame, concat
from scipy.linalg import det, LinAlgError, solve
from scipy.optimize import root_scalar
from scipy.stats import norm, gaussian_kde
from pandas import DataFrame, concat

import grama as gr
from grama import pipe, valid_dist, param_dist

from itertools import chain
from numpy.linalg import cholesky
from toolz import curry
import warnings
import networkx as nx


## Package settings
RUNTIME_LOWER = 1 # Cutoff threshold for runtime messages
Expand Down
1 change: 1 addition & 0 deletions grama/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pandas import read_csv
from pathlib import Path


path_this = Path(__file__)
path_grama = path_this.parents[1]

Expand Down
4 changes: 2 additions & 2 deletions grama/dfply/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# Re-export all dfply verb and helper modules.
# NOTE: `from .select import *` appeared twice in the original; the
# duplicate is removed here (star-imports are idempotent, but the
# repetition was clearly accidental).
from .mask_helpers import *
from .reshape import *
from .select import *
from .set_ops import *
from .subset import *
from .summarize import *
from .transform import *
from .summary_functions import *
from .vector import *
from .window_functions import *


for verb in dir():
if "ize" in verb:
Expand Down
87 changes: 47 additions & 40 deletions grama/dfply/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import pandas as pd
import numpy as np
__all__ = [
"Intention",
"dfdelegate",
"make_symbolic",
"symbolic_evaluation",
"group_delegation",
"flatten"
]

import warnings
from .. import pipe, add_pipe # Use grama pipe to preserve metadata
from functools import partial, wraps

from .. import pipe # Use grama pipe to preserve metadata
from numpy import zeros, array
from pandas import Series, Index, DataFrame


def _recursive_apply(f, l):
Expand All @@ -12,8 +20,7 @@ def _recursive_apply(f, l):
if isinstance(l, tuple):
out = tuple(out)
return out
else:
return f(l)
return f(l)


def contextualize(arg, context):
Expand Down Expand Up @@ -60,8 +67,7 @@ def wrapper(*args, **kwargs):
if delay:
delayed = _delayed_function(f, args, kwargs)
return Intention(delayed)
else:
return f(*args, **kwargs)
return f(*args, **kwargs)

## Preserve documentation
wrapper.__doc__ = f.__doc__
Expand Down Expand Up @@ -227,9 +233,9 @@ def _evaluate_label(self, df, arg):
arg = self._evaluate(df, arg)

cols = list(df.columns)
if isinstance(arg, pd.Series):
if isinstance(arg, Series):
arg = arg.name
if isinstance(arg, pd.Index):
if isinstance(arg, Index):
arg = list(arg)
if isinstance(arg, int):
arg = cols[arg]
Expand All @@ -242,11 +248,11 @@ def _evaluate_selector(self, df, arg):
arg = arg.evaluate(df)

cols = list(df.columns)
if isinstance(arg, pd.Series):
if isinstance(arg, Series):
arg = [cols.index(arg.name)]
if isinstance(arg, pd.Index):
if isinstance(arg, Index):
arg = [cols.index(i) for i in list(arg)]
if isinstance(arg, pd.DataFrame):
if isinstance(arg, DataFrame):
arg = [cols.index(i) for i in arg.columns]
if isinstance(arg, int):
arg = [arg]
Expand All @@ -255,8 +261,8 @@ def _evaluate_selector(self, df, arg):
if isinstance(arg, (list, tuple)):
arg = [cols.index(i) if isinstance(i, str) else i for i in arg]

selection_vector = np.zeros(df.shape[1])
col_idx = np.array(arg)
selection_vector = zeros(df.shape[1])
col_idx = array(arg)

if negate and len(col_idx) > 0:
selection_vector[col_idx] = -1
Expand All @@ -267,8 +273,7 @@ def _evaluate_selector(self, df, arg):
def _evaluator_loop(self, df, arg, eval_func):
if isinstance(arg, (list, tuple)):
return [self._evaluator_loop(df, a_, eval_func) for a_ in arg]
else:
return eval_func(df, arg)
return eval_func(df, arg)

def _symbolic_eval(self, df, arg):
    # Evaluate arg (possibly a nested list/tuple of Intentions and
    # plain literals) against df, returning concrete values.
    return self._evaluator_loop(df, arg, self._evaluate)
Expand Down Expand Up @@ -314,16 +319,16 @@ def _recursive_kwarg_eval(self, df, kwargs):
}

def _find_eval_args(self, request, args):
if (request == True) or ("*" in request):
if (request is True) or ("*" in request):
return [i for i in range(len(args))]
elif request in [None, False]:
if request in [None, False]:
return []
return request

def _find_eval_kwargs(self, request, kwargs):
if (request == True) or ("**" in request):
if (request is True) or ("**" in request):
return [k for k in kwargs.keys()]
elif request in [None, False]:
if request in [None, False]:
return []
return request

Expand All @@ -341,21 +346,21 @@ def symbolic_evaluation(
):
if function:
return IntentionEvaluator(function)
else:

@wraps(function)
def wrapper(function):
return IntentionEvaluator(
function,
eval_symbols=eval_symbols,
eval_as_label=eval_as_label,
eval_as_selector=eval_as_selector,
)

return wrapper
@wraps(function)
def wrapper(function):
return IntentionEvaluator(
function,
eval_symbols=eval_symbols,
eval_as_label=eval_as_label,
eval_as_selector=eval_as_selector,
)

return wrapper


class group_delegation(object):

__name__ = "group_delegation"

def __init__(self, function):
Expand Down Expand Up @@ -386,19 +391,21 @@ def __call__(self, *args, **kwargs):
grouped_by = getattr(args[0], "_grouped_by", None)
if (grouped_by is None) or not all([g in args[0].columns for g in grouped_by]):
return self.function(*args, **kwargs)
else:
applied = self._apply(args[0], *args[1:], **kwargs)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
applied._grouped_by = grouped_by
applied = self._apply(args[0], *args[1:], **kwargs)

with warnings.catch_warnings():
warnings.simplefilter("ignore")
applied._grouped_by = grouped_by

return applied
return applied


def dfpipe(f):
    """Build a pipe-enabled, group-aware, symbolic verb from ``f``.

    (The original span still carried the superseded ``return pipe(...)``
    diff line alongside the new ``add_pipe`` version; only the final
    form is kept.)
    """
    return add_pipe(group_delegation(symbolic_evaluation(f)))


def dfdelegate(f):
    """Wrap ``f`` with symbolic evaluation and group delegation.

    A throwaway ``group_delegation`` subclass is defined solely to carry
    the wrapped function's ``__name__``, so the resulting verb keeps its
    identity for introspection and error messages.
    """
    class addName(group_delegation):
        __name__ = f.__name__

    # NOTE(review): group_delegation is applied twice here — once via the
    # addName subclass and once explicitly. This looks redundant; confirm
    # whether addName(symbolic_evaluation(f)) was intended.
    return addName(group_delegation(symbolic_evaluation(f)))
zdelrosario marked this conversation as resolved.
Show resolved Hide resolved
20 changes: 15 additions & 5 deletions grama/dfply/group.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
from .base import *
__all__ = [
"tran_group_by",
"tf_group_by",
"tran_ungroup",
"tf_ungroup",
]

from .base import symbolic_evaluation
from .. import add_pipe


@symbolic_evaluation(eval_as_label=True)
def tran_group_by(df, *args):
    """Group a DataFrame for downstream verbs.

    Records the grouping columns on the frame as the ``_grouped_by``
    attribute; verbs wrapped in ``group_delegation`` consult this
    attribute to apply themselves per-group.

    Args:
        df (pandas.DataFrame): Data to group.
        *args: Columns to group by (evaluated as column labels).

    Returns:
        pandas.DataFrame: The same frame, tagged with ``_grouped_by``.
    """
    df._grouped_by = list(args)
    return df


tf_group_by = add_pipe(tran_group_by)


def tran_ungroup(df):
    """Remove any active grouping from a DataFrame.

    Clears the ``_grouped_by`` attribute set by ``tran_group_by``.

    Args:
        df (pandas.DataFrame): Data to ungroup.

    Returns:
        pandas.DataFrame: The same frame with grouping cleared.
    """
    df._grouped_by = None
    return df


tf_ungroup = add_pipe(tran_ungroup)
Loading