diff --git a/grama/comp_building.py b/grama/comp_building.py index 491a7f09..7d08b263 100644 --- a/grama/comp_building.py +++ b/grama/comp_building.py @@ -87,7 +87,7 @@ def _comp_function_data(model, fun, var, out, name, runtime): def comp_function(model, fun=None, var=None, out=None, name=None, runtime=0): r"""Add a function to a model - Composition. Add a function to an existing model. + Composition. Add a (non-vectorized) function to an existing model. See ``gr.cp_vec_function()`` to add a function that is vectorized over DataFrames. Args: model (gr.model): Model to compose @@ -102,16 +102,19 @@ def comp_function(model, fun=None, var=None, out=None, name=None, runtime=0): @pre (len(var) == d) | (var == d) @pre (len(out) == r) | (var == r) - Examples: - - >>> import grama as gr - >>> md = gr.Model("test") >> \ - >>> gr.function( - >>> fun=lambda x: x, - >>> var=1, - >>> out=["y"], - >>> name="identity" - >>> ) + Examples:: + + import grama as gr + ## Simple example + md = ( + gr.Model("test") + >> gr.cp_function( + fun=lambda x: x, + var=["x"], + out=["y"], + name="identity" + ) + ) """ model_new = model.copy() @@ -138,7 +141,10 @@ def comp_vec_function(model, fun=None, var=None, out=None, name=None, runtime=0) Composition. Add a function to an existing model. Function must be vectorized over DataFrames, and must add new columns matching its `out` - set. + set. See ``gr.cp_function()`` to add a non-vectorized function. + + Notes: + The helper function ``gr.df_make()`` is useful for constructing a vectorized lambda function (see Examples below). Args: model (gr.model): Model to compose @@ -153,18 +159,19 @@ def comp_vec_function(model, fun=None, var=None, out=None, name=None, runtime=0) @pre (len(var) == d) | (var == d) @pre (len(out) == r) | (var == r) - Examples: - - >>> import grama as gr - >>> md = ( - >>> gr.Model("Test") - >>> >> gr.cp_vec_function( - >>> fun=lambda df: gr.df_make(y=1 + 0.5 * df.x), - >>> var=["x"], - >>> out=["y"], - >>> name="Simple linear function", - >>> ) - >>> ) + Examples:: + + import grama as gr + ## Simple example + md = ( + gr.Model("Test") + >> gr.cp_vec_function( + fun=lambda df: gr.df_make(y=1 + 0.5 * df.x), + var=["x"], + out=["y"], + name="Simple linear function", + ) + ) """ model_new = model.copy() @@ -199,7 +206,20 @@ def comp_md_det(model, md=None): Returns: gr.model: New model with added function - Examples: + Examples:: + + import grama as gr + from grama.models import make_cantilever_beam + ## Use functions from beam model, but introduce new marginals + md_new = ( + gr.Model("New beam model") + >> gr.cp_md_det(md=make_cantilever_beam()) + >> gr.cp_marginals( + H=gr.marg_mom("norm", mean=1000, cov=0.1), + V=gr.marg_mom("norm", mean=500, cov=0.1), + ) + >> gr.cp_copula_independence() + ) """ if md is None: @@ -235,7 +255,7 @@ def comp_md_sample(model, md=None, param=None, rand2out=False): Returns: gr.model: New model with added function - Examples: + Examples:: """ ## Check invariants @@ -307,32 +327,34 @@ def _ev(md, df): def comp_bounds(model, **kwargs): r"""Add variable bounds to a model - Composition. Add variable bounds to an existing model. Bounds are specified - by iterable; the model variable name is specified by the keyword argument - name. + Composition. Add variable bounds to an existing model. Bounds are specified by iterable; the model variable name is specified by the keyword argument name. 
Args: model (gr.model): Model to modify - var (iterable): Bound information + + Kwargs: + var (iterable): Bound information; the keyword argument name is the targeted variable, and the value should be a length 2 iterable of the form (lower_bound, upper_bound) Returns: gr.model: Model with new marginals @pre len(var) >= 2 - Examples: + Examples:: - >>> import grama as gr - >>> md = gr.Model() >> \ - >>> cp_function( - >>> lambda x: x[0] + x[1], - >>> var=["x0", "x1"], - >>> out=1 - >>> ) >> \ - >>> cp_bounds( - >>> x0=(-1, 1), - >>> x1=(0, np.inf) - >>> ) + import grama as gr + import numpy as np + md = ( + gr.Model("Simple Model") + >> gr.cp_function( + lambda x: x[0] + x[1], + var=["x0", "x1"], + out=["y"] + ) + >> gr.cp_bounds( + x0=(-1, 1), # Finite bounds + x1=(0, np.inf) # Semi-infinite bounds + ) + ) """ new_model = model.copy() @@ -358,6 +380,13 @@ def comp_marginals(model, **kwargs): either by dictionary entries or by gr.Marginal() object. The model variable name is specified by the keyword argument name. + Notes: + Several helper functions are available to fit marginal distributions + + - ``gr.marg_fit()`` fits a distribution using a dataset (via maximum likelihood estimation) + - ``gr.marg_mom()`` fits a distribution using moments (via the method of moments) + - ``gr.marg_gkde()`` fits a Gaussian kernel density using a dataset + Args: model (gr.model): Model to modify var (dict OR gr.Marginal): Marginal information Returns: gr.model: Model with new marginals - Examples: - - >>> import grama as gr - >>> print(gr.valid_dist.keys()) # Supported distributions - >>> md = gr.Model() >> \ - >>> cp_function( - >>> lambda x: x[0] + x[1], - >>> var=["x0", "x1"], - >>> out=1 - >>> ) >> \ - >>> cp_marginals( - >>> x0={"dist": "norm", "loc": 0, "scale": 1} - >>> ) + Examples:: + + import grama as gr + ## Print all of the grama-supported distributions + print(gr.valid_dist.keys()) + ## Construct a simple example model + md = ( + gr.Model() + >> gr.cp_function( + lambda x: x[0] + x[1], + var=["x0", "x1"], + out=["y"], + ) + >> gr.cp_marginals( + x0=gr.marg_mom("norm", mean=0, sd=1), + ) + ) """ new_model = model.copy() @@ -423,8 +456,7 @@ def comp_copula_independence(model): Composition. Add an independence copula to an existing model. - NOTE: Independence of random variables is a *very* strong assumption! - Recommend using comp_copula_gaussian instead. + NOTE: Independence of random variables is a *very* strong assumption! Recommend using comp_copula_gaussian instead. 
Args: model (gr.model): Model to modify @@ -432,12 +464,17 @@ def comp_copula_independence(model): Returns: gr.model: Model with independence copula - >>> import grama as gr - >>> md = gr.Model() >> \ - >>> cp_marginals( - >>> x0={"dist": "norm", "loc": 0, "scale": 1} - >>> ) >> \ - >>> cp_copula_independence() + Examples:: + + import grama as gr + md = ( + gr.Model() + >> gr.cp_marginals( + x0=gr.marg_mom("norm", mean=0, sd=1), + x1=gr.marg_mom("beta", mean=0, sd=1, skew=0, kurt=2), + ) + >> gr.cp_copula_independence() + ) """ new_model = model.copy() @@ -467,31 +504,31 @@ def comp_copula_gaussian(model, df_corr=None, df_data=None): Returns: gr.model: Model with Gaussian copula - Examples: - - >>> import grama as gr - >>> ## Manual assignment - >>> md = gr.Model() >> \ - >>> cp_marginals( - >>> x0={"dist": "norm", "loc": 0, "scale": 1} - >>> x1={"dist": "uniform", "loc": -1, "scale": 2} - >>> ) >> \ - >>> cp_copula_gaussian( - >>> df_corr=pd.DataFrame(dict( - >>> var1=["x0"], - >>> var2=["x1"], - >>> corr=[0.5] - >>> )) - >>> ) - >>> ## Automated fitting - >>> from grama.data import df_stang - >>> md = gr.Model() >> \ - >>> gr.cp_marginals( - >>> E=gr.marg_named(df_stang.E, "norm"), - >>> mu=gr.marg_named(df_stang.mu, "beta"), - >>> thick=gr.marg_named(df_stang.thick, "norm") - >>> ) >> \ - >>> gr.cp_copula_gaussian(df_data=df_stang) + Examples:: + + import grama as gr + ## Manual assignment + md_manual = (gr.Model() + >> gr.cp_marginals( + x0=gr.marg_mom("norm", mean=0, sd=1), + x1=gr.marg_mom("uniform", mean=0, sd=1), + ) + >> gr.cp_copula_gaussian( + # Specify correlation structure explicitly + df_corr=gr.df_make(var1="x0", var2="x1", corr=0.5) + ) + ) + ## Automated fitting + from grama.data import df_stang + md_auto = ( + gr.Model() + >> gr.cp_marginals( + E=gr.marg_fit("norm", df_stang.E), + mu=gr.marg_fit("beta", df_stang.mu), + thick=gr.marg_fit("norm", df_stang.thick) + ) + >> gr.cp_copula_gaussian(df_data=df_stang) + ) """ if not (df_corr is None): diff --git a/grama/dataframe.py b/grama/dataframe.py index 2e0ff93c..bb2ace43 100644 --- a/grama/dataframe.py +++ b/grama/dataframe.py @@ -25,32 +25,30 @@ def safelen(x): def df_make(**kwargs): r"""Construct a DataFrame - Helper function to construct a DataFrame. + Helper function to construct a DataFrame. A common use-case is to use df_make() to pass values to the df (and related) keyword arguments succinctly. + + Kwargs: + varname (iterable): Column for constructed dataframe; column name inferred from variable name. - Keyword Args: - varname (iterable): Column for constructed dataframe; column - name inferred from variable name. Returns: DataFrame: Constructed DataFrame Preconditions: - All provided iterables must have identical length or be of - length one. - + All provided iterables must have identical length or be of length one. All provided variable names (keyword arguments) must be distinct. 
- Examples: - A common use-case is to use df_make() to pass values to - the df_det keyword argument succinctly; + Examples:: - >>> import grama as gr - >>> from models import make_test - >>> md = make_test() - >>> md >> \ - >>> gr.ev_monte_carlo( - >>> n=1e3, - >>> df_det=gr.df_make(x2=[1, 2]) - >>> ) + import grama as gr + from grama.models import make_test + md = make_test() + ( + md + >> gr.ev_sample( + n=1e3, + df_det=gr.df_make(x2=[1, 2]) + ) + ) """ ## Catch passed Intention operator @@ -89,8 +87,7 @@ def df_make(**kwargs): def df_equal(df1, df2, close=False, precision=3): """Check DataFrame equality - Check that two dataframes have the same columns and values. Allow column - order to differ. + Check that two dataframes have the same columns and values. Allows column order to differ. Args: df1 (DataFrame): Comparison input 1 df2 (DataFrame): Comparison input 2 close (bool): Check approximate equality? precision (int): Precision for approximate equality Returns: bool: Result of comparison @@ -123,19 +120,19 @@ def df_grid(**kwargs): r"""Construct a DataFrame as outer-product - Helper function to construct a DataFrame as an outer-product of the given - columns. + Helper function to construct a DataFrame as an outer-product of the given columns. + + Kwargs: + varname (iterable): Column for constructed dataframe; column name inferred from variable name. - Keyword Args: - varname (iterable): Column for constructed dataframe; column - name inferred from variable name. Returns: DataFrame: Constructed DataFrame Preconditions: All provided variable names (keyword arguments) must be distinct. - Examples: + Examples:: + import grama as gr ## Make an empty DataFrame gr.df_grid() diff --git a/grama/dfply/count.py b/grama/dfply/count.py index ec10917b..36625892 100644 --- a/grama/dfply/count.py +++ b/grama/dfply/count.py @@ -20,21 +20,22 @@ def tran_count(df, *args): Returns: DataFrame: Result of group counting. - Examples: - >>> import grama as gr - >>> from grama.data import df_diamonds - >>> DF = gr.Intention() - >>> ( - >>> # Single group variable - >>> df_diamonds - >>> >> gr.tf_count(DF.cut) - >>> ) - >>> - >>> ( - >>> # Multiple group variables - >>> df_diamonds - >>> >> gr.tf_count(DF.cut, DF.clarity) - >>> ) + Examples:: + + import grama as gr + from grama.data import df_diamonds + DF = gr.Intention() + ( + # Single group variable + df_diamonds + >> gr.tf_count(DF.cut) + ) + + ( + # Multiple group variables + df_diamonds + >> gr.tf_count(DF.cut, DF.clarity) + ) """ # Compute the count diff --git a/grama/dfply/join.py b/grama/dfply/join.py index 5050473c..28d8a8d5 100644 --- a/grama/dfply/join.py +++ b/grama/dfply/join.py @@ -63,12 +63,16 @@ def tran_inner_join(df, other, **kwargs): suffixes (list): String suffixes to append to column names in left and right DataFrames. - Example: - a >> inner_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_inner_join(df_2, by="key") + ) - x1 x2 x3 - 0 A 1 True - 1 B 2 False """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -96,14 +100,16 @@ def tran_full_join(df, other, **kwargs): suffixes (list): String suffixes to append to column names in left and right DataFrames. 
- Example: - a >> outer_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_full_join(df_2, by="key") + ) - x1 x2 x3 - 0 A 1.0 True - 1 B 2.0 False - 2 C 3.0 NaN - 3 D NaN True """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -131,14 +137,16 @@ def tran_outer_join(df, other, **kwargs): suffixes (list): String suffixes to append to column names in left and right DataFrames. - Example: - a >> full_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_outer_join(df_2, by="key") + ) - x1 x2 x3 - 0 A 1.0 True - 1 B 2.0 False - 2 C 3.0 NaN - 3 D NaN True """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -166,13 +174,16 @@ def tran_left_join(df, other, **kwargs): suffixes (list): String suffixes to append to column names in left and right DataFrames. - Example: - a >> left_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_left_join(df_2, by="key") + ) - x1 x2 x3 - 0 A 1 True - 1 B 2 False - 2 C 3 NaN """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -200,13 +211,16 @@ def tran_right_join(df, other, **kwargs): suffixes (list): String suffixes to append to column names in left and right DataFrames. - Example: - a >> right_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_right_join(df_2, by="key") + ) - x1 x2 x3 - 0 A 1.0 True - 1 B 2.0 False - 2 D NaN True """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -233,12 +247,16 @@ def tran_semi_join(df, other, **kwargs): on that column. If a list of lists which contain strings or integers, the right/left columns to join on. - Example: - a >> semi_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_semi_join(df_2, by="key") + ) - x1 x2 - 0 A 1 - 1 B 2 """ left_on, right_on, suffixes = get_join_parameters(kwargs) @@ -280,11 +298,16 @@ def tran_anti_join(df, other, **kwargs): on that column. If a list of lists which contain strings or integers, the right/left columns to join on. - Example: - a >> anti_join(b, by='x1') + Examples:: + + import grama as gr + df_1 = gr.df_make(key=["A", "B", "C"], x=[1, 2, 3]) + df_2 = gr.df_make(key=["B", "A", "D"], y=[4, 5, 6]) + ( + df_1 + >> gr.tf_anti_join(df_2, by="key") + ) - x1 x2 - 2 C 3 """ left_on, right_on, suffixes = get_join_parameters(kwargs) diff --git a/grama/dfply/reshape.py b/grama/dfply/reshape.py index d05dfc0e..a3a1d1b1 100644 --- a/grama/dfply/reshape.py +++ b/grama/dfply/reshape.py @@ -88,7 +88,8 @@ def tran_rename(df, **kwargs): the name of the argument (left of `=`) will be the new column name, the value of the argument (right of `=`) is the old column name (as a string). - Examples: + Examples:: + ## Setup import grama as gr DF = gr.Intention() @@ -138,7 +139,8 @@ def tran_gather(df, key, values, *args, **kwargs): column that will preserve information about the original rows (useful for being able to re-widen the data later). 
- Example: + Examples:: + diamonds >> gather('variable', 'value', ['price', 'depth','x','y','z']) >> head(5) carat cut color clarity table variable value @@ -221,7 +223,8 @@ def tran_spread(df, key, values, convert=False, fill=None): the spread columns to more appropriate data types. - Example: + Examples:: + widened = elongated >> spread(X.variable, X.value) widened >> head(5) @@ -312,7 +315,8 @@ def tran_separate( Returns: pandas.DataFrame: Modified data - Examples: + Examples:: + import grama as gr DF = gr.Intention @@ -479,7 +483,7 @@ def tran_explode(df, col, convert=False): Returns: DataFrame: - Example: + Examples:: """ diff --git a/grama/dfply/summarize.py b/grama/dfply/summarize.py index 6a0f1fb6..6e2e67ee 100644 --- a/grama/dfply/summarize.py +++ b/grama/dfply/summarize.py @@ -37,7 +37,8 @@ def tran_summarize(df, **kwargs): Returns: DataFrame: Summarized data - Examples: + Examples:: + ## Setup import grama as gr DF = gr.Intention() diff --git a/grama/dfply/summary_functions.py b/grama/dfply/summary_functions.py index c407c860..d5a3bc89 100644 --- a/grama/dfply/summary_functions.py +++ b/grama/dfply/summary_functions.py @@ -228,7 +228,8 @@ def n(series=None): Args: series (pandas.Series): column to summarize. Default is the size of the parent DataFrame. - Examples: + Examples:: + import grama as gr from grama.data import df_diamonds DF = gr.Intention() @@ -524,26 +525,27 @@ def pr(series): Args: series (pandas.Series): Column to summarize; must be boolean or 0/1. - Examples: - >>> import grama as gr - >>> DF = gr.Intention() - >>> ## Cantilever beam examples - >>> from grama.models import make_cantilever_beam - >>> md_beam = make_cantilever_beam() - >>> - >>> ## Estimate probabilities - >>> ( - >>> md_beam - >>> # Generate large - >>> >> gr.ev_sample(n=1e5, df_det="nom") - >>> # Estimate probabilities of failure - >>> >> gr.tf_summarize( - >>> pof_stress=gr.pr(DF.g_stress <= 0), - >>> pof_disp=gr.pr(DF.g_disp <= 0), - >>> pof_joint=gr.pr( (DF.g_stress <= 0) & (DF.g_disp) ), - >>> pof_either=gr.pr( (DF.g_stress <= 0) | (DF.g_disp) ), - >>> ) - >>> ) + Examples:: + + import grama as gr + DF = gr.Intention() + ## Cantilever beam examples + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + + ## Estimate probabilities + ( + md_beam + # Generate a large sample + >> gr.ev_sample(n=1e5, df_det="nom") + # Estimate probabilities of failure + >> gr.tf_summarize( + pof_stress=gr.pr(DF.g_stress <= 0), + pof_disp=gr.pr(DF.g_disp <= 0), + pof_joint=gr.pr( (DF.g_stress <= 0) & (DF.g_disp <= 0) ), + pof_either=gr.pr( (DF.g_stress <= 0) | (DF.g_disp <= 0) ), + ) + ) """ return series.mean() @@ -567,25 +569,26 @@ def pr_lo(series, alpha=0.01): Returns: float: Lower confidence interval - Examples: - >>> import grama as gr - >>> DF = gr.Intention() - >>> ## Cantilever beam examples - >>> from grama.models import make_cantilever_beam - >>> md_beam = make_cantilever_beam() - >>> - >>> ## Estimate probabilities - >>> ( - >>> md_beam - >>> # Generate large - >>> >> gr.ev_sample(n=1e5, df_det="nom") - >>> # Estimate probabilities with a confidence interval - >>> >> gr.tf_summarize( - >>> pof_lo=gr.pr_lo(DF.g_stress <= 0), - >>> pof=gr.pr(DF.g_stress <= 0), - >>> pof_up=gr.pr_up(DF.g_stress <= 0), - >>> ) - >>> ) + Examples:: + + import grama as gr + DF = gr.Intention() + ## Cantilever beam examples + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + + ## Estimate probabilities + ( + md_beam + # Generate a large sample + >> gr.ev_sample(n=1e5, df_det="nom") + # Estimate probabilities with a confidence interval + >> gr.tf_summarize( + pof_lo=gr.pr_lo(DF.g_stress <= 0), + pof=gr.pr(DF.g_stress <= 0), + pof_up=gr.pr_up(DF.g_stress <= 0), + ) + ) """ up = binomial_ci(series, alpha=alpha, side="lo") return up @@ -609,25 +612,26 @@ def pr_up(series, alpha=0.01): Returns: float: Upper confidence interval - Examples: - >>> import grama as gr - >>> DF = gr.Intention() - >>> ## Cantilever beam examples - >>> from grama.models import make_cantilever_beam - >>> md_beam = make_cantilever_beam() - >>> - >>> ## Estimate probabilities - >>> ( - >>> md_beam - >>> # Generate large - >>> >> gr.ev_sample(n=1e5, df_det="nom") - >>> # Estimate probabilities with a confidence interval - >>> >> gr.tf_summarize( - >>> pof_lo=gr.pr_lo(DF.g_stress <= 0), - >>> pof=gr.pr(DF.g_stress <= 0), - >>> pof_up=gr.pr_up(DF.g_stress <= 0), - >>> ) - >>> ) + Examples:: + + import grama as gr + DF = gr.Intention() + ## Cantilever beam examples + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + + ## Estimate probabilities + ( + md_beam + # Generate a large sample + >> gr.ev_sample(n=1e5, df_det="nom") + # Estimate probabilities with a confidence interval + >> gr.tf_summarize( + pof_lo=gr.pr_lo(DF.g_stress <= 0), + pof=gr.pr(DF.g_stress <= 0), + pof_up=gr.pr_up(DF.g_stress <= 0), + ) + ) """ up = binomial_ci(series, alpha=alpha, side="up") return up @@ -677,11 +681,10 @@ def corr(series1, series2, method="pearson", res="corr", nan_drop=False): def neff_is(series): """Importance sampling n_eff - Computes the effective sample size according to Equation 9.13 of Owen - (2013). + Computes the effective sample size based on importance sampling weights. See Equation 9.13 of Owen (2013) for details, and ``gr.tran_reweight()`` for a typical application. Args: - series (pandas.Series): column to summarize. + series (pandas.Series): column of importance sampling weights. References: A.B. Owen, "Monte Carlo theory, methods and examples" (2013) diff --git a/grama/dfply/transform.py b/grama/dfply/transform.py index ea4747d2..ce89c514 100644 --- a/grama/dfply/transform.py +++ b/grama/dfply/transform.py @@ -25,7 +25,8 @@ def tran_mutate(df, **kwargs): the name of the argument (left of `=`) will be the new column name, the value of the argument (right of `=`) defines the new column's value - Example: + Examples:: + ## Setup import grama as gr DF = gr.Intention() @@ -57,7 +58,8 @@ def tran_mutate_if(df, predicate, fun): predicate: a function applied to columns that returns a boolean value fun: a function that will be applied to columns where predicate returns True - Example: + Examples:: + diamonds >> mutate_if(lambda col: min(col) < 1 and mean(col) < 4, lambda row: 2 * row) >> head(3) carat cut color clarity depth table price x y z 0 0.46 Ideal E SI2 61.5 55.0 326 3.95 3.98 4.86 diff --git a/grama/dfply/vector.py b/grama/dfply/vector.py index 976e618f..2e610206 100644 --- a/grama/dfply/vector.py +++ b/grama/dfply/vector.py @@ -65,7 +65,7 @@ def desc(series): inverted `pandas.Series`. The returned series will be numeric (integers), regardless of the type of the original series. - Example: + Examples:: First group by cut, then find the first value of price when ordering by price ascending, and ordering by price descending using the `desc` function. @@ -99,7 +99,8 @@ def coalesce(*series): *series: Series objects, typically represented in their symbolic form (like X.series). 
- Example: + Examples:: + df = pd.DataFrame({ 'a':[1,np.nan,np.nan,np.nan,np.nan], 'b':[2,3,np.nan,np.nan,np.nan], @@ -167,7 +168,8 @@ def case_when(*conditions): which the condition is met. The second value is a vector of values or single value specifying the outcome where that condition is met. - Example: + Example:: + df = pd.DataFrame({ 'num':np.arange(16) }) @@ -242,21 +244,22 @@ def if_else(condition, when_true, otherwise): otherwise: A vector the same length as the condition vector or a single value to apply when the condition is `False`. - Example: - >>> import grama as gr - >>> from grama.data import df_diamonds - >>> DF = gr.Intention() - >>> ( - >>> df_diamonds - >>> >> gr.tf_mutate( - >>> # Recode nonsensical x values - >>> x=gr.if_else( - >>> DF.x == 0 - >>> gr.NaN, - >>> DF.x, - >>> ) - >>> ) - >>> ) + Example:: + + import grama as gr + from grama.data import df_diamonds + DF = gr.Intention() + ( + df_diamonds + >> gr.tf_mutate( + # Recode nonsensical x values + x=gr.if_else( + DF.x == 0, + gr.NaN, + DF.x, + ) + ) + ) """ if not isinstance(when_true, collections.abc.Iterable) or isinstance(when_true, str): diff --git a/grama/eval_contour.py b/grama/eval_contour.py index b9283bce..004909b9 100644 --- a/grama/eval_contour.py +++ b/grama/eval_contour.py @@ -188,72 +188,70 @@ def eval_contour( ): r"""Generate contours from a model - Generates contours from a model. Evaluates the model on a dense grid, then - runs marching squares to generate contours. Supports targeting multiple - outputs and handling auxiliary inputs not included in the contour map. + Generates contours from a model. Evaluates the model on a dense grid, then runs marching squares to generate contours. Supports targeting multiple outputs and handling auxiliary inputs not included in the contour map. Args: model (gr.Model): Model to evaluate. - var (list of str): Model inputs to target; must provide exactly - two inputs, and both must have finite domain width. + var (list of str): Model inputs to target; must provide exactly two inputs, and both must have finite domain width. out (list of str): Model output(s) for contour generation. df (DataFrame): Levels for model variables not included in var (auxiliary inputs). - levels (dict): Specific output levels for contour generation; - overrides n_levels. + levels (dict): Specific output levels for contour generation; overrides n_levels. n_side (int): Side resolution for grid; n_side**2 total evaluations. n_levels (int): Number of contour levels. Returns: DataFrame: Points along contours, organized by output and auxiliary variable levels. - Examples: - - >>> import grama as gr - >>> ## Multiple outputs - >>> ( - >>> gr.Model() - >>> >> gr.cp_vec_function( - >>> fun=lambda df: gr.df_make( - >>> f=df.x**2 + df.y**2, - >>> g=df.x + df.y, - >>> ), - >>> var=["x", "y"], - >>> out=["f", "g"], - >>> ) - >>> >> gr.cp_bounds( - >>> x=(-1, +1), - >>> y=(-1, +1), - >>> ) - >>> >> gr.ev_contour( - >>> var=["x", "y"], - >>> out=["f", "g"], - >>> ) - >>> # Contours with no auxiliary variables can autoplot - >>> >> gr.pt_auto() - >>> ) - >>> ## Auxiliary inputs - >>> ( - >>> gr.Model() - >>> >> gr.cp_vec_function( - >>> fun=lambda df: gr.df_make( - >>> f=df.c * df.x + (1 - df.c) * df.y, - >>> ), - >>> var=["x", "y"], - >>> out=["f", "g"], - >>> ) - >>> >> gr.cp_bounds( - >>> x=(-1, +1), - >>> y=(-1, +1), - >>> ) - >>> >> gr.ev_contour( - >>> var=["x", "y"], - >>> out=["f"], - >>> df=gr.df_make(c=[0, 1]) - >>> ) - >>> - >>> >> gr.ggplot(gr.aes("x", "y")) - >>> + gr.geom_segment(gr.aes(xend="x_end", yend="y_end", group="level", color="c")) - >>> ) + Examples:: + + import grama as gr + ## Multiple outputs + ( + gr.Model() + >> gr.cp_vec_function( + fun=lambda df: gr.df_make( + f=df.x**2 + df.y**2, + g=df.x + df.y, + ), + var=["x", "y"], + out=["f", "g"], + ) + >> gr.cp_bounds( + x=(-1, +1), + y=(-1, +1), + ) + >> gr.ev_contour( + var=["x", "y"], + out=["f", "g"], + ) + # Contours with no auxiliary variables can autoplot + >> gr.pt_auto() + ) + + ## Auxiliary inputs + ( + gr.Model() + >> gr.cp_vec_function( + fun=lambda df: gr.df_make( + f=df.c * df.x + (1 - df.c) * df.y, + ), + var=["x", "y"], + out=["f"], + ) + >> gr.cp_bounds( + x=(-1, +1), + y=(-1, +1), + ) + >> gr.ev_contour( + var=["x", "y"], + out=["f"], + df=gr.df_make(c=[0, 1]) + ) + + # Contours with auxiliary variables should be manually plotted + >> gr.ggplot(gr.aes("x", "y")) + + gr.geom_segment(gr.aes(xend="x_end", yend="y_end", group="level", color="c")) + ) """ ## Check invariants @@ -277,6 +275,10 @@ def eval_contour( "Must provide values for remaining model variables using df; " + "missing values: {}".format(var_diff) ) + # Drop the swept variables + df = df.drop(columns=var, errors="ignore") + + # Check for unsupported inputs var_diff2 = var_diff.difference(set(df.columns)) if len(var_diff2) > 0: raise ValueError( diff --git a/grama/eval_defaults.py b/grama/eval_defaults.py index 01dd8938..325e18d1 100644 --- a/grama/eval_defaults.py +++ b/grama/eval_defaults.py @@ -38,13 +38,13 @@ def eval_df(model, df=None, append=True, verbose=True): Returns: DataFrame: Results of model evaluation - Examples: + Examples:: - >>> import grama as gr - >>> from grama.models import make_test - >>> md = make_test() - >>> df = gr.df_make(x0=0, x1=1, x2=2) - >>> md >> gr.ev_df(df=df) + import grama as gr + from grama.models import make_test + md = make_test() + df = gr.df_make(x0=0, x1=1, x2=2) + md >> gr.ev_df(df=df) """ if df is None: @@ -80,24 +80,26 @@ def eval_df(model, df=None, append=True, verbose=True): def eval_nominal(model, df_det=None, append=True, skip=False): r"""Evaluate model at nominal values - Evaluates a given model at a model nominal conditions (median). + Evaluates a given model at the nominal conditions (median) of its random inputs. Optionally set nominal values for the deterministic inputs. Args: model (gr.Model): Model to evaluate - df_det (DataFrame): Deterministic levels for evaluation; use "nom" - for nominal deterministic levels. + df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels. append (bool): Append results to nominal inputs? skip (bool): Skip evaluation of the functions? Returns: DataFrame: Results of nominal model evaluation or unevaluated design - Examples: + Examples:: - >>> import grama as gr - >>> from grama.models import make_test - >>> md = make_test() - >>> md >> gr.ev_nominal(df_det="nom") + import grama as gr + from grama.models import make_test + md = make_test() + ## Set manual levels for deterministic inputs; nominal levels for random inputs + md >> gr.ev_nominal(df_det=gr.df_make(x2=[0, 1, 2])) + ## Use nominal deterministic levels + md >> gr.ev_nominal(df_det="nom") """ ## Draw from underlying gaussian @@ -141,14 +143,15 @@ def eval_grad_fd(model, h=1e-8, df_base=None, var=None, append=True, skip=False) @pre (not isinstance(h, collections.Sequence)) | (h.shape[0] == df_base.shape[1]) - Examples: + Examples:: - >>> import grama as gr - >>> from grama.models import make_cantilever_beam - >>> md = make_cantilever_beam() - >>> df_nom = md >> gr.ev_nominal(df_det="nom") - >>> df_grad = md >> gr.ev_grad_fd(df_base=df_nom) - >>> df_grad >> gr.tf_gather("var", "val", gr.everything()) + import grama as gr + from grama.models import make_cantilever_beam + md = make_cantilever_beam() + # Select base point(s) + df_nom = md >> gr.ev_nominal(df_det="nom") + # Approximate the gradient + df_grad = md >> gr.ev_grad_fd(df_base=df_nom) """ ## Check invariants @@ -226,14 +229,9 @@ def eval_grad_fd(model, h=1e-8, df_base=None, var=None, append=True, skip=False) def eval_conservative(model, quantiles=None, df_det=None, append=True, skip=False): r"""Evaluates a given model at conservative input quantiles - Uses model specifications to determine the "conservative" direction - for each input, and evaluates the model at the desired quantile. - Provided primarily for comparing UQ against pseudo-deterministic - design criteria. + Uses model specifications to determine the "conservative" direction for each input, and evaluates the model at the desired quantile. Provided primarily for comparing UQ against pseudo-deterministic design criteria (del Rosario et al., 2021). - Note that if there is no conservative direction for the given input, - the given quantile will be ignored and the median will automatically - be selected. + Note that if there is no conservative direction for the given input, the given quantile will be ignored and the median will automatically be selected. Args: model (gr.Model): Model to evaluate quantiles (numeric): lower quantile value(s) for conservative evaluation; can be single value for all inputs, array of values for each random variable, or None for default 0.01. values in [0, 0.5] df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels. append (bool): Append results to conservative inputs? skip (bool): Skip evaluation of the functions? Returns: DataFrame: Conservative evaluation or unevaluated design - Examples: + References: + del Rosario, Zachary, Richard W. Fenrich, and Gianluca Iaccarino. "When Are Allowables Conservative?." AIAA Journal 59.5 (2021): 1760-1772. 
- >>> import grama as gr - >>> from grama.models import make_plate_buckle - >>> md = make_plate_buckle() - >>> md >> gr.ev_conservative(df_det="nom") + Examples:: + + import grama as gr + from grama.models import make_plate_buckle + md = make_plate_buckle() + # Evaluate at conservative input values + md >> gr.ev_conservative(df_det="nom") """ ## Default behavior @@ -312,60 +314,60 @@ def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False): Returns: DataFrame: Results of evaluation or unevaluated design - Examples: - - >>> import grama as gr - >>> from grama.models import make_test - >>> DF = gr.Intention() - >>> - >>> # Simple random sample evaluation - >>> md = make_test() - >>> df = md >> gr.ev_sample(n=1e2, df_det="nom") - >>> df.describe() - >>> - >>> ## Use autoplot to visualize results - >>> ( - >>> md - >>> >> gr.ev_sample(n=1e2, df_det="nom") - >>> >> gr.pt_auto() - >>> ) - >>> - >>> ## Cantilever beam examples - >>> from grama.models import make_cantilever_beam - >>> md_beam = make_cantilever_beam() - >>> - >>> ## Use iocorr to generate input/output correlation tile plot - >>> ( - >>> md_beam - >>> >> gr.ev_sample(n=1e3, df_det="nom", skip=True) - >>> # Generate input/output correlation summary - >>> >> gr.tf_iocorr() - >>> # Visualize - >>> >> gr.pt_auto() - >>> ) - >>> - >>> ## Use support points to reduce model runtime - >>> ( - >>> md_beam - >>> # Generate large input sample but don't evaluate outputs - >>> >> gr.ev_sample(n=1e5, df_det="nom", skip=True) - >>> # Reduce to a smaller---but representative---sample - >>> >> gr.tf_sp(n=50) - >>> # Evaluate the outputs - >>> >> gr.tf_md(md_beam) - >>> ) - >>> - >>> ## Estimate probabilities - >>> ( - >>> md_beam - >>> # Generate large - >>> >> gr.ev_sample(n=1e5, df_det="nom") - >>> # Estimate probabilities of failure - >>> >> gr.tf_summarize( - >>> pof_stress=gr.mean(DF.g_stress <= 0), - >>> pof_disp=gr.mean(DF.g_disp <= 0), - >>> ) - >>> ) + Examples:: + + import grama as gr + from grama.models import make_test + DF = gr.Intention() + + # Simple random sample evaluation + md = make_test() + df = md >> gr.ev_sample(n=1e2, df_det="nom") + df.describe() + + ## Use autoplot to visualize results + ( + md + >> gr.ev_sample(n=1e2, df_det="nom") + >> gr.pt_auto() + ) + + ## Cantilever beam examples + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + + ## Use iocorr to generate input/output correlation tile plot + ( + md_beam + >> gr.ev_sample(n=1e3, df_det="nom", skip=True) + # Generate input/output correlation summary + >> gr.tf_iocorr() + # Visualize + >> gr.pt_auto() + ) + + ## Use support points to reduce model runtime + ( + md_beam + # Generate large input sample but don't evaluate outputs + >> gr.ev_sample(n=1e5, df_det="nom", skip=True) + # Reduce to a smaller---but representative---sample + >> gr.tf_sp(n=50) + # Evaluate the outputs + >> gr.tf_md(md_beam) + ) + + ## Estimate probabilities + ( + md_beam + # Generate a large sample + >> gr.ev_sample(n=1e5, df_det="nom") + # Estimate probabilities of failure + >> gr.tf_summarize( + pof_stress=gr.mean(DF.g_stress <= 0), + pof_disp=gr.mean(DF.g_disp <= 0), + ) + ) """ diff --git a/grama/eval_opt.py b/grama/eval_opt.py index 82378587..d129c6fd 100644 --- a/grama/eval_opt.py +++ b/grama/eval_opt.py @@ -61,19 +61,20 @@ def eval_nls( Returns: DataFrame: Results of estimation - Examples: - >>> import grama as gr - >>> from grama.data import df_trajectory_full - >>> from grama.models import make_trajectory_linear - >>> - >>> md_trajectory = 
make_trajectory_linear() - >>> - >>> df_fit = ( - >>> md_trajectory - >>> >> gr.ev_nls(df_data=df_trajectory_full) - >>> ) - >>> - >>> print(df_fit) + Examples:: + + import grama as gr + from grama.data import df_trajectory_full + from grama.models import make_trajectory_linear + + md_trajectory = make_trajectory_linear() + + df_fit = ( + md_trajectory + >> gr.ev_nls(df_data=df_trajectory_full) + ) + + print(df_fit) """ ## Check `out` invariants @@ -283,34 +284,38 @@ def eval_min( Returns: DataFrame: Results of optimization - Examples: - >>> import grama as gr - >>> md = ( - >>> gr.Model("Constrained Rosenbrock") - >>> >> gr.cp_function( - >>> fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2, - >>> var=["x", "y"], - >>> out=["c"], - >>> ) - >>> >> gr.cp_function( - >>> fun=lambda x: (x[0] - 1)**3 - x[1] + 1, - >>> var=["x", "y"], - >>> out=["g1"], - >>> ) - >>> >> gr.cp_function( - >>> fun=lambda x: x[0] + x[1] - 2, - >>> var=["x", "y"], - >>> out=["g2"], - >>> ) - >>> >> gr.cp_bounds( - >>> x=(-1.5, +1.5), - >>> y=(-0.5, +2.5), - >>> ) - >>> ) - >>> md >> gr.ev_min( - >>> out_min="c", - >>> out_leq=["g1", "g2"] - >>> ) + Examples:: + + import grama as gr + ## Define a model with objective and constraints + md = ( + gr.Model("Constrained Rosenbrock") + >> gr.cp_function( + fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2, + var=["x", "y"], + out=["c"], + ) + >> gr.cp_function( + fun=lambda x: (x[0] - 1)**3 - x[1] + 1, + var=["x", "y"], + out=["g1"], + ) + >> gr.cp_function( + fun=lambda x: x[0] + x[1] - 2, + var=["x", "y"], + out=["g2"], + ) + >> gr.cp_bounds( + x=(-1.5, +1.5), + y=(-0.5, +2.5), + ) + ) + + ## Run the optimizer + md >> gr.ev_min( + out_min="c", + out_leq=["g1", "g2"] + ) """ ## Check that model has only deterministic variables diff --git a/grama/eval_pnd.py b/grama/eval_pnd.py index ca961e14..cc6d3a25 100644 --- a/grama/eval_pnd.py +++ b/grama/eval_pnd.py @@ -28,10 +28,9 @@ @curry def eval_pnd(model, df_train, df_test, signs, n=int(1e4), seed=None, append=True, \ mean_prefix="_mean", sd_prefix="_sd"): - """ Evaluate a Model using a predictive model + """Approximate the probability non-dominated (PND) - Evaluates a given model against a PND algorithm to determine - "optimal points". + Approximates the probability non-dominated (PND) for a set of training points given a fitted probabilistic model. Used to rank a set of candidates in the context of multiobjective optimization. Args: model (gr.model): predictive model to evaluate @@ -49,39 +48,43 @@ def eval_pnd(model, df_train, df_test, signs, n=int(1e4), seed=None, append=True DataFrame: Results of predictive model going through a PND algorithm. Conatians both values and their scores. - Example: - >>> import grama as gr - >>> - >>> md_true = gr.make_pareto_random() - >>> - >>> df_data = ( - >>> md_true - >>> >> gr.ev_sample(n=2e3, seed=101, df_det="nom") - >>> ) - >>> - >>> df_train = ( - >>> df_data - >>> >> gr.tf_sample(n=10)) - >>> ) - >>> - >>> df_test = ( - >>> df_data - >>> >> gr.anti_join( - >>> df_train, - >>> by = ["x1","x2"] - >>> ) - >>> >> gr.tf_sample(n=200) - >>> ) - >>> - >>> md_fit = ( - >>> df_train - >>> >> gr.ft_gp( - >>> var=["x1","x2"] - >>> out=["y1","y2"] - >>> ) - >>> ) - >>> - >>> df_pnd = ( + References: + del Rosario, Zachary, et al. "Assessing the frontier: Active learning, model accuracy, and multi-objective candidate discovery and optimization." The Journal of Chemical Physics 153.2 (2020): 024112. 
+ + Examples:: + + import grama as gr + DF = gr.Intention() + + ## Define a ground-truth model + md_true = gr.make_pareto_random() + df_data = ( + md_true + >> gr.ev_sample(n=2e3, seed=101, df_det="nom") + ) + ## Generate test/train data + df_train = ( + df_data + >> gr.tf_sample(n=10) + ) + + df_test = ( + df_data + >> gr.anti_join( + df_train, + by=["x1","x2"] + ) + >> gr.tf_sample(n=200) + ) + ## Fit a model to training data + md_fit = ( + df_train + >> gr.ft_gp( + var=["x1","x2"], + out=["y1","y2"] + ) + ) + ## Rank training points by PND algorithm + df_pnd = ( md_fit >> gr.ev_pnd( df_train, df_test, signs = (1,1), seed = 101 ) >> gr.tf_arrange(gr.desc(DF.pr_scores)) ) + """ # # Check for correct types # if not isinstance(model, Model): diff --git a/grama/eval_random.py b/grama/eval_random.py index 5c0c5828..279c9148 100644 --- a/grama/eval_random.py +++ b/grama/eval_random.py @@ -58,16 +58,16 @@ def eval_sinews( Returns: DataFrame: Results of evaluation or unevaluated design - Examples: + Examples:: - >>> import grama as gr - >>> md = gr.make_cantilever_beam() - >>> # Skip evaluation, vis. design - >>> df_design = md >> gr.ev_sinews(df_det="nom", skip=True) - >>> df_design >> gr.pt_auto() - >>> # Vis results - >>> df_sinew = md >> gr.ev_sinews(df_det="nom") - >>> df_sinew >> gr.pt_auto() + import grama as gr + md = gr.make_cantilever_beam() + # Skip evaluation; useful to visualize the design (input points) + df_design = md >> gr.ev_sinews(df_det="nom", skip=True) + df_design >> gr.pt_auto() + # Visualize the input-to-output relationships of the model + df_sinew = md >> gr.ev_sinews(df_det="nom") + df_sinew >> gr.pt_auto() """ ## Override model if deterministic sweeps desired @@ -208,18 +208,18 @@ def eval_hybrid( DataFrame: Results of evaluation or unevaluated design References: - I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models" - (1999) MMCE, Vol 1. - - Examples: - - >>> import grama as gr - >>> md = gr.make_cantilever_beam() - >>> df_first = md >> gr.ev_hybrid(df_det="nom", plan="first") - >>> df_first >> gr.tf_sobol() - >>> - >>> df_total = md >> gr.ev_hybrid(df_det="nom", plan="total") - >>> df_total >> gr.tf_sobol() + I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models" (1999) MMCE, Vol 1. + + Examples:: + + import grama as gr + md = gr.make_cantilever_beam() + ## Compute the first-order indices + df_first = md >> gr.ev_hybrid(df_det="nom", plan="first") + df_first >> gr.tf_sobol() + ## Compute the total-order indices + df_total = md >> gr.ev_hybrid(df_det="nom", plan="total") + df_total >> gr.tf_sobol() """ ## Check invariants diff --git a/grama/eval_tail.py b/grama/eval_tail.py index 7b3cb578..cb966c0f 100644 --- a/grama/eval_tail.py +++ b/grama/eval_tail.py @@ -45,11 +45,9 @@ def eval_form_pma( ): r"""Tail quantile via FORM PMA - Approximate the desired tail quantiles using the performance measure - approach (PMA) of the first-order reliability method (FORM) [1]. Select - limit states to minimize at desired quantile with `betas`. Provide - confidence levels `cons` and estimator covariance `df_corr` to compute with - margin in beta [2]. + Approximate the desired tail quantiles using the performance measure approach (PMA) of the first-order reliability method (FORM) [1]. Select limit states to minimize at desired quantile with `betas`. Provide confidence levels `cons` and estimator covariance `df_corr` to compute with margin in beta [2]. + + Note that under the performance measure approach, the optimized limit state value `g` is sought to be non-negative: $g \geq 0$. This is usually included as a constraint in optimization, which can be accomplished by using ``gr.eval_form_pma()`` *within* a model definition---see the Examples below for more details. Args: model (gr.Model): Model to analyze betas (dict): Target reliability indices; key = limit state name; must be in model.out value = reliability index; beta = Phi^{-1}(reliability) cons (dict or None): Target confidence levels; key = limit state name; must be in model.out value = confidence level, \in (0, 1) - df_corr (DataFrame or None): Sampling distribution covariance entries; - parameters with no information assumed to be known exactly. - df_det (DataFrame): Deterministic levels for evaluation; use "nom" - for nominal deterministic levels. + df_corr (DataFrame or None): Sampling distribution covariance entries; parameters with no information assumed to be known exactly. + df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels. n_maxiter (int): Maximum iterations for each optimization run n_restart (int): Number of restarts (== number of optimization runs) append (bool): Append MPP results for random values? verbose (bool): Print optimization results? Returns: DataFrame: Results of MPP search Notes: - - Since FORM PMA relies on optimization over the limit state, it is - often beneficial to scale your limit state to keep values near unity. + Since FORM PMA relies on optimization over the limit state, it is often beneficial to scale your limit state to keep values near unity. References: - - [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999 - - [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019 + [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999 + [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019 + + Examples:: + + import grama as gr + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + ## Evaluate the reliability of specified designs + ( + md_beam + >> gr.ev_form_pma( + # Specify target reliability + betas=dict(g_stress=3, g_disp=3), + # Analyze three different thicknesses + df_det=gr.df_make(t=[2, 3, 4], w=3) + ) + ) + + ## Build a nested model for optimization under uncertainty + md_opt = ( + gr.Model("Beam Optimization") + >> gr.cp_vec_function( + fun=lambda df: gr.df_make(c_area=df.w * df.t), + var=["w", "t"], + out=["c_area"], + name="Area objective", + ) + >> gr.cp_vec_function( + fun=lambda df: gr.eval_form_pma( + md_beam, + betas=dict(g_stress=3, g_disp=3), + df_det=df, + append=False, + ), + var=["w", "t"], + out=["g_stress", "g_disp"], + name="Reliability constraints", + ) + >> gr.cp_bounds(w=(2, 4), t=(2, 4)) + ) + # Run the optimization + ( + md_opt + >> gr.ev_min( + out_min="c_area", + out_geq=["g_stress", "g_disp"], + ) + ) """ ## Check invariants @@ -213,23 +255,18 @@ def eval_form_ria( ): r"""Tail reliability via FORM RIA - Approximate the desired tail probability using the reliability index - approach (RIA) of the first-order reliability method (FORM) [1]. Select - limit states to analyze with list input `limits`. Provide confidence levels - `cons` and estimator covariance `df_corr` to compute with margin in beta - [2]. 
+ Approximate the desired tail probability using the reliability index approach (RIA) of the first-order reliability method (FORM) [1]. Select limit states to analyze with list input `limits`. Provide confidence levels `cons` and estimator covariance `df_corr` to compute with margin in beta [2]. + + Note that the reliability index approach (RIA) is generally less stable than the performance measure approach (PMA). Consider using ``gr.eval_form_pma()`` instead, particularly when using FORM to optimize a design. Args: model (gr.Model): Model to analyze - limits (list): Target limit states; must be in model.out; limit state - assumed to be critical at g == 0 + limits (list): Target limit states; must be in model.out; limit state assumed to be critical at g == 0. cons (dict or None): Target confidence levels; key = limit state name; must be in model.out value = confidence level, \in (0, 1) - df_corr (DataFrame or None): Sampling distribution covariance entries; - parameters with no information assumed to be known exactly. - df_det (DataFrame): Deterministic levels for evaluation; use "nom" - for nominal deterministic levels. + df_corr (DataFrame or None): Sampling distribution covariance entries; parameters with no information assumed to be known exactly. + df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels. n_maxiter (int): Maximum iterations for each optimization run n_restart (int): Number of restarts (== number of optimization runs) append (bool): Append MPP results for random values? @@ -239,12 +276,27 @@ def eval_form_ria( DataFrame: Results of MPP search Notes: - - Since FORM RIA relies on optimization over the limit state, it is - often beneficial to scale your limit state to keep values near unity. + Since FORM RIA relies on optimization over the limit state, it is often beneficial to scale your limit state to keep values near unity. References: - - [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999 - - [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019 + [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999 + [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019 + + Examples:: + + import grama as gr + from grama.models import make_cantilever_beam + md_beam = make_cantilever_beam() + ## Evaluate the reliability of specified designs + ( + md_beam + >> gr.ev_form_ria( + # Specify limit states to analyze + limits=("g_stress", "g_disp"), + # Analyze three different thicknesses + df_det=gr.df_make(t=[2, 3, 4], w=3) + ) + ) """ ## Check invariants diff --git a/grama/fit/fit_scikitlearn.py b/grama/fit/fit_scikitlearn.py index bde16386..1d3ee64a 100644 --- a/grama/fit/fit_scikitlearn.py +++ b/grama/fit/fit_scikitlearn.py @@ -484,25 +484,26 @@ def fit_kmeans(df, var=None, colname="cluster_id", seed=None, **kwargs): References: Scikit-learn: Machine Learning in Python, Pedregosa et al. JMLR 12, pp. 2825-2830, 2011. 
- Examples: - >>> import grama as gr - >>> from grama.data import df_stang - >>> from grama.fit import ft_kmeans - >>> X = gr.Intention() - >>> md_cluster = ( - >>> df_stang - >>> >> ft_kmeans(var=["E", "mu"], n_clusters=2) - >>> ) - >>> ( - >>> md_cluster - >>> >> gr.ev_df(df_stang) - >>> >> gr.tf_group_by(X.cluster_id) - >>> >> gr.tf_summarize( - >>> thick_mean=gr.mean(X.thick), - >>> thick_sd=gr.sd(X.thick), - >>> n=gr.n(X.index), - >>> ) - >>> ) + Examples:: + + import grama as gr + from grama.data import df_stang + from grama.fit import ft_kmeans + DF = gr.Intention() + md_cluster = ( + df_stang + >> ft_kmeans(var=["E", "mu"], n_clusters=2) + ) + ( + md_cluster + >> gr.ev_df(df_stang) + >> gr.tf_group_by(DF.cluster_id) + >> gr.tf_summarize( + thick_mean=gr.mean(DF.thick), + thick_sd=gr.sd(DF.thick), + n=gr.n(), + ) + ) """ ## Check invariants diff --git a/grama/fit_synonyms.py b/grama/fit_synonyms.py index a58206aa..3872cf00 100644 --- a/grama/fit_synonyms.py +++ b/grama/fit_synonyms.py @@ -49,21 +49,18 @@ def fit_nls( var_fix (list or None): Variables to fix to nominal levels. Note that variables with domain width zero will automatically be fixed. df_init (DataFrame): Initial guesses for parameters; overrides n_restart - n_restart (int): Number of restarts to try; the first try is at - the nominal conditions of the model. Returned model will use - the least-error parameter set among restarts tested. + n_restart (int): Number of restarts to try; the first try is at the nominal conditions of the model. Returned model will use the least-error parameter set among restarts tested. n_maxiter (int): Optimizer maximum iterations verbose (bool): Print best-fit parameters to console? - uq_method (str OR None): If string, select method to quantify parameter - uncertainties. If None, provide best-fit values only. Methods: - uq_method = "linpool": assume normal errors; linearly approximate - parameter effects; equally pool variance matrices for each output + uq_method (str OR None): If string, select method to quantify parameter uncertainties. If None, provide best-fit values only. Methods: + uq_method = "linpool": assume normal errors; linearly approximate parameter effects; equally pool variance matrices for each output Returns: gr.Model: Model for evaluation with best-fit variables frozen to optimized levels. - Examples: + Examples:: + import grama as gr from grama.data import df_trajectory_windowed from grama.models import make_trajectory_linear diff --git a/grama/marginals.py b/grama/marginals.py index 22125433..b8819923 100644 --- a/grama/marginals.py +++ b/grama/marginals.py @@ -449,13 +449,9 @@ def marg_mom( ): r"""Fit scipy.stats continuous distribution via moments - Fit a continuous distribution using the method of moments. Select a - distribution shape and provide numerical values for a convenient set of - common moments. + Fit a continuous distribution using the method of moments. Select a distribution shape and provide numerical values for a convenient set of common moments. - This routine uses a vector-output root finding routine to match the moments. - You may set an optional initial guess for the distribution parameters using - the dict_x0 argument. + This routine uses a vector-output root finding routine to match the moments. You may set an optional initial guess for the distribution parameters using the dict_x0 argument. 
Args: dist (str): Name of distribution to fit @@ -480,22 +476,23 @@ def marg_mom( Returns: gr.MarginalNamed: Distribution - Examples: - >>> import grama as gr - >>> ## Fit a normal distribution - >>> mg_norm = gr.marg_mom("norm", mean=0, sd=1) - >>> ## Fit a (3-parameter) lognormal distribution - >>> mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, skew=1) - >>> ## Fit a lognormal, controlling kurtosis instead - >>> mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, kurt=1) - >>> ## Fit a 2-parameter lognormal; no skewness or kurtosis needed - >>> mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, floc=0) - >>> - >>> ## Not all moment combinations are feasible; this will fail - >>> gr.marg_mom("beta", mean=1, sd=1, skew=0, kurt=4) - >>> ## Skewness and kurtosis are related for the beta distribution; - >>> ## a different combination is feasible - >>> gr.marg_mom("beta", mean=1, sd=1, skew=0, kurt=2) + Examples:: + + import grama as gr + ## Fit a normal distribution + mg_norm = gr.marg_mom("norm", mean=0, sd=1) + ## Fit a (3-parameter) lognormal distribution + mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, skew=1) + ## Fit a lognormal, controlling kurtosis instead + mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, kurt=1) + ## Fit a 2-parameter lognormal; no skewness or kurtosis needed + mg_lognorm = gr.marg_mom("lognorm", mean=1, sd=1, floc=0) + + ## Not all moment combinations are feasible; this will fail + gr.marg_mom("beta", mean=1, sd=1, skew=0, kurt=4) + ## Skewness and kurtosis are related for the beta distribution; + ## a different combination is feasible + gr.marg_mom("beta", mean=1, sd=1, skew=0, kurt=2) """ ## Number of distribution parameters @@ -629,8 +626,7 @@ def _obj(v): def marg_fit(dist, data, name=True, sign=None, **kwargs): r"""Fit scipy.stats continuous distirbution - Fits a scipy.stats continuous distribution. Intended to be used to define a - marginal distribution from data. + Fits a scipy.stats continuous distribution. Intended to be used to define a marginal distribution from data. 
Arguments: dist (str): Distribution to fit data (iterable): Data for fit name (bool): Include distribution name? sign (bool): Include sign? (Optional) Returns: gr.MarginalNamed: Distribution - Examples: - - >>> import grama as gr - >>> from grama.data import df_shewhart - >>> # Fit normal distribution - >>> mg_normal = gr.marg_named( - >>> "norm", - >>> df_shewhart.tensile_strength, - >>> ) - >>> # Fit two-parameter Weibull distribution - >>> mg_weibull2 = gr.marg_named( - >>> "weibull_min", - >>> df_shewhart.tensile_strength, - >>> floc=0, # 2-parameter has frozen loc == 0 - >>> ) - >>> # Fit three-parameter Weibull distribution - >>> mg_weibull3 = gr.marg_named( - >>> "weibull_min", - >>> df_shewhart.tensile_strength, - >>> loc=0, # 3-parameter fit tends to be unstable; - >>> # an inital guess helps stabilize fit - >>> ) - >>> # Inspect fits with QQ plot - >>> ( - >>> df_shewhart - >>> >> gr.tf_mutate( - >>> q_normal=gr.qqvals(DF.tensile_strength, marg=mg_normal), - >>> q_weibull2=gr.qqvals(DF.tensile_strength, marg=mg_weibull2), - >>> ) - >>> >> gr.tf_pivot_longer( - >>> columns=[ - >>> "q_normal", - >>> "q_weibull2", - >>> ], - >>> names_to=[".value", "Distribution"], - >>> names_sep="_" - >>> ) - >>> - >>> >> gr.ggplot(gr.aes("q", "tensile_strength")) - >>> + gr.geom_abline(intercept=0, slope=1, linetype="dashed") - >>> + gr.geom_point(gr.aes(color="Distribution")) - >>> ) + Examples:: + + import grama as gr + from grama.data import df_shewhart + DF = gr.Intention() + # Fit normal distribution + mg_normal = gr.marg_fit( + "norm", + df_shewhart.tensile_strength, + ) + # Fit two-parameter Weibull distribution + mg_weibull2 = gr.marg_fit( + "weibull_min", + df_shewhart.tensile_strength, + floc=0, # 2-parameter has frozen loc == 0 + ) + # Fit three-parameter Weibull distribution + mg_weibull3 = gr.marg_fit( + "weibull_min", + df_shewhart.tensile_strength, + loc=0, # 3-parameter fit tends to be unstable; + # an initial guess helps stabilize fit + ) + # Inspect fits with QQ plot + ( + df_shewhart + >> gr.tf_mutate( + q_normal=gr.qqvals(DF.tensile_strength, marg=mg_normal), + q_weibull2=gr.qqvals(DF.tensile_strength, marg=mg_weibull2), + ) + >> gr.tf_pivot_longer( + columns=[ + "q_normal", + "q_weibull2", + ], + names_to=[".value", "Distribution"], + names_sep="_" + ) + + >> gr.ggplot(gr.aes("q", "tensile_strength")) + + gr.geom_abline(intercept=0, slope=1, linetype="dashed") + + gr.geom_point(gr.aes(color="Distribution")) + ) """ ## Catch case where user provides entire DataFrame @@ -728,16 +724,18 @@ def marg_gkde(data, sign=None): Returns: gr.MarginalGKDE: Marginal distribution - Examples: + Examples:: - >>> import grama as gr - >>> from grama.data import df_stang - >>> md = gr.Model("Marginal Example") >> \ - >>> gr.cp_marginals( - >>> E=gr.marg_gkde(df_stang.E), - >>> mu=gr.marg_gkde(df_stang.mu) - >>> ) - >>> md.printpretty() + import grama as gr + from grama.data import df_stang + md = ( + gr.Model("Marginal Example") + >> gr.cp_marginals( + E=gr.marg_gkde(df_stang.E), + mu=gr.marg_gkde(df_stang.mu), + ) + ) + md """ ## Catch case where user provides entire DataFrame diff --git a/grama/mutate_helpers.py b/grama/mutate_helpers.py index 100b52d8..efa5cf9b 100644 --- a/grama/mutate_helpers.py +++ b/grama/mutate_helpers.py @@ -168,6 +168,11 @@ def as_str(x): @make_symbolic def as_factor(x, categories=None, ordered=True, dtype=None): r"""Cast to factor + + Args: + x (pd.Series): Column to convert + categories (list): Categories (levels) of factor (Optional) + ordered (boolean): Order the factor? + dtype (CategoricalDtype): Data type for the factor; passed to pandas.Categorical (Optional) 
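+ + Examples:: + + import grama as gr + DF = gr.Intention() + ## A minimal usage sketch (hypothetical data): impose an explicit level ordering on a string column + ( + gr.df_make(x=["short", "tall", "medium"]) + >> gr.tf_mutate( + x=gr.as_factor(DF.x, categories=["short", "medium", "tall"]) + ) + )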
""" return Categorical(x, categories=categories, ordered=ordered, dtype=dtype) @@ -215,6 +220,7 @@ def pareto_min(*args): Returns: np.array of boolean: Indicates if observation is Pareto-efficient + """ # Check invariants lengths = map(len, args) @@ -241,8 +247,7 @@ def stratum_min(*args, max_depth=10): Args: xi (iterable OR gr.Intention()): Feature to minimize; use -X to maximize - max_depth (int): Maximum depth for recursive computation; stratum numbers exceeding - this value will not be computed and will be flagged as NaN. + max_depth (int): Maximum depth for recursive computation; stratum numbers exceeding this value will not be computed and will be flagged as NaN. Returns: np.array of floats: Pareto stratum number @@ -291,7 +296,8 @@ def fct_reorder(f, x, fun=median): Returns: Categorical: Iterable with levels sorted according to x - Examples: + Examples:: + import grama as gr from grama.data import df_diamonds DF = gr.Intention() @@ -322,14 +328,15 @@ def fillna(*args, **kwargs): (See below for Pandas documentation) - Examples: - >>> import grama as gr - >>> X = gr.Intention() - >>> df = gr.df_make(x=[1, gr.NaN], y=[2, 3]) - >>> df_filled = ( - >>> df - >>> >> gr.tf_mutate(x=gr.fillna(X.x, 0)) - >>> ) + Examples:: + + import grama as gr + X = gr.Intention() + df = gr.df_make(x=[1, gr.NaN], y=[2, 3]) + df_filled = ( + df + >> gr.tf_mutate(x=gr.fillna(X.x, 0)) + ) """ return Series.fillna(*args, **kwargs) @@ -342,37 +349,46 @@ def fillna(*args, **kwargs): def qqvals(x, dist=None, marg=None): r"""Generate theoretical quantiles - Generate theoretical quantiles for a Q-Q plot. Can provide either a - pre-defined Marginal object or the name of a distribution to fit. + Generate theoretical quantiles for a Q-Q plot. Can provide either a pre-defined Marginal object or the name of a distribution to fit. Arguments: x (array-like or gr.Intention()): Target observations Keyword Arguments: + dist (str or None): Name of scipy distribution to fit; see gr.valid_dist for list of valid distributions marg (gr.Marginal() or None): Pre-fitted marginal - dist (str or None): Name of scipy distribution to fit; see - gr.valid_dist for list of valid distributions Returns: Series: Theoretical quantiles, matched in order with target observations References: - Filliben, J. J., "The Probability Plot Correlation Coefficient Test - for Normality" (1975) Technometrics. DOI: 10.1080/00401706.1975.10489279 - - Examples: - >>> import grama as gr - >>> from grama.data import df_shewhart - >>> DF = gr.Intention() - >>> - >>> ( - >>> ## Make a Q-Q plot - >>> df_shewhart - >>> >> gr.tf_mutate(q=gr.qqvals(DF.tensile_strength, dist="norm")) - >>> >> gr.ggplot(gr.aes("q", "tensile_strength")) - >>> + gr.geom_abline(intercept=0, slope=1, linetype="dashed") - >>> + gr.geom_point() - >>> ) + Filliben, J. J., "The Probability Plot Correlation Coefficient Test for Normality" (1975) Technometrics. 
+
+    Examples::
+
+        import grama as gr
+        from grama.data import df_shewhart
+        DF = gr.Intention()
+
+        ## Make a Q-Q plot
+        (
+            df_shewhart
+            >> gr.tf_mutate(q=gr.qqvals(DF.tensile_strength, dist="norm"))
+            >> gr.ggplot(gr.aes("q", "tensile_strength"))
+            + gr.geom_abline(intercept=0, slope=1, linetype="dashed")
+            + gr.geom_point()
+        )
+
+        ## Fit a marginal, use in Q-Q plot
+        mg_tys = gr.marg_fit("lognorm", df_shewhart.tensile_strength, floc=0)
+        # Plot to assess the fit
+        (
+            df_shewhart
+            >> gr.tf_mutate(q=gr.qqvals(DF.tensile_strength, marg=mg_tys))
+            >> gr.ggplot(gr.aes("q", "tensile_strength"))
+            + gr.geom_abline(intercept=0, slope=1, linetype="dashed")
+            + gr.geom_point()
+        )

     """
     # Check invariants
@@ -403,11 +419,10 @@ def qqvals(x, dist=None, marg=None):
 # Array constructors
 # -------------------------
 @make_symbolic
-def linspace(a, b, n, **kwargs):
+def linspace(a, b, n=100, **kwargs):
     r"""Linearly-spaced values

-    Create an array of linearly-spaced values. Accepts keyword arguments for
-    numpy.linspace.
+    Create an array of linearly-spaced values. Accepts keyword arguments for numpy.linspace.

     Arguments:
         a (numeric): Smallest value
         b (numeric): Largest value
-        n (int): Number of points
+        n (int): Number of points (default 100)

     Returns:
         numpy array: Array of requested values

     Notes:
-        This is a symbolic alias for np.linspace(); you can use this in
-        pipe-enabled functions.
-
-    Examples:
-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> DF = gr.Intention()
-        >>> (
-        >>>     df_stang
-        >>>     >> gr.tf_mutate(c=gr.linspace(0, 1, gr.n(DF.index)))
-        >>> )
+        This is a symbolic alias for np.linspace(); you can use this in pipe-enabled functions.
+
+    Examples::
+
+        import grama as gr
+        from grama.data import df_stang
+        DF = gr.Intention()
+        (
+            df_stang
+            >> gr.tf_mutate(c=gr.linspace(0, 1, gr.n(DF.index)))
+        )
+
     """
     return nplinspace(a, b, num=n, **kwargs)

 @make_symbolic
-def logspace(a, b, n, **kwargs):
+def logspace(a, b, n=100, **kwargs):
     r"""Logarithmically-spaced values

-    Create an array of logarithmically-spaced values. Accepts keyword arguments for
-    numpy.logspace.
+    Create an array of logarithmically-spaced values. Accepts keyword arguments for numpy.logspace.

     Arguments:
-        a (numeric): Smallest value
-        b (numeric): Largest value
-        n (int): Number of points
+        a (numeric): Exponent of the smallest value; the smallest value is 10**a
+        b (numeric): Exponent of the largest value; the largest value is 10**b
+        n (int): Number of points (default 100)

     Returns:
         numpy array: Array of requested values

     Notes:
-        This is a symbolic alias for np.logspace(); you can use this in
-        pipe-enabled functions.
-
-    Examples:
-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> DF = gr.Intention()
-        >>> (
-        >>>     df_stang
-        >>>     >> gr.tf_mutate(c=gr.logspace(0, 1, gr.n(DF.index)))
-        >>> )
+        This is a symbolic alias for np.logspace(); you can use this in pipe-enabled functions.
+
+    Examples::
+
+        import grama as gr
+        from grama.data import df_stang
+        DF = gr.Intention()
+        (
+            df_stang
+            >> gr.tf_mutate(c=gr.logspace(0, 1, gr.n(DF.index)))
+        )
+
     """
     return nplogspace(a, b, num=n, **kwargs)
diff --git a/grama/plot_auto.py b/grama/plot_auto.py
index c97b0e3c..e37a5cd5 100644
--- a/grama/plot_auto.py
+++ b/grama/plot_auto.py
@@ -48,9 +48,7 @@ def _sci_format(v):
 def plot_contour(df, var=None, out="out", level="level", aux=False):
     r"""Plot 2d contours

-    Plot contours.
-
-    Usually called as a dispatch from plot_auto().
+    Plot contours. Usually called as a dispatch from plot_auto().
     Args:
         var (array of str): Variables for plot axes
@@ -61,10 +59,21 @@ def plot_contour(df, var=None, out="out", level="level", aux=False):
     Returns:
         ggplot: Contour image

-    Examples:
-
-        >>> import grama as gr
-        >>> from grama.models import make_cantilever_beam
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        (
+            md_beam
+            >> gr.ev_contour(
+                var=["w", "t"],
+                out=["g_stress"],
+                # Set auxiliary inputs to nominal levels
+                df=gr.eval_nominal(md_beam, df_det="nom"),
+            )
+            >> gr.pt_auto()
+        )

     """
     # Check invariants
@@ -128,24 +137,23 @@ def plot_scattermat(df, var=None):
     Returns:
         ggplot: Scatterplot matrix

-    Examples:
-
-        >>> import grama as gr
-        >>> import matplotlib.pyplot as plt
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> ## Dispatch from autoplotter
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sample(n=100, df_det="nom", skip=True)
-        >>>     >> gr.pt_auto()
-        >>> )
-        >>> ## Re-create plot without metadata
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sample(n=100, df_det="nom")
-        >>>     >> gr.pt_scattermat(var=md.var)
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        ## Dispatch from autoplotter
+        (
+            md_beam
+            >> gr.ev_sample(n=100, df_det="nom", skip=True)
+            >> gr.pt_auto()
+        )
+        ## Re-create plot without metadata
+        (
+            md_beam
+            >> gr.ev_sample(n=100, df_det="nom")
+            >> gr.pt_scattermat(var=md_beam.var)
+        )

     """
     if var is None:
@@ -252,24 +260,23 @@ def plot_hists(df, out=None, **kwargs):
     Returns:
         Seaborn histogram plot

-    Examples:
-
-        >>> import grama as gr
-        >>> import matplotlib.pyplot as plt
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> ## Dispatch from autoplotter
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sample(n=100, df_det="nom")
-        >>>     >> gr.pt_auto()
-        >>> )
-        >>> ## Re-create without metadata
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sample(n=100, df_det="nom")
-        >>>     >> gr.pt_hists(out=md.out)
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        ## Dispatch from autoplotter
+        (
+            md_beam
+            >> gr.ev_sample(n=100, df_det="nom")
+            >> gr.pt_auto()
+        )
+        ## Re-create without metadata
+        (
+            md_beam
+            >> gr.ev_sample(n=100, df_det="nom")
+            >> gr.pt_hists(out=md_beam.out)
+        )

     """
     if out is None:
@@ -314,24 +321,23 @@ def plot_sinew_inputs(df, var=None, sweep_ind="sweep_ind"):
     Returns:
         Seaborn scatterplot matrix

-    Examples:
-
-        >>> import grama as gr
-        >>> import matplotlib.pyplot as plt
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> ## Dispatch from autoplotter
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sinews(df_det="swp", skip=True)
-        >>>     >> gr.pt_auto()
-        >>> )
-        >>> ## Re-create without metadata
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sinews(df_det="swp")
-        >>>     >> gr.pt_sinew_inputs(var=md.var)
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        ## Dispatch from autoplotter
+        (
+            md_beam
+            >> gr.ev_sinews(df_det="swp", skip=True)
+            >> gr.pt_auto()
+        )
+        ## Re-create without metadata
+        (
+            md_beam
+            >> gr.ev_sinews(df_det="swp")
+            >> gr.pt_sinew_inputs(var=md_beam.var)
+        )

     """
     if var is None:
@@ -431,24 +437,23 @@ def plot_sinew_outputs(
     Returns:
         Seaborn relational lineplot

-    Examples:
-
-        >>> import grama as gr
-        >>> import matplotlib.pyplot as plt
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> ## Dispatch from autoplotter
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sinews(df_det="swp")
-        >>>     >> gr.pt_auto()
-        >>> )
-        >>> ## Re-create without metadata
-        >>> (
-        >>>     md
-        >>>     >> gr.ev_sinews(df_det="swp")
-        >>>     >> gr.pt_sinew_inputs(var=md.var, out=md.out)
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        ## Dispatch from autoplotter
+        (
+            md_beam
+            >> gr.ev_sinews(df_det="swp")
+            >> gr.pt_auto()
+        )
+        ## Re-create without metadata
+        (
+            md_beam
+            >> gr.ev_sinews(df_det="swp")
+            >> gr.pt_sinew_outputs(var=md_beam.var, out=md_beam.out)
+        )

     """
     if var is None:
@@ -519,13 +524,10 @@ def plot_sinew_outputs(
 def plot_auto(df):
     r"""Automagic plotting

-    Convenience tool for various grama outputs. Prints delegated plotting
-    function, which can be called manually with different arguments for
-    more tailored plotting.
+    Convenience tool for various grama outputs. Prints delegated plotting function, which can be called manually with different arguments for more tailored plotting.

     Args:
-        df (DataFrame): Data output from appropriate grama routine. See
-            gr.plot_list.keys() for list of supported methods.
+        df (DataFrame): Data output from appropriate grama routine. See gr.plot_list.keys() for list of supported methods.

     Returns:
         Plot results
diff --git a/grama/spc.py b/grama/spc.py
index f1a3817d..7e84c815 100644
--- a/grama/spc.py
+++ b/grama/spc.py
@@ -23,10 +23,7 @@
 def c_sd(n):
     r"""Anti-biasing constant for aggregate standard deviation

-    Returns the anti-biasing constant for aggregated standard deviation
-    estimates. If the average of $k$ samples each size $n$ are averaged to
-    produce $\overline{S} = (1/k) \sum_{i=1}^k S_i$, then the de-biased standard
-    deviation is:
+    Returns the anti-biasing constant for aggregated standard deviation estimates. If the standard deviations of $k$ samples, each of size $n$, are averaged to produce $\overline{S} = (1/k) \sum_{i=1}^k S_i$, then the de-biased standard deviation is:
+
+    $$\hat{\sigma} = \overline{S} / c(n)$$
@@ -109,7 +106,7 @@ def plot_xbs(df, group, var, n_side=9, n_delta=6):
     Returns:
         plotnine object: Xbar and S chart

-    Examples:
+    Examples::

         import grama as gr
         DF = gr.Intention()
diff --git a/grama/support.py b/grama/support.py
index 676473c7..f28eff59 100644
--- a/grama/support.py
+++ b/grama/support.py
@@ -150,24 +150,25 @@ def tran_sp(
     References:
         Mak and Joseph, "Support Points" (2018) *The Annals of Statistics*

-    Examples:
-        >>> import grama as gr
-        >>> # Compact an existing dataset
-        >>> from grama.data import df_diamonds
-        >>> df_sp = gr.tran_sp(df_diamonds, n=50, var=["price", "carat"])
-        >>>
-        >>> # Use support points to reduce model runtime
-        >>> from grama.models import make_cantilever_beam
-        >>> md_beam = make_cantilever_beam()
-        >>> (
-        >>>     md_beam
-        >>>     ## Generate input sample but don't evaluate outputs
-        >>>     >> gr.ev_sample(n=1e4, df_det="nom", skip=True)
-        >>>     ## Reduce to a smaller---but representative---sample
-        >>>     >> gr.tf_sp(n=50)
-        >>>     ## Evaluate the outputs
-        >>>     >> gr.tf_md(md_beam)
-        >>> )
+    Examples::
+
+        import grama as gr
+        # Compact an existing dataset
+        from grama.data import df_diamonds
+        df_sp = gr.tran_sp(df_diamonds, n=50, var=["price", "carat"])
+
+        # Use support points to reduce model runtime
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        (
+            md_beam
+            ## Generate input sample but don't evaluate outputs
+            >> gr.ev_sample(n=1e4, df_det="nom", skip=True)
+            ## Reduce to a smaller---but representative---sample
+            >> gr.tf_sp(n=50)
+            ## Evaluate the outputs
+            >> gr.tf_md(md_beam)
+        )

     """
     ## Setup
diff --git a/grama/tools.py b/grama/tools.py
index 241bee1b..d5f3d2e7 100644
--- a/grama/tools.py
+++ b/grama/tools.py
@@ -340,18 +340,19 @@ def tran_outer(df, df_outer):
     Returns:
         DataFrame: Merged data

-    Examples:
-        >>> import grama as gr
-        >>> import pandas as pd
-        >>> df = pd.DataFrame(dict(x=[1,2]))
-        >>> df_outer = pd.DataFrame(dict(y=[3,4]))
-        >>> df_res = gr.tran_outer(df, df_outer)
-        >>> df_res
-        >>>    x  y
-        >>> 0  1  3
-        >>> 1  2  3
-        >>> 2  1  4
-        >>> 3  2  4
+    Examples::
+
+        import grama as gr
+        import pandas as pd
+        df = pd.DataFrame(dict(x=[1,2]))
+        df_outer = pd.DataFrame(dict(y=[3,4]))
+        df_res = gr.tran_outer(df, df_outer)
+        df_res
+           x  y
+        0  1  3
+        1  2  3
+        2  1  4
+        3  2  4

     """
     # Check invariants
diff --git a/grama/tran/tran_matminer.py b/grama/tran/tran_matminer.py
index 791727d9..c9d84024 100644
--- a/grama/tran/tran_matminer.py
+++ b/grama/tran/tran_matminer.py
@@ -48,13 +48,14 @@ def tran_feat_composition(
     References:
         Ward, L., Dunn, A., Faghaninia, A., Zimmermann, N. E. R., Bajaj, S., Wang, Q., Montoya, J. H., Chen, J., Bystrom, K., Dylla, M., Chard, K., Asta, M., Persson, K., Snyder, G. J., Foster, I., Jain, A., Matminer: An open source toolkit for materials data mining. Comput. Mater. Sci. 152, 60-69 (2018).
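+
+    Notes:
+        A wrapper for matminer; the optional dependency ``matminer`` must be
+        installed to use this function.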

-    Examples:
-        >>> import grama as gr
-        >>> from grama.tran import tf_feat_composition
-        >>> (
-        >>>     gr.df_make(FORMULA=["C6H12O6"])
-        >>>     >> gr.tf_feat_composition()
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.tran import tf_feat_composition
+        (
+            gr.df_make(FORMULA=["C6H12O6"])
+            >> gr.tf_feat_composition()
+        )

     """
     ## Check invariants
diff --git a/grama/tran/tran_umap.py b/grama/tran/tran_umap.py
index 6cf15e95..c0653fc3 100644
--- a/grama/tran/tran_umap.py
+++ b/grama/tran/tran_umap.py
@@ -40,14 +40,24 @@ def tran_umap(
     metric (str or function): Metric used for distance computations. See url: https://umap-learn.readthedocs.io/en/latest/parameters.html#metric

     Notes:
-        - A wrapper for umap.UMAP
+        A wrapper for umap.UMAP

     References:
-        - McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction, ArXiv e-prints 1802.03426, 2018
-        - Andy Coenen, Adam Pearce "Understanding UMAP" url: https://pair-code.github.io/understanding-umap/
+        McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction, ArXiv e-prints 1802.03426, 2018
+        Andy Coenen, Adam Pearce "Understanding UMAP" url: https://pair-code.github.io/understanding-umap/

-    Examples:
+    Examples::

+        import grama as gr
+        from grama.data import df_diamonds
+        (
+            df_diamonds
+            >> gr.tf_sample(1000) # For speed
+            >> gr.tf_umap(var=["x", "y", "z", "carat"])
+            >> gr.ggplot(gr.aes("xi0", "xi1"))
+            + gr.geom_point()
+        )
+
     """
     ## Check invariants
     if var is None:
diff --git a/grama/tran_is.py b/grama/tran_is.py
index 98aad4f4..cf9f3b30 100644
--- a/grama/tran_is.py
+++ b/grama/tran_is.py
@@ -19,19 +19,11 @@ def tran_reweight(
 ):
     r"""Reweight a sample using likelihood ratio

-    Reweight is a tool to facilitate "What If?" Monte Carlo simulation;
-    specifically, to make testing a models with the same function(s) but
-    different distributions more computationally efficient.
+    Reweight is a tool to facilitate "What If?" Monte Carlo simulation; specifically, to make testing models with the same function(s) but different distributions more computationally efficient.

-    This tool automates calulation of the *likelihood ratio* between the
-    distributions of two given models. Using the resulting weights to scale
-    (elementwise multiply) output values and compute summaries is called
-    *importance sampling*, enabling "What If?" testing. Use of this tool enables
-    one to generate a single Monte Carlo sample, rather than multiple samples
-    for each "What If?" scenario (avoiding extraneous function evaluations).
+    This tool automates calculation of the *likelihood ratio* between the distributions of two given models. Using the resulting weights to scale (elementwise multiply) output values and compute summaries is called *importance sampling*, enabling "What If?" testing. Use of this tool enables one to generate a single Monte Carlo sample, rather than multiple samples for each "What If?" scenario (avoiding extraneous function evaluations).

-    Let `y` be a generic output of the scenario. The importance sampling
-    procedure is as follows:
+    Let `y` be a generic output of the scenario. The importance sampling procedure is as follows:

-    1. Create a base scenario represented by `md_base`, and a desired number of
-       alternative "What If?" scenarios represented my other models.
+    1. Create a base scenario represented by `md_base`, and a desired number of
+       alternative "What If?" scenarios represented by other models.
@@ -64,60 +56,59 @@ def tran_reweight(
     References:
         A.B. Owen, "Monte Carlo theory, methods and examples" (2013)
Owen, "Monte Carlo theory, methods and examples" (2013) - Examples: - - >>> import grama as gr - >>> from grama.models import make_cantilever_beam - >>> DF = gr.Intention() - >>> - >>> md_base = make_cantilever_beam() - >>> md_new = ( - >>> md_base - >>> >> gr.cp_marginals( - >>> H=dict(dist="norm", loc=500.0, scale=50.0), - >>> ) - >>> ) - >>> - >>> ## Assess safety via simple Monte Carlo - >>> df_base = gr.eval_monte_carlo(md_base, df_det="nom", n=1e3) - >>> print( - >>> df_base - >>> >> gr.tf_summarize( - >>> pof_stress=gr.mean(DF.g_stress <= 0), - >>> pof_disp=gr.mean(DF.g_disp <= 0), - >>> ) - >>> ) - >>> - >>> ## Re-use samples to test another scenario - >>> print( - >>> df_base - >>> >> gr.tf_reweight(md_base=md_base, md_new=md_new) - >>> >> gr.tf_summarize( - >>> pof_stress=gr.mean((DF.g_stress <= 0) * DF.weight), - >>> pof_disp=gr.mean((DF.g_disp <= 0) * DF.weight), - >>> n_eff=gr.neff_is(DF.weight), - >>> ) - >>> ) - >>> - >>> ## It is unsafe to study new scenarios with wider uncertainty than the base - >>> ## scenario - >>> md_poor = ( - >>> md_base - >>> >> gr.cp_marginals( - >>> H=dict(dist="norm", loc=500.0, scale=400.0), - >>> ) - >>> ) - >>> ## Note the tiny effective size in this case - >>> print( - >>> md_base - >>> >> gr.ev_monte_carlo(n=1e3, df_det="nom") - >>> >> gr.tf_reweight(md_base=md_base, md_new=md_poor) - >>> >> gr.tf_summarize( - >>> pof_stress=gr.mean((DF.g_stress <= 0) * DF.weight), - >>> pof_disp=gr.mean((DF.g_disp <= 0) * DF.weight), - >>> n_eff=gr.neff_is(DF.weight), - >>> ) - >>> ) + Examples:: + + import grama as gr + from grama.models import make_cantilever_beam + DF = gr.Intention() + + md_base = make_cantilever_beam() + md_new = ( + md_base + >> gr.cp_marginals( + H=dict(dist="norm", loc=500.0, scale=50.0), + ) + ) + + ## Assess safety via simple Monte Carlo + df_base = gr.eval_monte_carlo(md_base, df_det="nom", n=1e3) + print( + df_base + >> gr.tf_summarize( + pof_stress=gr.mean(DF.g_stress <= 0), + pof_disp=gr.mean(DF.g_disp <= 0), + ) + ) + + ## Re-use samples to test another scenario + print( + df_base + >> gr.tf_reweight(md_base=md_base, md_new=md_new) + >> gr.tf_summarize( + pof_stress=gr.mean((DF.g_stress <= 0) * DF.weight), + pof_disp=gr.mean((DF.g_disp <= 0) * DF.weight), + n_eff=gr.neff_is(DF.weight), + ) + ) + + ## It is unsafe to study new scenarios with wider uncertainty than the base scenario + md_poor = ( + md_base + >> gr.cp_marginals( + H=dict(dist="norm", loc=500.0, scale=400.0), + ) + ) + ## Note the tiny effective sample size in this case + print( + md_base + >> gr.ev_monte_carlo(n=1e3, df_det="nom") + >> gr.tf_reweight(md_base=md_base, md_new=md_poor) + >> gr.tf_summarize( + pof_stress=gr.mean((DF.g_stress <= 0) * DF.weight), + pof_disp=gr.mean((DF.g_disp <= 0) * DF.weight), + n_eff=gr.neff_is(DF.weight), + ) + ) """ ## Check invariants diff --git a/grama/tran_pivot.py b/grama/tran_pivot.py index cf9c0f75..1ac44e1a 100644 --- a/grama/tran_pivot.py +++ b/grama/tran_pivot.py @@ -59,10 +59,9 @@ def tran_pivot_longer ( Returns: DataFrame: result of being pivoted into a longer format - Examples: + Examples:: import grama as gr - ## Simple example ( gr.df_make( @@ -370,7 +369,8 @@ def tran_pivot_wider ( Returns: DataFrame: result of being pivoted wider - Example: + Examples:: + import grama as gr ## Simple example ( diff --git a/grama/tran_shapley.py b/grama/tran_shapley.py index 5839b1b1..6d47ef14 100644 --- a/grama/tran_shapley.py +++ b/grama/tran_shapley.py @@ -23,17 +23,9 @@ def powerset(iterable): def tran_shapley_cohort(df, 
     """Compute cohort shapley values

-    Assess the impact of each variable on selected observations via cohort
-    shapley [1]. Shapley values are a game-theoretic way to assess the
-    importance of input variables (var) on each of a set of outputs (out). Since
-    values are computed on each observation, cohort shapley can distinguish
-    cases where a variable has a positive impact on one observation, and a
-    negative impact on a different observation.
-
-    Note that cohort shapley is combinatorialy expensive in the number of
-    variables, and this expense is multiplied by the number of observations. Use
-    with caution in cases of high dimensionality. Consider using the `inds`
-    argument to analyze a small subset of your observations.
+    Assess the impact of each variable on selected observations via cohort shapley [1]. Shapley values are a game-theoretic way to assess the importance of input variables (var) on each of a set of outputs (out). Since values are computed on each observation, cohort shapley can distinguish cases where a variable has a positive impact on one observation, and a negative impact on a different observation.
+
+    Note that cohort shapley is combinatorially expensive in the number of variables, and this expense is multiplied by the number of observations. Use with caution in cases of high dimensionality. Consider using the `inds` argument to analyze a small subset of your observations.

     Args:
         df (DataFrame): Variable and output data to analyze
@@ -43,35 +35,36 @@ def tran_shapley_cohort(df, var=None, out=None, bins=20, inds=None):
         inds (iterable of indices or None): Indices of rows to analyze

     References:
-        - [1] Mase, Owen, and Seiler, "Explaining black box decisions by Shapley cohort refinement" (2019) Arxiv
-
-    Examples:
-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> X = gr.Intention()
-        >>> # Analyze all observations
-        >>> (
-        >>>     gr.tran_shapley_cohort(
-        >>>         df_stang,
-        >>>         var=["thick", "ang"],
-        >>>         out=["E"],
-        >>>     )
-        >>>     >> gr.tf_bind_cols(df_stang)
-        >>>     >> gr.tf_filter(X.E_thick < 0)
-        >>> )
-        >>> # Compute subset of values
-        >>> (
-        >>>     gr.tran_shapley_cohort(
-        >>>         df_stang,
-        >>>         var=["thick", "ang"],
-        >>>         out=["E"],
-        >>>         inds=(
-        >>>             df_stang
-        >>>             >> gr.tf_filter(X.thick > 0.08)
-        >>>         ).index
-        >>>     )
-        >>>     >> gr.tf_bind_cols(df_stang)
-        >>> )
+        [1] Mase, Owen, and Seiler, "Explaining black box decisions by Shapley cohort refinement" (2019) Arxiv
+
+    Examples::
+
+        import grama as gr
+        from grama.data import df_stang
+        DF = gr.Intention()
+        # Analyze all observations
+        (
+            gr.tran_shapley_cohort(
+                df_stang,
+                var=["thick", "ang"],
+                out=["E"],
+            )
+            >> gr.tf_bind_cols(df_stang)
+            >> gr.tf_filter(DF.E_thick < 0)
+        )
+        # Compute subset of values
+        (
+            gr.tran_shapley_cohort(
+                df_stang,
+                var=["thick", "ang"],
+                out=["E"],
+                inds=(
+                    df_stang
+                    >> gr.tf_filter(DF.thick > 0.08)
+                ).index
+            )
+            >> gr.tf_bind_cols(df_stang)
+        )

     """
     ## Check invariants
diff --git a/grama/tran_summaries.py b/grama/tran_summaries.py
index 3c99c550..0037c267 100644
--- a/grama/tran_summaries.py
+++ b/grama/tran_summaries.py
@@ -55,16 +55,16 @@ def tran_sobol(df, typename="ind", digits=2, full=False):
     References:
         I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models" (1999) MMCE, Vol 1.
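+
+    Notes:
+        Intended for use with data generated by ``gr.ev_hybrid()``; the
+        analysis plan ("first" or "total" order) is detected from the
+        DataFrame's metadata (see Examples below).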

-    Examples:
+    Examples::

-        >>> import grama as gr
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
-        >>> df_first >> gr.tf_sobol()
-        >>>
-        >>> df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
-        >>> df_total >> gr.tf_sobol()
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md = make_cantilever_beam()
+        df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
+        df_first >> gr.tf_sobol()
+
+        df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
+        df_total >> gr.tf_sobol()

     """
     ## Determine plan from dataframe metadata
@@ -179,11 +179,11 @@ def tran_pca(df, var=None, lamvar="lam", standardize=False):
     References:
         TODO

-    Examples:
+    Examples::

-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> df_pca = df_stang >> gr.tf_pca()
+        import grama as gr
+        from grama.data import df_stang
+        df_pca = df_stang >> gr.tf_pca()

     """
     ## Handle variable selection
@@ -214,8 +214,9 @@ def tran_pca(df, var=None, lamvar="lam", standardize=False):
 def tran_asub(df, prefix="D", outvar="out", lamvar="lam"):
     r"""Active subspace estimator

-    Compute principal directions and eigenvalues for all outputs based on output
-    of ev_grad_fd() to estimate the /active subspace/ (Constantine, 2015).
+    Compute principal directions and eigenvalues for all outputs based on output of ev_grad_fd() to estimate the /active subspace/ (Constantine, 2015).
+
+    See also ``gr.tran_polyridge()`` for a gradient-free approach to approximating the active subspace.

     Args:
         df (DataFrame): Gradient evaluations
@@ -229,14 +230,14 @@ def tran_asub(df, prefix="D", outvar="out", lamvar="lam"):
     References:
         Constantine, "Active Subspaces" (2015) SIAM

-    Examples:
+    Examples::

-        >>> import grama as gr
-        >>> from grama.models import make_cantilever_beam
-        >>> md = make_cantilever_beam()
-        >>> df_base = md >> gr.ev_monte_carlo(n=1e2, df_det="nom", skip=True)
-        >>> df_grad = md >> gr.ev_grad_fd(df_base=df_base)
-        >>> df_as = df_grad >> gr.tf_asub()
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md = make_cantilever_beam()
+        df_base = md >> gr.ev_sample(n=1e2, df_det="nom", skip=True)
+        df_grad = md >> gr.ev_grad_fd(df_base=df_base)
+        df_as = df_grad >> gr.tf_asub()

     """
     ## Setup
@@ -283,37 +284,47 @@ def tran_inner(df, df_weights, prefix="dot", name=None, append=True):
     Returns:
         DataFrame: Results of inner products

-    Examples:
-
-        >>> ## Setup
-        >>> from dfply import *
-        >>> import grama as gr
-        >>> from grama.models import make_cantilever_beam
-        >>> import seaborn as sns
-        >>> import matplotlib.pyplot as plt
-        >>> md = make_cantilever_beam()
-        >>> # Generate active subspace results
-        >>> df_base = md >> gr.ev_monte_carlo(n=1e2, df_det="nom")
-        >>> df_grad = md >> gr.ev_grad_fd(df_base=df_base)
-        >>> df_as = df_grad >> \
-        >>>     gr.tf_asub() >> \
-        >>>     group_by(X.out) >> \
-        >>>     mask(min_rank(-X.lam) == 1) >> \
-        >>>     ungroup()
-        >>> # Post-process
-        >>> df_reduce = gr.tran_inner(df_base, df_as, name="out")
-        >>> sns.scatterplot(
-        >>>     data=df_reduce,
-        >>>     x="dot_g_stress",
-        >>>     y="g_stress"
-        >>> )
-        >>> plt.show()
-        >>> sns.scatterplot(
-        >>>     data=df_reduce,
-        >>>     x="dot_g_disp",
-        >>>     y="g_disp"
-        >>> )
-        >>> plt.show()
+    Examples::
+
+        ## Setup
+        import grama as gr
+        DF = gr.Intention()
+
+        ## PCA example
+        from grama.data import df_diamonds
+        # Compute PCA weights
+        df_weights = gr.tran_pca(
+            df_diamonds
+            >> gr.tf_sample(1000),
+            var=["x", "y", "z", "carat"],
+        )
+        # Visualize
+        (
+            df_diamonds
+            >> gr.tf_inner(df_weights=df_weights)
+            >> gr.ggplot(gr.aes("dot0", "dot1"))
+            + gr.geom_point()
+        )
+
+        ## Active subspace example
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        # Approximate the active subspace
+        df_data = gr.ev_sample(md_beam, n=1e2, df_det="nom")
+        df_weights = gr.tran_polyridge(
+            df_data,
+            var=md_beam.var_rand, # Use meaningful predictors only
+            out="g_disp",         # Target g_disp for reduction
+            n_degree=2,
+            n_dim=1,              # Seek a single active direction
+        )
+        # Construct shadow plot; use tran_inner to calculate active variable
+        (
+            df_data
+            >> gr.tf_inner(df_weights=df_weights)
+            >> gr.ggplot(gr.aes("dot", "g_disp"))
+            + gr.geom_point()
+        )

     """
     ## Check invariants
diff --git a/grama/tran_tools.py b/grama/tran_tools.py
index 90d8a2e8..50facd99 100644
--- a/grama/tran_tools.py
+++ b/grama/tran_tools.py
@@ -53,8 +53,7 @@ def tran_kfolds(
 ):
     r"""Perform k-fold CV

-    Perform k-fold cross-validation (CV) using a given fitting procedure (ft).
-    Optionally provide a fold identifier column, or (randomly) assign folds.
+    Perform k-fold cross-validation (CV) using a given fitting procedure (ft). Optionally provide a fold identifier column, or (randomly) assign folds.

     Args:
         df (DataFrame): Data to pass to given fitting procedure
@@ -82,17 +81,17 @@ def tran_kfolds(
     References:
         [1] James, Witten, Hastie, and Tibshirani, "An introduction to statistical learning" (2017), Chapter 5. Resampling Methods

-    Examples:
+    Examples::

-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> from grama.fit import ft_rf
-        >>> df_kfolds = (
-        >>>     df_stang
-        >>>     >> gr.tf_kfolds(
-        >>>         k=5,
-        >>>         ft=ft_rf(out=["thick"], var=["E", "mu"]),
-        >>>     )
+        import grama as gr
+        from grama.data import df_stang
+        from grama.fit import ft_rf
+        df_kfolds = (
+            df_stang
+            >> gr.tf_kfolds(
+                k=5,
+                ft=ft_rf(out=["thick"], var=["E", "mu"]),
+            )
+        )

     """
     ## Check invariants
@@ -209,7 +208,7 @@ def tran_bootstrap(
     for more general problems, like setting a confidence interval for a
     correlation coefficient."

-    Examples:
+    Examples::

     """
     ## Set seed only if given
@@ -290,9 +289,7 @@ def tran_angles(df, df2):
     r"""Subspace angles

-    Compute the subspace angles between two matrices. A wrapper for
-    scipy.linalg.subspace_angles that corrects for column ordering. Row ordering
-    is assumed.
+    Compute the subspace angles between two matrices. A wrapper for scipy.linalg.subspace_angles that corrects for column ordering. Row ordering is assumed.

     Args:
         df (DataFrame): First matrix to compare
@@ -301,15 +298,15 @@ def tran_angles(df, df2):
         df2 (DataFrame): Second matrix to compare

     Returns:
         array: Array of angles (in radians)

-    Examples:
+    Examples::

-        >>> import grama as gr
-        >>> import pandas as pd
-        >>> df = pd.DataFrame(dict(v=[+1, +1]))
-        >>> df_v1 = pd.DataFrame(dict(w=[+1, -1]))
-        >>> df_v2 = pd.DataFrame(dict(w=[+1, +1]))
-        >>> theta1 = angles(df, df_v1)
-        >>> theta2 = angles(df, df_v2)
+        import grama as gr
+        import pandas as pd
+        df = pd.DataFrame(dict(v=[+1, +1]))
+        df_v1 = pd.DataFrame(dict(w=[+1, -1]))
+        df_v2 = pd.DataFrame(dict(w=[+1, +1]))
+        theta1 = gr.tran_angles(df, df_v1)
+        theta2 = gr.tran_angles(df, df_v2)

     """
     ## Compute subspace angles
@@ -327,9 +324,9 @@ def tran_copula_corr(df, model=None, density=None):
     r"""Compute Gaussian copula correlations from data

-    Convenience function to fit a Gaussian copula (correlations) based on data
-    and pre-fitted marginals. Intended for use with gr.comp_copula_gaussian().
-    Must provide either `model` or `density`.
+    Convenience function to fit a Gaussian copula (correlations) based on data and pre-fitted marginals. Intended for use with ``gr.comp_copula_gaussian()``. Must provide either `model` or `density`.
+
+    Note: This is called automatically when you provide a dataset to ``gr.comp_copula_gaussian()``.

     Args:
         df (DataFrame): Matrix of data for correlation estimation
@@ -337,19 +334,34 @@ def tran_copula_corr(df, model=None, density=None):
         model (gr.Model): Model with defined marginals
         density (gr.Density): Density with defined marginals

     Returns:
-        DataFrame: Correlation data ready for use with gr.comp_copula_gaussian()
-
-    Examples:
-
-        >>> import grama as gr
-        >>> from grama.data import df_stang
-        >>> md = gr.Model() >> \
-        >>>     gr.cp_marginals(
-        >>>         E=gr.marg_named(df_stang.E, "norm"),
-        >>>         mu=gr.marg_named(df_stang.mu, "beta"),
-        >>>         thick=gr.marg_named(df_stang.thick, "norm")
-        >>>     )
-        >>> df_corr = gr.tran_copula_corr(df_stang, model=md)
+        DataFrame: Correlation data ready for use with ``gr.comp_copula_gaussian()``
+
+    Examples::
+
+        import grama as gr
+        from grama.data import df_stang
+        ## Verbose, manual approach
+        md = (
+            gr.Model()
+            >> gr.cp_marginals(
+                E=gr.marg_fit("norm", df_stang.E),
+                mu=gr.marg_fit("beta", df_stang.mu),
+                thick=gr.marg_fit("norm", df_stang.thick),
+            )
+        )
+        df_corr = gr.tran_copula_corr(df_stang, model=md)
+        md = gr.comp_copula_gaussian(md, df_corr=df_corr)
+
+        ## Automatic approach
+        md = (
+            gr.Model()
+            >> gr.cp_marginals(
+                E=gr.marg_fit("norm", df_stang.E),
+                mu=gr.marg_fit("beta", df_stang.mu),
+                thick=gr.marg_fit("norm", df_stang.thick),
+            )
+            >> gr.cp_copula_gaussian(df_data=df_stang)
+        )

     """
     if density is None:
@@ -389,8 +401,7 @@ def tran_copula_corr(df, model=None, density=None):
 def tran_md(df, md=None, append=True):
     r"""Model as transform

-    Use a model to transform data; useful when pre-processing data to evaluate a
-    model.
+    Use a model to transform data; useful when pre-processing data to evaluate a model.

     Args:
         df (DataFrame): Data to merge
@@ -399,16 +410,18 @@ def tran_md(df, md=None, append=True):
         md (gr.Model): Model to use as transform

     Returns:
         DataFrame: Output of evaluated model

-    Examples:
-        >>> import grama as gr
-        >>> from grama.models import make_cantilever_beam
-        >>> md_beam = make_cantilever_beam()
-        >>> df_res = (
-        >>>     md_beam
-        >>>     >> gr.ev_monte_carlo(n=1e3, df_det="nom", skip=True, seed=101)
-        >>>     >> gr.tf_sp(n=100)
-        >>>     >> gr.tf_md(md=md_beam)
-        >>> )
+    Examples::
+
+        import grama as gr
+        from grama.models import make_cantilever_beam
+        md_beam = make_cantilever_beam()
+        ## Use support points to generate a smaller---but representative---sample
+        df_res = (
+            md_beam
+            >> gr.ev_sample(n=1e3, df_det="nom", skip=True, seed=101)
+            >> gr.tf_sp(n=100)
+            >> gr.tf_md(md=md_beam)
+        )

     """
     if md is None:
diff --git a/tests/test_contour.py b/tests/test_contour.py
index 68275db1..ab2e5d45 100644
--- a/tests/test_contour.py
+++ b/tests/test_contour.py
@@ -79,6 +79,17 @@ def test_contour(self):
         # Correct manual levels
         self.assertTrue(set(df_res3.level) == {-1, 0, +1})

+        # Drops redundant (swept) inputs under-the-hood
+        (
+            md2
+            >> gr.ev_contour(
+                var=["x", "y"],
+                out=["f"],
+                df=gr.eval_nominal(md2, df_det="nom"),
+                n_side=10, # Coarse, for speed
+            )
+        )
+
         # Correct manual levels
         with self.assertWarns(Warning):
             df_res4 = (