diff --git a/grama/comp_building.py b/grama/comp_building.py index 8ad4aaf..581949a 100644 --- a/grama/comp_building.py +++ b/grama/comp_building.py @@ -20,9 +20,19 @@ "getvars", ] -from grama import add_pipe, CopulaGaussian, CopulaIndependence, Density, \ - Function, FunctionModel, FunctionVectorized, Marginal, MarginalNamed, \ - pipe, tran_copula_corr +from grama import ( + add_pipe, + CopulaGaussian, + CopulaIndependence, + Density, + Function, + FunctionModel, + FunctionVectorized, + Marginal, + MarginalNamed, + pipe, + tran_copula_corr, +) from .eval_defaults import eval_sample from collections import ChainMap from pandas import concat, DataFrame @@ -32,8 +42,7 @@ ## Model Building Interface (MBI) tools ################################################## def _comp_function_data(model, fun, var, out, name, runtime): - r"""Internal function builder - """ + r"""Internal function builder""" model_new = model.copy() # Check invariants @@ -114,6 +123,7 @@ def fun(x, y, z): """ return f.__code__.co_varnames + # Freeze inputs # ------------------------- @curry @@ -139,17 +149,15 @@ def comp_freeze(model, df=None, **var): # Process DataFrame if provided if not df is None: if df.shape[0] > 1: - raise ValueError( - "Provided DataFrame must have only one row." - ) + raise ValueError("Provided DataFrame must have only one row.") var = dict(zip(df.columns, df.values.flatten())) # All variables are provided var_miss = set(set(var.keys())).difference(model.var) if len(var_miss) != 0: raise ValueError( - "All inputs listed in `var` argument must be present in model.var.\n" + - "Missing inputs {}".format(var_miss) + "All inputs listed in `var` argument must be present in model.var.\n" + + "Missing inputs {}".format(var_miss) ) if any(map(lambda x: hasattr(x, "__iter__"), var.values())): @@ -164,7 +172,7 @@ def comp_freeze(model, df=None, **var): var_diff, list(var.keys()), "(Freeze inputs: {})".format(list(var.keys())), - 0 + 0, ) ## Add to model @@ -566,8 +574,13 @@ def comp_marginals(model, **kwargs): except KeyError: sign = 0 + try: + source = value_copy.pop("source") + except KeyError: + source = "real" + new_model.density.marginals[key] = MarginalNamed( - sign=sign, d_name=dist, d_param=value_copy + sign=sign, d_name=dist, d_param=value_copy, source=source ) ## Handle Marginal input @@ -583,7 +596,7 @@ def comp_marginals(model, **kwargs): # Add copula ################################################## @curry -def comp_copula_independence(model): +def comp_copula_independence(model, source="real"): r"""Add an independence copula to model Composition. Add an independence copula to an existing model. @@ -610,10 +623,28 @@ def comp_copula_independence(model): """ new_model = model.copy() - new_model.density = Density( - marginals=model.density.marginals, - copula=CopulaIndependence(new_model.var_rand), - ) + var_rand_real = [] + var_rand_err = [] + for var in new_model.var_rand: + if new_model.density.marginals[var].source == "real": + var_rand_real.append(var) + else: + var_rand_err.append(var) + + if source == "real": + copula_err = new_model.density.copula_err + new_model.density = Density( + marginals=model.density.marginals, + copula_real=CopulaIndependence(var_rand_real, source), + copula_err=copula_err, + ) + else: + copula_real = new_model.density.copula_real + new_model.density = Density( + marginals=model.density.marginals, + copula_err=CopulaIndependence(var_rand_err, source), + copula_real=copula_real, + ) new_model.update() return new_model @@ -623,7 +654,7 @@ def comp_copula_independence(model): # ------------------------- @curry -def comp_copula_gaussian(model, df_corr=None, df_data=None): +def comp_copula_gaussian(model, df_corr=None, df_data=None, source="real"): r"""Add a Gaussian copula to model Composition. Add a gaussian copula to an existing model. @@ -663,30 +694,55 @@ def comp_copula_gaussian(model, df_corr=None, df_data=None): ) """ + new_model = model.copy() + var_rand_real = [] + var_rand_err = [] + for var in new_model.var_rand: + if new_model.density.marginals[var].source == "real": + var_rand_real.append(var) + else: + var_rand_err.append(var) + + if not (df_data is None): + df_corr = tran_copula_corr(df_data, model=new_model) + if not (df_corr is None): - new_model = model.copy() - new_model.density = Density( - marginals=model.density.marginals, - copula=CopulaGaussian(list(model.density.marginals.keys()), df_corr,), - ) + if source == "real": + new_model.density = Density( + marginals=model.density.marginals, + copula_real=CopulaGaussian(var_rand_real, df_corr, source), + copula_err=new_model.density.copula_err, + ) + else: + new_model.density = Density( + marginals=model.density.marginals, + copula_err=CopulaGaussian(var_rand_err, df_corr, source), + copula_real=new_model.density.copula_real, + ) new_model.update() return new_model if not (df_data is None): - new_model = model.copy() df_corr = tran_copula_corr(df_data, model=new_model) - new_model.density = Density( - marginals=model.density.marginals, - copula=CopulaGaussian(list(model.density.marginals.keys()), df_corr,), - ) + if source == "real": + new_model.density = Density( + marginals=model.density.marginals, + copula_real=CopulaGaussian(var_rand_real, df_corr, source), + copula_err=new_model.density.copula_err, + ) + else: + new_model.density = Density( + marginals=model.density.marginals, + copula_err=CopulaGaussian(var_rand_err, df_corr, source), + copula_real=new_model.density.copula_real, + ) new_model.update() return new_model - else: - raise ValueError("Must provide df_corr or df_data") + raise ValueError("Must provide df_corr or df_data") cp_copula_gaussian = add_pipe(comp_copula_gaussian) diff --git a/grama/core.py b/grama/core.py index 58c7aa1..2865170 100644 --- a/grama/core.py +++ b/grama/core.py @@ -20,12 +20,25 @@ from .tools import tran_outer from abc import ABC, abstractmethod from itertools import chain -from numpy import ones, zeros, triu_indices, eye, array, Inf, NaN, sqrt, \ - dot, diag, isfinite, prod, exp +from numpy import ( + ones, + zeros, + triu_indices, + eye, + array, + Inf, + NaN, + sqrt, + dot, + diag, + isfinite, + prod, + exp, +) from numpy import min as npmin from numpy import max as npmax from numpy.linalg import cholesky, det, inv -from numpy.random import random, multivariate_normal +from numpy.random import random, multivariate_normal, rand from numpy.random import seed as set_seed from pandas import DataFrame, concat from scipy.linalg import det, LinAlgError, solve @@ -114,8 +127,7 @@ def eval(self, df): return DataFrame(data=results, columns=self.out) def summary(self): - """Returns a summary string - """ + """Returns a summary string""" return "{0:}: {1:} -> {2:}".format(self.name, self.var, self.out) @@ -144,8 +156,7 @@ def copy(self): class FunctionModel(Function): - """gr.Model as gr.Function - """ + """gr.Model as gr.Function""" def __init__(self, md, ev=None, var=None, out=None): """Model-Function constructor @@ -278,15 +289,16 @@ def get_nominal(self, var): def bound_summary(self, var): if var in self.bounds.keys(): return "{0:}: [{1:}, {2:}]".format( - var, self.bounds[var][0], self.bounds[var][1], + var, + self.bounds[var][0], + self.bounds[var][1], ) return "{0:}: (unbounded)".format(var) ## Copula base class class Copula(ABC): - """Parent class for copulas - """ + """Parent class for copulas""" @abstractmethod def __init__(self): @@ -298,32 +310,27 @@ def copy(self): @abstractmethod def sample(self, n=1): - r"""Draw a sample of a given size - """ + r"""Draw a sample of a given size""" raise NotImplementedError @abstractmethod def d(self, u): - r"""Copula density - """ + r"""Copula density""" raise NotImplementedError @abstractmethod def u2z(self, u): - r"""Transform from [0, 1]^d to sample space - """ + r"""Transform from [0, 1]^d to sample space""" raise NotImplementedError @abstractmethod def z2u(self, z): - r"""Transform from sample space to [0, 1]^d - """ + r"""Transform from sample space to [0, 1]^d""" raise NotImplementedError @abstractmethod def dudz(self, z): - r"""Jacobian of copula transform - """ + r"""Jacobian of copula transform""" raise NotImplementedError @abstractmethod @@ -332,7 +339,7 @@ def summary(self): class CopulaIndependence(Copula): - def __init__(self, var_rand): + def __init__(self, var_rand, source="real"): """Constructor Args: @@ -343,6 +350,11 @@ def __init__(self, var_rand): """ self.var_rand = var_rand + if source not in ["real", "error"]: + raise ValueError( + "Your source of variability must be either 'real' or 'error'!" + ) + self.source = source def copy(self): """Copy @@ -351,11 +363,11 @@ def copy(self): gr.CopulaIndependence: Copy of present copula """ - cop = CopulaIndependence(var_rand=self.var_rand) + cop = CopulaIndependence(var_rand=self.var_rand, source=self.source) return cop - def sample(self, n=1, seed=None): + def sample(self, n=1, seed=None, var_name=None): """Draw samples from copula Args: @@ -369,7 +381,12 @@ def sample(self, n=1, seed=None): if seed is not None: set_seed(seed) - return DataFrame(data=random((n, len(self.var_rand))), columns=self.var_rand) + if var_name is not None: + return DataFrame(data=random((n)), columns=var_name) + else: + return DataFrame( + data=random((n, len(self.var_rand))), columns=self.var_rand + ) def d(self, u): """Density function @@ -411,7 +428,7 @@ def dudz(self, z): """Jacobian Args: - z (array-like): + z (array-like) Returns: array: @@ -420,11 +437,11 @@ def dudz(self, z): return diag(norm.pdf(z)) def summary(self): - return "Independence copula" + return f"Independence copula (source: {self.source})" class CopulaGaussian(Copula): - def __init__(self, var_rand, df_corr): + def __init__(self, var_rand, df_corr, source="real"): """Constructor Args: @@ -464,7 +481,7 @@ def __init__(self, var_rand, df_corr): except LinAlgError: warnings.warn( "Correlation structure is not positive-definite; copula transforms not available", - RuntimeWarning + RuntimeWarning, ) Sigma_h = None Sigma_i = None @@ -473,6 +490,11 @@ def __init__(self, var_rand, df_corr): self.df_corr = df_corr self.var_rand = var_rand + if source not in ["real", "error"]: + raise ValueError( + "Your source of variability must be either 'real' or 'error'!" + ) + self.source = source self.Sigma = Sigma self.Sigma_h = Sigma_h self.Sigma_i = Sigma_i @@ -487,8 +509,7 @@ def copy(self): Returns: gr.CopulaGaussian: """ - cop = CopulaGaussian(self.var_rand, self.df_corr.copy()) - + cop = CopulaGaussian(self.var_rand, self.df_corr.copy(), self.source) return cop def sample(self, n=1, seed=None): @@ -533,9 +554,9 @@ def d(self, u): for i in range(n_obs): v = norm.ppf(u[i, :]) - l_values[i] = exp( - -0.5*dot(v, dot( self.Sigma_i - eye(n_dim), v)) - ) / sqrt(self.det) + l_values[i] = exp(-0.5 * dot(v, dot(self.Sigma_i - eye(n_dim), v))) / sqrt( + self.det + ) return l_values @@ -579,7 +600,7 @@ def dudz(self, z): return dot(self.Sigma_h.T, diag(norm.pdf(dot(self.Sigma_h, z)))) def summary(self): - return "Gaussian copula with correlations:\n{}".format(self.df_corr) + return f"Gaussian copula (source: {self.source}) with correlations:\n{self.df_corr}" ## Density parent class @@ -594,7 +615,7 @@ class Density: """ - def __init__(self, marginals=None, copula=None): + def __init__(self, marginals=None, copula_real=None, copula_err=None): """Constructor Construct a grama density. Generally not called directly; preferred @@ -609,7 +630,8 @@ def __init__(self, marginals=None, copula=None): """ self.marginals = marginals - self.copula = copula + self.copula_err = copula_err + self.copula_real = copula_real def copy(self): try: @@ -621,14 +643,67 @@ def copy(self): new_marginals = {} try: - new_copula = self.copula.copy() + new_copula_err = self.copula_err.copy() + except AttributeError: + new_copula_err = None + + try: + new_copula_real = self.copula_real.copy() except AttributeError: - new_copula = None + new_copula_real = None - new_density = Density(marginals=new_marginals, copula=new_copula) + new_density = Density( + marginals=new_marginals, + copula_err=new_copula_err, + copula_real=new_copula_real, + ) return new_density + def check_valid_sources(self): + """ + Checks that types of copula within the density match the sources of the marginals. + """ + copula_sources = [] + copula_vars = [] + if self.copula_real is not None: + copula_sources.append("real") + copula_vars = copula_vars + self.copula_real.var_rand + if self.copula_err is not None: + copula_sources.append("error") + copula_vars = copula_vars + self.copula_err.var_rand + + marginal_sources = [] + for var in self.marginals.keys(): + marginal_sources.append(self.marginals[var].source) + if self.copula_err is not None and self.copula_real is not None: + if var in self.copula_real.var_rand and var in self.copula_err.var_rand: + raise ValueError( + f"Random variable {var} belongs to mulitple copulas." + ) + if var not in copula_vars: + raise ValueError( + f"Random variable {var} does not belong to any copula." + ) + if self.marginals[var].source not in copula_sources: + raise ValueError( + f"Copula with a source type of '{self.marginals[var].source}' does not exist." + ) + + for source in copula_sources: + if source not in marginal_sources: + raise ValueError( + f"Copula of source type {source} has no matching variables." + ) + + # old checking method below + marginal_sources = set(marginal_sources) + + if sorted(copula_sources) != sorted(marginal_sources): + raise ValueError( + "Sources of variability within copulas do not match sources within marginal values." + ) + def d(self, df): r"""Evaluate PDF @@ -639,18 +714,37 @@ def d(self, df): """ # Get variable names - var = [key for key, _ in self.marginals.items()] - df_u = self.sample2pr(df)[var] + var = [] + var_real = [] + var_err = [] + for key in self.marginals.keys(): + var.append(key) + if self.marginals[var].source == "real": + var_real.append(key) + else: + var_err.append(key) + # Evaluate copula density - l_copula = self.copula.d(df_u.values) + if self.copula_real is None: + l_copula_real = 1 + else: + df_u_real = self.sample2pr(df)[var_real] + l_copula_real = self.copula_real.d(df_u_real.values) + + if self.copula_err is None: + l_copula_err = 1 + else: + df_u_err = self.sample2pr(df)[var_err] + l_copula_err = self.copula_err.d(df_u_err.values) + # Evaluate marginal densities L_marginals = zeros((df.shape[0], len(var))) for i, v in enumerate(var): L_marginals[:, i] = self.marginals[v].d(df[v]) l_marginals = prod(L_marginals, axis=1) - return l_copula * l_marginals - + # return l_copula * l_marginals + return l_copula_real * l_copula_err * l_marginals def pr2sample(self, df_prval): """Convert CDF probabilities to samples @@ -740,7 +834,7 @@ def sample2pr(self, df_sample): return DataFrame(data=prval, columns=var_comp) - def sample(self, n=1, seed=None): + def sample(self, n=None, n_r=None, n_e=None, seed=None, source_type="real"): """Draw samples from joint density Draw samples according to joint density using marginal and copula @@ -754,9 +848,8 @@ def sample(self, n=1, seed=None): DataFrame: Joint density samples """ - if not (self.copula is None): - df_pr = self.copula.sample(n=n, seed=seed) - else: + self.check_valid_sources() + if self.copula_real is None and self.copula_err is None: raise ValueError( "\n" + "Present model copula must be defined for sampling.\n" @@ -765,24 +858,49 @@ def sample(self, n=1, seed=None): + "Variable Modeling for more information.\n" + "https://py-grama.readthedocs.io/en/latest/source/rv_modeling.html" ) + + # check for different source cases + if source_type == "real": + df_pr = self.copula_real.sample(n=n_r, seed=seed) + elif source_type == "error": + df_pr = self.copula_err.sample(n=n_e, seed=seed) + elif source_type == "mixed": + df_real = self.copula_real.sample(n=n_r, seed=seed) + df_err = self.copula_err.sample(n=n_e, seed=seed) + df_pr = tran_outer(df_real, df_err) + elif source_type == "mixed_standard": + df_real = self.copula_real.sample(n=n, seed=seed) + df_err = self.copula_err.sample(n=n, seed=seed) + df_pr = concat([df_real, df_err], axis=1) + else: + raise ValueError( + "Invalid source_type argument. Source type may only be 'real', 'error', 'mixed', or 'mixed_standard'." + ) + return self.pr2sample(df_pr) def summary_marginal(self, var): return "{0:}: {1:}".format(var, self.marginals[var].summary()) def summary_copula(self): - if not (self.copula is None): - return self.copula.summary() - return None + summary = "" + if self.copula_real is not None: + summary += f"{self.copula_real.summary()}\n " + if self.copula_err is not None: + summary += f"{self.copula_err.summary()}" + return summary # Model parent class class Model: - """Parent class for grama models. - """ + """Parent class for grama models.""" def __init__( - self, name=None, functions=None, domain=None, density=None, + self, + name=None, + functions=None, + domain=None, + density=None, ): r"""Constructor @@ -864,6 +982,17 @@ def update(self): self.var_rand = [] self.var_det = list(set(self.var).difference(self.var_rand)) + self.source_list = [] + self.var_rand_real = [] + self.var_rand_err = [] + for key_ind in range(0, len(self.var_rand)): + var_key = list(self.var_rand)[key_ind] + self.source_list.append(self.density.marginals[var_key].source) + if self.density.marginals[var_key].source == "real": + self.var_rand_real.append(var_key) + else: + self.var_rand_err.append(var_key) + ## TODO parameters ## Convenience constants @@ -941,12 +1070,12 @@ def evaluate_df(self, df): """ ## Check invariant; model inputs must be subset of df columns - var_diff = set(self.var).difference(set(df.columns)) - if len(var_diff) != 0: - raise ValueError( - "Model inputs not a subset of given columns;\n" - + "missing var = {}".format(var_diff) - ) + # var_diff = set(self.var).difference(set(df.columns)) + # if len(var_diff) != 0: + # raise ValueError( + # "Model inputs not a subset of given columns;\n" + # + "missing var = {}".format(var_diff) + # ) df_tmp = df.copy().drop(self.out, axis=1, errors="ignore") ## Evaluate each function @@ -1107,8 +1236,7 @@ def norm2rand(self, df): return DataFrame(data=data, columns=self.var_rand) def name_corr(self): - """Name the correlation elements - """ + """Name the correlation elements""" raise NotImplementedError ## Build matrix of names corr_mat = [] @@ -1130,8 +1258,7 @@ def name_corr(self): ## Infrastructure # ------------------------- def copy(self): - """Make a copy of this model - """ + """Make a copy of this model""" new_model = Model( name=self.name, functions=copy.deepcopy(self.functions), @@ -1150,19 +1277,23 @@ def string_rep(self): "", " inputs:", " var_det:", - "".join([ - " {}\n".format(self.domain.bound_summary(var_det)) - for var_det in self.var_det - ]), + "".join( + [ + " {}\n".format(self.domain.bound_summary(var_det)) + for var_det in self.var_det + ] + ), " var_rand:", ] try: l = l + [ - "".join([ - " {}\n".format(self.density.summary_marginal(var_rand)) - for var_rand in self.density.marginals.keys() - ]), + "".join( + [ + " {}\n".format(self.density.summary_marginal(var_rand)) + for var_rand in self.density.marginals.keys() + ] + ), ] except AttributeError: l = l + ["\n"] @@ -1171,10 +1302,9 @@ def string_rep(self): " copula:", " {}\n".format(self.density.summary_copula()), " functions:", - "".join([ - " {}\n".format(function.summary()) - for function in self.functions - ]) + "".join( + [" {}\n".format(function.summary()) for function in self.functions] + ), ] return "\n".join(l) @@ -1186,13 +1316,11 @@ def __repr__(self): return self.string_rep() def printpretty(self): - """Formatted print of model attributes - """ + """Formatted print of model attributes""" print(self.string_rep()) def make_dag(self, expand=set()): - """Generate a DAG for the model - """ + """Generate a DAG for the model""" G = nx.DiGraph() ## Inputs-to-Functions @@ -1275,8 +1403,7 @@ def make_dag(self, expand=set()): return G def show_dag(self, expand=set()): - """Generate and show a DAG for the model - """ + """Generate and show a DAG for the model""" from matplotlib.pyplot import show as pltshow G = self.make_dag(expand=expand) @@ -1285,7 +1412,16 @@ def show_dag(self, expand=set()): warnings.simplefilter("ignore") ## Plotting edge_labels = dict( - [((u, v,), d["label"]) for u, v, d in G.edges(data=True)] + [ + ( + ( + u, + v, + ), + d["label"], + ) + for u, v, d in G.edges(data=True) + ] ) n = G.size() diff --git a/grama/eval_defaults.py b/grama/eval_defaults.py index 7b22229..150a70d 100644 --- a/grama/eval_defaults.py +++ b/grama/eval_defaults.py @@ -24,6 +24,7 @@ formatwarning = custom_formatwarning + def invariants_eval_model(md, skip=False): r"""Helper function to group common model argument invariant checks for eval functions. @@ -40,17 +41,21 @@ def invariants_eval_model(md, skip=False): if md is None: raise TypeError("No input model given") elif isinstance(md, tuple): - raise TypeError("Given model argument is type tuple. Have you " + - "declared your model with an extra comma after the closing `)`?") + raise TypeError( + "Given model argument is type tuple. Have you " + + "declared your model with an extra comma after the closing `)`?" + ) else: - raise TypeError("Type gr.Model was expected, a " + str(type(md)) + - " was passed.") + raise TypeError( + "Type gr.Model was expected, a " + str(type(md)) + " was passed." + ) ## Value checking if not skip and len(md.functions) == 0: raise ValueError("Given model has no functions.") return + def invariants_eval_df(df, arg_name="df", valid_str=None, acc_none=False): r"""Helper function to group common DataFrame argument invariant checks for eval functions. @@ -69,6 +74,7 @@ def invariants_eval_df(df, arg_name="df", valid_str=None, acc_none=False): invariants_eval_df(df_test, arg_name="df_test", acc_none=()) """ + def aps(string): r"""Helper function for valid_args_msg() to put apostrophes around a string. @@ -91,7 +97,7 @@ def valid_args_msg(df_arg, acc_str, valid_str): Returns: String""" - msg = df_arg + " must be DataFrame" # general msg for valid args + msg = df_arg + " must be DataFrame" # general msg for valid args if acc_str: # add on string options to msg if len(valid_str) == 1: @@ -117,17 +123,27 @@ def valid_args_msg(df_arg, acc_str, valid_str): if df is None: if not acc_none: # allow "None" df if None accepted - raise TypeError("No " + arg_name + " argument given. " + - valid_args_msg(arg_name, acc_str, valid_str)) + raise TypeError( + "No " + + arg_name + + " argument given. " + + valid_args_msg(arg_name, acc_str, valid_str) + ) elif isinstance(df, str) and acc_str: # case check for invalid str input if df not in valid_str: - raise ValueError(arg_name + " shortcut string invalid. " + - valid_args_msg(arg_name, acc_str, valid_str)) + raise ValueError( + arg_name + + " shortcut string invalid. " + + valid_args_msg(arg_name, acc_str, valid_str) + ) else: - raise TypeError(valid_args_msg(arg_name, acc_str, valid_str) + - " Given argument is type " + str(type(df)) + - ". ") + raise TypeError( + valid_args_msg(arg_name, acc_str, valid_str) + + " Given argument is type " + + str(type(df)) + + ". " + ) ## Value checking #### TO DO @@ -135,7 +151,6 @@ def valid_args_msg(df_arg, acc_str, valid_str): return - ## Default evaluation function # -------------------------------------------------- @curry @@ -191,7 +206,9 @@ def eval_df(model, df=None, append=True, verbose=True): ## Linear Uncertainty Propagation # -------------------------------------------------- @curry -def eval_linup(model, df_base=None, append=True, decomp=False, decimals=2, n=1e4, seed=None): +def eval_linup( + model, df_base=None, append=True, decomp=False, decimals=2, n=1e4, seed=None +): r"""Linear uncertainty propagation Approximates the variance of output models using a linearization of functions---linear uncertainty propagation. Optionally decomposes the output variance according to additive factors from each input. @@ -231,7 +248,9 @@ def eval_linup(model, df_base=None, append=True, decomp=False, decimals=2, n=1e4 df_base = eval_df(model, df=df_base) ## Approximate the covariance matrix - df_sample = eval_sample(model, n=n, seed=seed, df_det=df_base[model.var_det], skip=True) + df_sample = eval_sample( + model, n=n, seed=seed, df_det=df_base[model.var_det], skip=True + ) cov = df_sample[model.var_rand].cov() ## Approximate the gradient @@ -242,10 +261,7 @@ def eval_linup(model, df_base=None, append=True, decomp=False, decimals=2, n=1e4 for out in model.out: for i in range(df_grad.shape[0]): # Build gradient column names - var_names = map( - lambda s: "D" + out + "_D" + s, - model.var_rand - ) + var_names = map(lambda s: "D" + out + "_D" + s, model.var_rand) # Extract gradient values grad = df_grad.iloc[i][var_names].values.flatten() # Approximate variance @@ -263,8 +279,8 @@ def eval_linup(model, df_base=None, append=True, decomp=False, decimals=2, n=1e4 U, V = triu_indices(sens_mat.shape[0]) sens_values = [ round( - sens_mat[U[j], V[j]] * (1 + (U[j] != V[j])), # Double off-diag - decimals=decimals + sens_mat[U[j], V[j]] * (1 + (U[j] != V[j])), # Double off-diag + decimals=decimals, ) for j in range(len(U)) ] @@ -273,19 +289,17 @@ def eval_linup(model, df_base=None, append=True, decomp=False, decimals=2, n=1e4 for j in range(len(U)) ] - df_sens = DataFrame({ - "var_frac": sens_values, - "var_rand": sens_var - }) + df_sens = DataFrame({"var_frac": sens_values, "var_rand": sens_var}) df_tmp = tran_outer(df_tmp, df_sens) df_res = concat( - (df_res,df_tmp), + (df_res, df_tmp), axis=0, ).reset_index(drop=True) return df_res + ev_linup = add_pipe(eval_linup) @@ -322,8 +336,9 @@ def eval_nominal(model, df_det=None, append=True, skip=False): """ ## Perform common invariant tests invariants_eval_model(model, skip) - invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"], - acc_none=(model.n_var_det==0)) + invariants_eval_df( + df_det, arg_name="df_det", valid_str=["nom"], acc_none=(model.n_var_det == 0) + ) ## Draw from underlying gaussian quantiles = ones((1, model.n_var_rand)) * 0.5 # Median @@ -498,8 +513,9 @@ def eval_conservative(model, quantiles=None, df_det=None, append=True, skip=Fals """ ## Check invariants invariants_eval_model(model, skip) - invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"], - acc_none=(model.n_var_det==0)) + invariants_eval_df( + df_det, arg_name="df_det", valid_str=["nom"], acc_none=(model.n_var_det == 0) + ) ## Default behavior if quantiles is None: @@ -540,7 +556,18 @@ def eval_conservative(model, quantiles=None, df_det=None, append=True, skip=Fals ## Random sampling # -------------------------------------------------- @curry -def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False, comm=True, ind_comm=None): +def eval_sample( + model, + n=None, + n_r=None, + n_e=None, + df_det=None, + seed=None, + append=True, + skip=False, + comm=True, + ind_comm=None, +): r"""Draw a random sample Evaluates a model with a random sample of the random model inputs. Generates outer product with deterministic levels (common random numbers) OR generates a sample fully-independent of deterministic levels (non-common random numbers). @@ -637,37 +664,185 @@ def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False, """ ## Check invariants invariants_eval_model(model, skip) - invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"], - acc_none=(model.n_var_det==0)) - if n is None: + invariants_eval_df( + df_det, arg_name="df_det", valid_str=["nom"], acc_none=(model.n_var_det == 0) + ) + if n is None and n_r is None and n_e is None: raise ValueError("Must provide a valid n value.") ## Set seed only if given if seed is not None: set_seed(seed) - ## Ensure sample count is int - if not isinstance(n, Integral): + # Ensure sample count is int + if not isinstance(n, Integral) and n is not None: print("eval_sample() is rounding n...") n = int(n) - ## Draw realizations - # Common random numbers - if comm: - df_rand = model.density.sample(n=n, seed=seed) - if not ind_comm is None: - df_rand[ind_comm] = df_rand.index - df_samp = model.var_outer(df_rand, df_det=df_det) - # Non-common random numbers + # Ensure sample count is int + if not isinstance(n_r, Integral) and n_r is not None: + print("eval_sample() is rounding n_r...") + n_r = int(n_r) + + # Ensure sample count is int + if not isinstance(n_e, Integral) and n_e is not None: + print("eval_sample() is rounding n_e...") + n_e = int(n_e) + + ## Check for mixed variability sources + if "real" in model.source_list and "error" in model.source_list: + if n is not None and n_r is None and n_e is None: + source_type = "mixed_standard" + if comm: + df_rand_data = model.density.sample( + n=n, seed=seed, source_type=source_type + ) + + df_rand_ind = DataFrame({"ind": [*range(0, n, 1)]}) + + df_rand = concat([df_rand_ind, df_rand_data], axis=1) + + df_samp = model.var_outer(df_rand, df_det=df_det) + # Non-common random numbers + else: + df_rand = model.density.sample(n=n * df_det.shape[0], seed=seed) + if not ind_comm is None: + df_rand[ind_comm] = df_rand.index + df_samp = concat( + ( + df_rand, + concat([df_det[model.var_det]] * n, axis=0).reset_index( + drop=True + ), + ), + axis=1, + ).reset_index(drop=True) + + else: + if n_r is None or n_e is None: + raise ValueError("Must provide both a valid n_r value and n_e value.") + + source_type = "mixed" + ## Draw realizations + # Common random numbers + if comm: + df_rand_data = model.density.sample( + n_r=n_r, n_e=n_e, seed=seed, source_type=source_type + ) + + df_r_ind = DataFrame({"ind_r": [*range(0, n_r, 1)]}) + df_e_ind = DataFrame({"ind_e": [*range(0, n_e, 1)]}) + df_rand_ind = tran_outer(df_r_ind, df_e_ind) + + df_rand = concat([df_rand_ind, df_rand_data], axis=1) + + df_samp = model.var_outer(df_rand, df_det=df_det) + # Non-common random numbers + else: + df_rand = model.density.sample(n_r=n_r * df_det.shape[0], seed=seed) + if not ind_comm is None: + df_rand[ind_comm] = df_rand.index + df_samp = concat( + ( + df_rand, + concat([df_det[model.var_det]] * n_r, axis=0).reset_index( + drop=True + ), + ), + axis=1, + ).reset_index(drop=True) + elif "error" in model.source_list: + if n_e is None: + if n is not None: + n_e = n + else: + raise ValueError("Must provide a valid n_e value.") + + source_type = "error" + ## Draw realizations + # Common random numbers + if comm: + df_rand_data = model.density.sample( + n_e=n_e, seed=seed, source_type=source_type + ) + df_rand_ind = DataFrame({"ind_e": [*range(0, n_e, 1)]}) + df_rand = concat([df_rand_ind, df_rand_data], axis=1) + + df_samp = model.var_outer(df_rand, df_det=df_det) + # Non-common random numbers + else: + df_rand = model.density.sample( + n_e=n_e * df_det.shape[0], seed=seed, source_type=source_type + ) + if not ind_comm is None: + df_rand[ind_comm] = df_rand.index + df_samp = concat( + ( + df_rand, + concat([df_det[model.var_det]] * n_e, axis=0).reset_index( + drop=True + ), + ), + axis=1, + ).reset_index(drop=True) else: - df_rand = model.density.sample(n=n * df_det.shape[0], seed=seed) - if not ind_comm is None: - df_rand[ind_comm] = df_rand.index - df_samp = concat( - (df_rand, concat([df_det[model.var_det]]*n, axis=0).reset_index(drop=True)), - axis=1, - ).reset_index(drop=True) + if n_r is None: + if n is not None: + n_r = n + else: + raise ValueError("Must provide a valid n_r value.") + + source_type = "real" + ## Draw realizations + # Common random numbers + if comm: + df_rand_data = model.density.sample( + n_r=n_r, seed=seed, source_type=source_type + ) + df_rand_ind = DataFrame({"ind_r": [*range(0, n_r, 1)]}) + df_rand = concat([df_rand_ind, df_rand_data], axis=1) + df_samp = model.var_outer(df_rand, df_det=df_det) + # Non-common random numbers + else: + df_rand = model.density.sample( + n_r=n_r * df_det.shape[0], seed=seed, source_type=source_type + ) + if not ind_comm is None: + df_rand[ind_comm] = df_rand.index + df_samp = concat( + ( + df_rand, + concat([df_det[model.var_det]] * n_r, axis=0).reset_index( + drop=True + ), + ), + axis=1, + ).reset_index(drop=True) + + # ## Draw realizations + # # Common random numbers + # if comm: + # df_rand = model.density.sample(n=n, seed=seed) + # df_e = DataFrame( + # {"ind_e": [*range(0, n_e, 1)]} + # ) + + # df_r = DataFrame( + # {"ind_r": [*range(0, n_r, 1)]} + # ) + # if not ind_comm is None: + # df_rand[ind_comm] = df_rand.index + # df_samp = model.var_outer(df_rand, df_det=df_det) + # # Non-common random numbers + # else: + # df_rand = model.density.sample(n=n * df_det.shape[0], seed=seed) + # if not ind_comm is None: + # df_rand[ind_comm] = df_rand.index + # df_samp = concat( + # (df_rand, concat([df_det[model.var_det]]*n, axis=0).reset_index(drop=True)), + # axis=1, + # ).reset_index(drop=True) if skip: ## Evaluation estimate @@ -684,7 +859,6 @@ def eval_sample(model, n=None, df_det=None, seed=None, append=True, skip=False, return df_samp - df_res = eval_df(model, df=df_samp, append=append) ## Attach metadata with catch_warnings(): diff --git a/grama/marginals.py b/grama/marginals.py index 1bf446d..1222338 100644 --- a/grama/marginals.py +++ b/grama/marginals.py @@ -19,21 +19,103 @@ from numpy.random import uniform as runif from pandas import DataFrame from scipy.optimize import root_scalar, root -from scipy.stats import alpha, anglit, arcsine, argus, beta, betaprime, \ - bradford, burr, burr12, cauchy, chi, chi2, cosine, crystalball, dgamma, \ - dweibull, erlang, expon, exponnorm, exponweib, exponpow, f, fatiguelife, \ - fisk, foldcauchy, foldnorm, gaussian_kde, genlogistic, gennorm, genpareto, \ - genexpon, genextreme, gausshyper, gamma, gengamma, genhalflogistic, \ - gibrat, gompertz, gumbel_r, gumbel_l, halfcauchy, halflogistic, \ - halfnorm, halfgennorm, hypsecant, invgamma, invgauss, invweibull, \ - johnsonsb, johnsonsu, kappa4, kappa3, ksone, kstwobign, laplace, levy, \ - levy_l, levy_stable, logistic, loggamma, loglaplace, lognorm, lomax, \ - maxwell, mielke, moyal, nakagami, ncx2, ncf, nct, norm, norminvgauss, \ - pareto, pearson3, powerlaw, powerlognorm, powernorm, rdist, rayleigh, \ - rice, recipinvgauss, skewnorm, t, trapz, triang, truncexpon, truncnorm, \ - tukeylambda, uniform, vonmises, vonmises_line, wald, weibull_min, \ - weibull_max, wrapcauchy - +from scipy.stats import ( + alpha, + anglit, + arcsine, + argus, + beta, + betaprime, + bradford, + burr, + burr12, + cauchy, + chi, + chi2, + cosine, + crystalball, + dgamma, + dweibull, + erlang, + expon, + exponnorm, + exponweib, + exponpow, + f, + fatiguelife, + fisk, + foldcauchy, + foldnorm, + gaussian_kde, + genlogistic, + gennorm, + genpareto, + genexpon, + genextreme, + gausshyper, + gamma, + gengamma, + genhalflogistic, + gibrat, + gompertz, + gumbel_r, + gumbel_l, + halfcauchy, + halflogistic, + halfnorm, + halfgennorm, + hypsecant, + invgamma, + invgauss, + invweibull, + johnsonsb, + johnsonsu, + kappa4, + kappa3, + ksone, + kstwobign, + laplace, + levy, + levy_l, + levy_stable, + logistic, + loggamma, + loglaplace, + lognorm, + lomax, + maxwell, + mielke, + moyal, + nakagami, + ncx2, + ncf, + nct, + norm, + norminvgauss, + pareto, + pearson3, + powerlaw, + powerlognorm, + powernorm, + rdist, + rayleigh, + rice, + recipinvgauss, + skewnorm, + t, + trapz, + triang, + truncexpon, + truncnorm, + tukeylambda, + uniform, + vonmises, + vonmises_line, + wald, + weibull_min, + weibull_max, + wrapcauchy, +) ## Scipy metadata valid_dist = { @@ -243,11 +325,15 @@ ################################################## # Marginal parent class class Marginal(ABC): - """Parent class for marginal distributions - """ + """Parent class for marginal distributions""" - def __init__(self, sign=0): + def __init__(self, source="real", sign=0): self.sign = sign + if source not in ["real", "error"]: + raise ValueError( + "Your source of variability must be either 'real' or 'error'!" + ) + self.source = source @abstractmethod def copy(self): @@ -296,13 +382,15 @@ class MarginalNamed(Marginal): def __init__(self, d_name=None, d_param=None, **kw): super().__init__(**kw) - self.d_name = d_name self.d_param = d_param def copy(self): new_marginal = MarginalNamed( - sign=self.sign, d_name=self.d_name, d_param=copy.deepcopy(self.d_param) + sign=self.sign, + source=self.source, + d_name=self.d_name, + d_param=copy.deepcopy(self.d_param), ) return new_marginal @@ -331,11 +419,12 @@ def q(self, p): def summary(self, dig=2): stats = valid_dist[self.d_name](**self.d_param).stats("mvsk") param = { + "source": self.source, "mean": "{0:4.3e}".format(stats[0].round(dig)), "s.d.": "{0:4.3e}".format(sqrt(stats[1]).round(dig)), "COV": round(sqrt(stats[1]) / stats[0], dig), "skew.": stats[2].round(dig), - "kurt.": stats[3].round(dig) + 3, # full kurtosis + "kurt.": stats[3].round(dig) + 3, # full kurtosis } return "({0:+}) {1:}, {2:}".format(self.sign, self.d_name, param) @@ -352,7 +441,10 @@ def __init__(self, kde, atol=1e-6, **kw): self._set_bracket() def copy(self): - new_marginal = MarginalGKDE(kde=copy.deepcopy(self.kde), atol=self.atol,) + new_marginal = MarginalGKDE( + kde=copy.deepcopy(self.kde), + atol=self.atol, + ) return new_marginal @@ -432,20 +524,22 @@ def summary(self): self.bracket[0], self.bracket[1], self.atol ) + ## Marginal functions ################################################## def marg_mom( - dist, - mean=None, - sd=None, - cov=None, - var=None, - skew=None, - kurt=None, - kurt_excess=None, - floc=None, - sign=0, - dict_x0=None, + dist, + mean=None, + sd=None, + cov=None, + var=None, + skew=None, + kurt=None, + kurt_excess=None, + floc=None, + sign=0, + dict_x0=None, + source="real", ): r"""Fit scipy.stats continuous distribution via moments @@ -508,35 +602,29 @@ def marg_mom( if mean is None: raise ValueError("Must provide `mean` argument.") if (sd is None) and (var is None) and (cov is None): - raise ValueError( - "One of `sd`, `cov`, or `var` must be provided." - ) + raise ValueError("One of `sd`, `cov`, or `var` must be provided.") if sum([(not sd is None), (not var is None), (not cov is None)]) > 1: - raise ValueError( - "Only one of `sd`, `cov`, and `var` may be provided." - ) + raise ValueError("Only one of `sd`, `cov`, and `var` may be provided.") if (not kurt is None) and (not kurt_excess is None): - raise ValueError( - "Only one of `kurt` and `kurt_excess` may be provided." - ) + raise ValueError("Only one of `kurt` and `kurt_excess` may be provided.") ## Process arguments # Transform to "standard" moments - if (not sd is None): + if not sd is None: var = sd**2 - if (not cov is None): - var = (mean * cov)**2 - if (not kurt is None): + if not cov is None: + var = (mean * cov) ** 2 + if not kurt is None: kurt_excess = kurt - 3 # Build up target moments s = "mv" m_target = array([mean, var]) - if (not skew is None): + if not skew is None: s = s + "s" m_target = concatenate((m_target, array([skew]))) - if (not kurt is None): + if not kurt is None: s = s + "k" m_target = concatenate((m_target, array([kurt_excess]))) n_provided = len(s) @@ -561,16 +649,18 @@ def marg_mom( key_wk = {key for key in param_dist[dist] if key != "loc"} if floc is None: + def _obj(v): kw = dict(zip(key_wk, v)) return array(valid_dist[dist](**kw).stats(s)) - m_target + else: + def _obj(v): kw = dict(zip(key_wk, v)) kw["loc"] = floc return array(valid_dist[dist](**kw).stats(s)) - m_target - ## Generate initial guess if dict_x0 is None: if dist == "lognorm": @@ -598,8 +688,8 @@ def _obj(v): df=10, c=10, beta=1, - #K=None, - #chi=None, + # K=None, + # chi=None, ) # Repackage for optimizer @@ -612,15 +702,16 @@ def _obj(v): if res.success is False: raise RuntimeError( "Moment matching failed; initial guess may be poor, or requested " - "moments may be infeasible. Try setting `dict_x0`. " + - "Printing optimization results for debugging:\n\n{}".format(res) + "moments may be infeasible. Try setting `dict_x0`. " + + "Printing optimization results for debugging:\n\n{}".format(res) ) ## Repackage and return param = dict(zip(key_wk, res.x)) if floc is not None: param["loc"] = floc - return MarginalNamed(sign=sign, d_name=dist, d_param=param) + return MarginalNamed(sign=sign, d_name=dist, d_param=param, source=source) + ## Fit a named scipy.stats distribution def marg_fit(dist, data, name=True, sign=None, **kwargs):