Confidence intervals #27

Merged · 6 commits · Jun 23, 2023
26 changes: 4 additions & 22 deletions alea/parameters.py
@@ -17,6 +17,7 @@ class Parameter:
relative_uncertainty (bool, optional): Indicates if the uncertainty is relative to the nominal_value.
blueice_anchors (list, optional): Anchors for blueice template morphing.
fit_limits (tuple, optional): The limits for fitting the parameter.
parameter_interval_bounds (tuple, optional): The limits for computing confidence intervals.
fit_guess (float, optional): The initial guess for fitting the parameter.
description (str, optional): A description of the parameter.
"""
@@ -31,6 +32,7 @@ def __init__(
relative_uncertainty: Optional[bool] = None,
blueice_anchors: Optional[List] = None,
fit_limits: Optional[Tuple] = None,
parameter_interval_bounds: Optional[Tuple] = None,
fit_guess: Optional[float] = None,
description: Optional[str] = None,
):
@@ -42,6 +44,7 @@ def __init__(
self.relative_uncertainty = relative_uncertainty
self.blueice_anchors = blueice_anchors
self.fit_limits = fit_limits
self.parameter_interval_bounds = parameter_interval_bounds
self._fit_guess = fit_guess
self.description = description

@@ -128,28 +131,7 @@ def from_config(cls, config: Dict[str, dict]):
"""
parameters = cls()
for name, param_config in config.items():
nominal_value = param_config.get("nominal_value", None)
fittable = param_config.get("fittable", True)
ptype = param_config.get("ptype", None)
uncertainty = param_config.get("uncertainty", None)
relative_uncertainty = param_config.get(
"relative_uncertainty", None)
blueice_anchors = param_config.get("blueice_anchors", None)
fit_limits = param_config.get("fit_limits", None)
fit_guess = param_config.get("fit_guess", None)
description = param_config.get("description", None)
parameter = Parameter(
name=name,
nominal_value=nominal_value,
fittable=fittable,
ptype=ptype,
uncertainty=uncertainty,
relative_uncertainty=relative_uncertainty,
blueice_anchors=blueice_anchors,
fit_limits=fit_limits,
fit_guess=fit_guess,
description=description
)
parameter = Parameter(name=name, **param_config)
parameters.add_parameter(parameter)
return parameters

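The `from_config` simplification above forwards each per-parameter config dict directly to `Parameter(name=name, **param_config)`, so the config keys must match the arguments of `Parameter.__init__`. A minimal usage sketch (the parameter name and values below are illustrative):

```python
from alea.parameters import Parameters

config = {
    "signal_rate": {  # hypothetical parameter name
        "nominal_value": 1.0,
        "fittable": True,
        "fit_limits": (0, 50),
        "parameter_interval_bounds": (0, 30),
    },
}
# each per-parameter dict is unpacked as Parameter(name=..., **param_config)
parameters = Parameters.from_config(config)
```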
131 changes: 124 additions & 7 deletions alea/statistical_model.py
@@ -1,10 +1,13 @@
import inspect
from inference_interface import toydata_from_file, toydata_to_file
from typing import Tuple, Optional
from typing import Tuple, Optional, Callable
import numpy as np
from scipy.stats import chi2
from scipy.optimize import brentq
from iminuit import Minuit
from iminuit.util import make_func_code
from alea.parameters import Parameters
import warnings

class StatisticalModel:
"""
@@ -42,7 +45,8 @@ class StatisticalModel:
_data = None
_config = {}
_confidence_level = 0.9
_confidence_interval_kind = "upper,lower,unified"
_confidence_interval_kind = "upper", "lower" or "central" (with a Feldman-Cousins threshold, "central" gives the unified interval)
confidence_interval_threshold: Callable that defines the Neyman threshold for limit calculations
_fit_guess = {}
_fixed_parameters = []
"""
@@ -68,11 +72,13 @@ def __init__(self,
data = None,
parameter_definition: dict or list = None,
confidence_level: float = 0.9,
confidence_interval_kind:str = "unified",
confidence_interval_kind: str = "central",  # one of "central", "upper", "lower"
confidence_interval_threshold: Callable[[float], float] = None,
**kwargs):
self._data = data
self._confidence_level = confidence_level
self._confidence_interval_kind = confidence_interval_kind
self.confidence_interval_threshold = confidence_interval_threshold
self._define_parameters(parameter_definition)

self._check_ll_and_generate_data_signature()
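
The new confidence_interval_threshold argument accepts any callable mapping the hypothesis value to a critical value. A hedged sketch of two such callables (the interpolation grid below is made up for illustration):

```python
from scipy.stats import chi2
from scipy.interpolate import interp1d

# constant asymptotic threshold for a 90% central interval:
asymptotic_threshold = lambda hypothesis: chi2(1).isf(1 - 0.9)

# or a threshold interpolated from toy-MC critical values (illustrative numbers):
hypotheses = [0.0, 1.0, 5.0, 10.0]
critical_values = [2.9, 2.8, 2.7, 2.7]
toy_threshold = interp1d(hypotheses, critical_values,
                         bounds_error=False, fill_value=(2.9, 2.7))
```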
@@ -113,10 +119,116 @@ def store_data(self, file_name, data_list, data_name_list=None, metadata = None)
kw = dict(metadata = metadata) if metadata is not None else dict()
toydata_to_file(file_name, data_list, data_name_list, **kw)

def _confidence_interval_checks(self, parameter: str,
parameter_interval_bounds: Tuple[float,float],
confidence_level: float,
confidence_interval_kind:str,
**kwargs):
"""
Helper function for confidence_interval that performs the input checks and returns the confidence interval kind, the threshold function and the bounds.
"""
if confidence_level is None:
confidence_level = self._confidence_level
if confidence_interval_kind is None:
confidence_interval_kind = self._confidence_interval_kind

assert (0<confidence_level) & (confidence_level<1), "the confidence level must lie between 0 and 1"
parameter_of_interest = self.parameters[parameter]
assert parameter_of_interest.fittable, "The parameter of interest must be fittable"
assert parameter not in kwargs, "you cannot set the parameter you're constraining"

if parameter_interval_bounds is None:
parameter_interval_bounds = parameter_of_interest.parameter_interval_bounds
if parameter_interval_bounds is None:
raise ValueError("You must set parameter_interval_bounds in the parameter config or when calling confidence_interval")

if parameter_of_interest.ptype == "rate":
try:
mu_parameter = self.get_expectations(**kwargs)[parameter_of_interest.name]
parameter_interval_bounds = (parameter_interval_bounds[0]/mu_parameter, parameter_interval_bounds[1]/mu_parameter)
except NotImplementedError:
warnings.warn("The statistical model does not implement get_expectations, "
              "confidence interval bounds will be used directly.")
pass  # no problem, continuing with bounds as set
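# e.g. if get_expectations reports mu = 20 expected events for this source,
# event-count bounds of (0, 30) become a rate-multiplier search range of (0, 1.5)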

# use the provided confidence_interval_threshold if set, otherwise define an asymptotic one:
if self.confidence_interval_threshold is not None:
confidence_interval_threshold = self.confidence_interval_threshold
else:
#use asymptotic thresholds assuming the test statistic is Chi2 distributed
if confidence_interval_kind in ["lower","upper"]:
critical_value = chi2(1).isf(2*(1. - confidence_level))
elif confidence_interval_kind == "central":
critical_value = chi2(1).isf(1. - confidence_level)

confidence_interval_threshold = lambda x: critical_value

return confidence_interval_kind, confidence_interval_threshold, parameter_interval_bounds

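For reference, a quick standalone check (not alea code) of what the asymptotic critical values above evaluate to at 90% confidence level:

```python
from scipy.stats import chi2

print(chi2(1).isf(2 * (1 - 0.9)))  # one-sided ("upper"/"lower"): ~1.64
print(chi2(1).isf(1 - 0.9))        # two-sided ("central"): ~2.71
```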

def confidence_interval(self, parameter: str,
parameter_interval_bounds: Tuple[float,float] = None,
confidence_level: float=None,
confidence_interval_kind:str = None,
**kwargs) -> Tuple[float, float]:
"""
Uses self.fit to compute confidence intervals for a certain named parameter

parameter: string, name of fittable parameter of the model
parameter_interval_bounds: range in which to search for the confidence interval edges. May be specified as:
- setting the property "parameter_interval_bounds" for the parameter
- passing a list here
If the parameter is a rate parameter, and the model has expectation values implemented,
the bounds will be interpreted as bounds on the expectation value (so that the range in the fit is parameter_interval_bounds/mus)
otherwise the bound is taken as-is.


"""

ci_objects = self._confidence_interval_checks(parameter,
parameter_interval_bounds,
confidence_level,
confidence_interval_kind,
**kwargs)
confidence_interval_kind, confidence_interval_threshold, parameter_interval_bounds = ci_objects

# find best-fit:
best_result, best_ll = self.fit(**kwargs)
best_parameter = best_result[parameter]
assert (parameter_interval_bounds[0] < best_parameter) & (best_parameter<parameter_interval_bounds[1]), "the best-fit is outside your confidence interval search limits in parameter_interval_bounds"
# define the difference between the profile log-likelihood curve and the rejection threshold:
def t(hypothesis):
    kwargs[parameter] = hypothesis
    _, ll = self.fit(**kwargs)  # ll is the +log-likelihood here
    ret = 2. * (best_ll - ll)  # likelihood curve "right way up" (smiling)
    return ret - confidence_interval_threshold(hypothesis)  # if positive, hypothesis is excluded

if confidence_interval_kind in ["upper", "central"]:
    if t(parameter_interval_bounds[1]) > 0:
        ul = brentq(t, best_parameter, parameter_interval_bounds[1])
    else:
        ul = np.inf
else:
    ul = np.nan

if confidence_interval_kind in ["lower", "central"]:
    if t(parameter_interval_bounds[0]) > 0:
        dl = brentq(t, parameter_interval_bounds[0], best_parameter)
    else:
        dl = -np.inf
else:
    dl = np.nan

return dl, ul
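
To illustrate the edge-finding logic, a self-contained toy example (not alea code): for a Gaussian measurement x ~ N(mu, 1) with x = 3.0 observed, the profile test statistic is t(mu) = (x - mu)^2, and brentq finds where it crosses the asymptotic central threshold:

```python
import numpy as np
from scipy.stats import chi2
from scipy.optimize import brentq

x_obs = best_mu = 3.0
critical_value = chi2(1).isf(1 - 0.9)  # ~2.71 for a 90% central interval

def t(mu):
    return (x_obs - mu) ** 2 - critical_value  # positive => mu excluded

bounds = (-10.0, 20.0)
dl = brentq(t, bounds[0], best_mu)  # lower edge, ~1.36
ul = brentq(t, best_mu, bounds[1])  # upper edge, ~4.64
```
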
def get_confidence_interval(self) -> Tuple[float, float]:
    raise NotImplementedError("todo")

def get_expectations(self):
    raise NotImplementedError("get_expectations is optional to implement")

@@ -164,7 +276,12 @@ def cost(args):

return cost

def fit(self, verbose=False, **kwargs):
def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]:
    """
    Fit the model to the data by maximizing the likelihood.
    Returns a dict with the best-fit value of each parameter and the log-likelihood
    evaluated at the best fit. While the optimizer internally minimizes the negative
    log-likelihood, the returned value is the _maximum_ of the log-likelihood.
    """
fixed_parameters = list(kwargs.keys())
guesses = self.parameters.fit_guesses
guesses.update(kwargs)
@@ -204,7 +321,7 @@ def __call__(self, *args):
m.migrad()
if verbose:
print(m)
return m.values.to_dict(), -1 * m.fval
return m.values.to_dict(), -1 * m.fval  # note: this is the _maximum_ log-likelihood (Minuit minimized the negative log-likelihood)

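A minimal standalone sketch of that sign convention (toy data, not alea code): Minuit minimizes the negative log-likelihood, so the maximum log-likelihood is recovered as -m.fval:

```python
import numpy as np
from iminuit import Minuit

data = np.array([0.8, 1.2, 1.0])

def negative_ll(mu):  # negative Gaussian log-likelihood, sigma = 1, constants dropped
    return 0.5 * np.sum((data - mu) ** 2)

m = Minuit(negative_ll, mu=0.0)
m.migrad()
best_fit, max_ll = m.values.to_dict(), -1 * m.fval  # max log-likelihood (up to a constant)
```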
def _check_ll_and_generate_data_signature(self):
ll_params = set(inspect.signature(self._ll).parameters)