Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First runner manipulating statistical model #47

Merged
merged 12 commits into from
Jul 21, 2023
9 changes: 1 addition & 8 deletions alea/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
import os

_ROOT = os.path.abspath(os.path.dirname(__file__))

from alea import likelihoods

from alea import plotting
from alea import simulators
from alea import template_source
from alea import toymc_running
from alea import utils


def get_data_path(data_path):
return os.path.join(_ROOT, "data", data_path)
from alea import runner
80 changes: 51 additions & 29 deletions alea/blueice_extended_model.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from pydoc import locate # to lookup likelihood class
from typing import List

from alea.statistical_model import StatisticalModel
from alea.simulators import BlueiceDataGenerator
from alea.utils import adapt_likelihood_config_for_blueice
from alea.parameters import Parameters
import yaml
import numpy as np
import scipy.stats as stats
from blueice.likelihood import LogAncillaryLikelihood
from blueice.likelihood import LogLikelihoodSum

from alea.statistical_model import StatisticalModel
from alea.simulators import BlueiceDataGenerator
from alea.utils import adapt_likelihood_config_for_blueice
from alea.parameters import Parameters


class BlueiceExtendedModel(StatisticalModel):
"""
Expand All @@ -37,7 +38,8 @@ def __init__(self, parameter_definition: dict, likelihood_config: dict):
self.likelihood_names.append("ancillary_likelihood")
self.data_generators = self._build_data_generators()

# TODO analysis_space could be inferred from the data (assert that all sources have the same analysis space)
# TODO analysis_space could be inferred from the data
# (assert that all sources have the same analysis space)

@classmethod
def from_config(cls, config_file_path: str) -> "BlueiceExtendedModel":
Expand All @@ -62,7 +64,8 @@ def data(self) -> list:
def data(self, data: list):
"""
Overrides default setter. Will also set the data of the blueice ll.
Data-sets are expected to be in the form of a list of one or more structured arrays-- representing the data-sets of one or more likelihood terms.
Data-sets are expected to be in the form of a list of one
or more structured arrays-- representing the data-sets of one or more likelihood terms.
"""
# iterate through all likelihood terms and set the science data in the blueice ll
# last entry in data are the generate_values
Expand All @@ -77,7 +80,8 @@ def get_expectation_values(self, **kwargs) -> dict:
given a number of named parameters (kwargs)
"""
ret = dict()
for ll in self._likelihood.likelihood_list[:-1]: # ancillary likelihood does not contribute
# ancillary likelihood does not contribute
for ll in self._likelihood.likelihood_list[:-1]:

ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys())
ll_pars += ["livetime_days"]
Expand Down Expand Up @@ -117,8 +121,9 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":

# add all parameters to extra_dont_hash for each source unless it is used:
for i, source in enumerate(config["sources"]):
parameters_to_ignore: List[str] = [p.name for p in self.parameters if (p.ptype == "shape")
& (p.name not in source["parameters"])]
parameters_to_ignore: List[str] = [
p.name for p in self.parameters if (
dachengx marked this conversation as resolved.
Show resolved Hide resolved
dachengx marked this conversation as resolved.
Show resolved Hide resolved
p.ptype == "shape") & (p.name not in source["parameters"])]
dachengx marked this conversation as resolved.
Show resolved Hide resolved
dachengx marked this conversation as resolved.
Show resolved Hide resolved
# no efficiency affects PDF:
parameters_to_ignore += [p.name for p in self.parameters if (p.ptype == "efficiency")]
parameters_to_ignore += source.get("extra_dont_hash_settings", [])
Expand All @@ -142,7 +147,8 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
ll.add_rate_parameter(rate_parameter, log_prior=None)
else:
raise NotImplementedError(
"Only rate multipliers that end on _rate_multiplier are currently supported.")
"Only rate multipliers that end on _rate_multiplier"
" are currently supported.")

# Set shape parameters
shape_parameters = [
Expand All @@ -167,7 +173,8 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
def _build_data_generators(self) -> list:
# last one is AncillaryLikelihood
# IDEA: Also implement data generator for ancillary ll term.
return [BlueiceDataGenerator(ll_term) for ll_term in self._likelihood.likelihood_list[:-1]]
return [
BlueiceDataGenerator(ll_term) for ll_term in self._likelihood.likelihood_list[:-1]]

def _ll(self, **generate_values) -> float:
return self._likelihood(**generate_values)
Expand All @@ -183,8 +190,8 @@ def _generate_data(self, **generate_values) -> list:
return science_data + [ancillary_measurements] + [generate_values]

def _generate_science_data(self, **generate_values) -> list:
science_data = [gen.simulate(**generate_values)
for gen in self.data_generators]
science_data = [
gen.simulate(**generate_values) for gen in self.data_generators]
return science_data

def _generate_ancillary_measurements(self, **generate_values) -> dict:
Expand All @@ -205,18 +212,28 @@ def _generate_ancillary_measurements(self, **generate_values) -> dict:
return ancillary_measurements

def _set_efficiency(self, source, ll):
assert "efficiency_name" in source, "Unspecified efficiency_name for source {:s}".format(source["name"])
if "efficiency_name" not in source:
raise ValueError(f"Unspecified efficiency_name for source {source['name']:s}")
efficiency_name = source["efficiency_name"]
assert efficiency_name in source[
"parameters"], "The efficiency_name for source {:s} is not in its parameter list".format(source["name"])

if efficiency_name not in source["parameters"]:
raise ValueError(
f"The efficiency_name for source {source['name']:s}"
" is not in its parameter list")
efficiency_parameter = self.parameters[efficiency_name]
assert efficiency_parameter.ptype == "efficiency", "The parameter {:s} must" \
" be an efficiency".format(efficiency_name)

if efficiency_parameter.ptype != "efficiency":
raise ValueError(f"The parameter {efficiency_name:s} must be an efficiency")
limits = efficiency_parameter.fit_limits
assert 0 <= limits[0], 'Efficiency parameters including {:s} must be' \
' constrained to be nonnegative'.format(efficiency_name)
assert np.isfinite(limits[1]), 'Efficiency parameters including {:s} must be' \
' constrained to be finite'.format(efficiency_name)

if limits[0] < 0:
raise ValueError(
f"Efficiency parameters including {efficiency_name:s}"
" must be constrained to be nonnegative")
if ~np.isfinite(limits[1]):
dachengx marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"Efficiency parameters including {efficiency_name:s}"
" must be constrained to be finite")
ll.add_shape_parameter(efficiency_name, anchors=(limits[0], limits[1]))


Expand All @@ -239,13 +256,16 @@ def __init__(self, parameters: Parameters):
"""
self.parameters = parameters
# check that there are no None values in the uncertainties dict
assert set(self.parameters.uncertainties.keys()) == set(self.parameters.names)
if set(self.parameters.uncertainties.keys()) != set(self.parameters.names):
raise ValueError(
"The uncertainties dict must contain all parameters as keys.")
parameter_list = self.parameters.names

self.constraint_functions = self._get_constraint_functions()
super().__init__(func=self.ancillary_likelihood_sum,
parameter_list=parameter_list,
config=self.parameters.nominal_values)
super().__init__(
func=self.ancillary_likelihood_sum,
parameter_list=parameter_list,
config=self.parameters.nominal_values)

@property
def constraint_terms(self) -> dict:
Expand All @@ -263,7 +283,9 @@ def set_data(self, d: dict):
d (dict): Data in this case is a dict of ancillary measurements.
"""
# This results in shifted constraint terms.
assert set(d.keys()) == set(self.parameters.names)
if set(d.keys()) != set(self.parameters.names):
raise ValueError(
"The data dict must contain all parameters as keys in CustomAncillaryLikelihood.")
self.constraint_functions = self._get_constraint_functions(**d)

def ancillary_likelihood_sum(self, evaluate_at: dict) -> float:
Expand All @@ -288,8 +310,8 @@ def _get_constraint_functions(self, **generate_values) -> dict:
if param.relative_uncertainty:
uncertainty *= param.nominal_value
if isinstance(uncertainty, float):
func = stats.norm(central_values[name],
uncertainty)
func = stats.norm(
central_values[name], uncertainty)
else:
# TODO: Implement str-type uncertainties
NotImplementedError(
Expand Down
7 changes: 5 additions & 2 deletions alea/examples/gaussian_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@


class GaussianModel(StatisticalModel):
def __init__(self, parameter_definition: Optional[dict or list] = None,
**kwargs):
def __init__(
self,
parameter_definition: Optional[dict or list] = None,
**kwargs,
):
dachengx marked this conversation as resolved.
Show resolved Hide resolved
dachengx marked this conversation as resolved.
Show resolved Hide resolved
"""
Initialise a model of a gaussian measurement (hatmu),
where the model has parameters mu and sigma
Expand Down
11 changes: 7 additions & 4 deletions alea/examples/unbinned_wimp_running.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ computation:
discovery_power:
parameters_to_zip: {}
parameters_to_vary:
{poi_expectation: "np.linspace(0, 30, 10)", wimp_mass: [10, 50, 200] }
{
poi_expectation: "np.linspace(0, 30, 10)",
wimp_mass: [10, 50, 200],
}
parameters_in_common:
{
hypotheses: ["true", "null", "free"],
Expand All @@ -19,7 +22,7 @@ computation:

threshold:
parameters_to_zip: {}
parameters_to_vary: { wimp_mass: [10, 50, 200] }
parameters_to_vary: {wimp_mass: [10, 50, 200]}
parameters_in_common:
{
hypotheses: ["true", "null", "free"],
Expand All @@ -33,7 +36,7 @@ computation:

sensitivity:
parameters_to_zip: {}
parameters_to_vary: { poi_expectation: [0.], wimp_mass: [10, 50, 200] }
parameters_to_vary: {poi_expectation: [0.], wimp_mass: [10, 50, 200]}
parameters_in_common:
{
hypotheses: ["true", "null", "free"],
Expand All @@ -51,4 +54,4 @@ OSG_parameters:
request_memory: "8000Mb"
singularity_container: null

outputfolder: null
outputfolder: null
2 changes: 1 addition & 1 deletion alea/examples/unbinned_wimp_statistical_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,4 @@ likelihood_config:
- signal_efficiency
template_filename: wimp50gev_template.h5
apply_efficiency: True
efficiency_name: signal_efficiency
efficiency_name: signal_efficiency
Loading