XENONnT · hammannr · Jul 17, 2023 · Jun 22, 2023 · Jun 22, 2023 · Jun 22, 2023
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,4 @@ development_scripts/*hdf
 __pycache__/
 *ipynb
 !examples/*.ipynb
+debug.py
diff --git a/alea/blueice_extended_model.py b/alea/blueice_extended_model.py
@@ -0,0 +1,178 @@
+from pydoc import locate  # to lookup likelihood class
+from alea.statistical_model import StatisticalModel
+from alea.simulators import BlueiceDataGenerator
+from alea.utils import adapt_likelihood_config_for_blueice
+import yaml
+import numpy as np
+import scipy.stats as stats
+from blueice.likelihood import LogAncillaryLikelihood
+from blueice.likelihood import LogLikelihoodSum
+# from inference_interface import dict_to_structured_array
+
+
+class BlueiceExtendedModel(StatisticalModel):
+    """
+    Statistical model backed by a sum of blueice likelihood terms.
+
+    The likelihood is a ``LogLikelihoodSum`` with one term per entry of
+    ``likelihood_terms``, followed by a ``CustomAncillaryLikelihood`` built
+    from all parameters that have an uncertainty. The ancillary term is
+    always the LAST entry of ``likelihood_list``; data generation and the
+    data setter rely on that ordering.
+    """
+
+    def __init__(self, parameter_definition: dict, likelihood_terms: list):
+        """
+        Build the likelihood and one data generator per science term.
+
+        :param parameter_definition: parameter definition, forwarded
+            unchanged to ``StatisticalModel.__init__``
+        :param likelihood_terms: iterable of per-term config dicts; each one
+            is read for the keys ``name``, ``likelihood_type`` and ``sources``
+        """
+        # TODO write a more complete docstring
+        super().__init__(parameter_definition=parameter_definition)
+        # self.parameters must exist before the likelihood can be built
+        self._likelihood = self._build_ll_from_config(likelihood_terms)
+        self.likelihood_names = [c["name"] for c in likelihood_terms]
+        self.likelihood_names.append("ancillary_likelihood")
+        self.data_generators = self._build_data_generators()
+
+        # TODO analysis_space should be inferred from the data (assert that all sources have the same analysis space)
+
+    @classmethod
+    def from_config(cls, config_file):
+        """
+        Alternate constructor: read a YAML file and pass its top-level keys
+        as keyword arguments to ``__init__``.
+
+        :param config_file: path of the YAML file to load
+        """
+        with open(config_file, "r") as f:
+            config = yaml.safe_load(f)
+        return cls(**config)
+
+    def _ll(self, **generate_values):
+        """Evaluate the summed likelihood at the given parameter values."""
+        # TODO: Does this make sense?
+        return self._likelihood(**generate_values)
+
+    def _generate_data(self, **generate_values):
+        """
+        Generate one toy data-set.
+
+        :return: list of the science data-sets (one per science likelihood
+            term), then the dict of ancillary measurements, then the
+            ``generate_values`` dict as the last entry
+        """
+        # generate_values are already filtered and filled by the nominal values through the generate_data method in the parent class
+        science_data = self._generate_science_data(**generate_values)
+        # only parameters with an uncertainty get an ancillary measurement
+        ancillary_keys = self.parameters.with_uncertainty.names
+        generate_values_anc = {k: v for k, v in generate_values.items() if k in ancillary_keys}
+        ancillary_measurements = self._generate_ancillary_measurements(
+            **generate_values_anc)
+        # generate_values = dict_to_structured_array(generate_values)
+        return science_data + [ancillary_measurements] + [generate_values]
+
+    def _generate_science_data(self, **generate_values):
+        """Simulate one data-set with each science data generator."""
+        science_data = [gen.simulate(**generate_values)
+                        for gen in self.data_generators]
+        return science_data
+
+    def _generate_ancillary_measurements(self, **generate_values):
+        """
+        Draw one ancillary measurement per constrained parameter.
+
+        Each value is drawn from the constraint function obtained for
+        ``generate_values`` and clipped to the parameter's fit limits.
+
+        :return: dict mapping parameter name to the drawn measurement
+        """
+        ancillary_measurements = {}
+        # the ancillary term is always the last likelihood term
+        anc_ll = self._likelihood.likelihood_list[-1]
+        ancillary_generators = anc_ll._get_constraint_functions(**generate_values)
+        for name, gen in ancillary_generators.items():
+            parameter_meas = gen.rvs()
+            # correct parameter_meas if out of bounds
+            param = self.parameters[name]
+            if not param.value_in_fit_limits(parameter_meas):
+                if param.fit_limits[0] is not None and parameter_meas < param.fit_limits[0]:
+                    parameter_meas = param.fit_limits[0]
+                elif param.fit_limits[1] is not None and parameter_meas > param.fit_limits[1]:
+                    parameter_meas = param.fit_limits[1]
+            ancillary_measurements[name] = parameter_meas
+        # TODO: Do we need this as a structured array?
+        # ancillary_measurements = dict_to_structured_array(ancillary_measurements)
+
+        return ancillary_measurements
+
+    # TODO: Override uncertainty setter to also set the uncertainty of the ancillary ll term (func_args). Or for now override the uncertainty setter to not work and raise a warning.
+
+    @property
+    def data(self):
+        """Return the data as stored by the parent class."""
+        return super().data
+
+    @data.setter
+    def data(self, data):
+        """
+        Overrides default setter. Will also set the data of the blueice ll.
+
+        ``data`` is expected to be a list whose entries line up with
+        ``likelihood_list`` (science data-sets first, then the dict of
+        ancillary measurements), followed by the generate_values as the
+        final entry.
+        """
+        # iterate through all likelihood terms and set the science data in the blueice ll
+        # last entry in data are the generate_values
+        # NOTE: zip stops at the shorter sequence, so a length mismatch is not detected here
+        for d, ll_term in zip(data[:-1], self._likelihood.likelihood_list):
+            ll_term.set_data(d)
+
+        self._data = data
+
+    @property
+    def nominal_expectation_values(self):
+        """Not implemented yet; currently returns None."""
+        # TODO
+        # IDEA also enable a setter that changes the rate parameters?
+        pass
+
+    def get_expectation_values(self, **kwargs):
+        """Not implemented yet; currently returns None."""
+        # TODO
+        pass
+
+    def _build_ll_from_config(self, likelihood_terms):
+        """
+        Build the blueice ``LogLikelihoodSum`` from the per-term configs.
+
+        :param likelihood_terms: iterable of per-term config dicts
+        :return: ``LogLikelihoodSum`` of all science terms plus the ancillary
+            term (appended last)
+        :raises ValueError: if ``likelihood_type`` cannot be located
+        :raises NotImplementedError: if a rate parameter of a source does not
+            end with ``_rate_multiplier``
+        """
+        rate_suffix = "_rate_multiplier"
+        # iterate through ll_config and build blueice ll
+        lls = []
+        for config in likelihood_terms:
+            likelihood_object = locate(config["likelihood_type"])
+            if likelihood_object is None:
+                # pydoc.locate returns None instead of raising when the
+                # dotted path cannot be resolved
+                raise ValueError(
+                    "Could not locate likelihood_type "
+                    f"{config['likelihood_type']!r}.")
+            blueice_config = adapt_likelihood_config_for_blueice(config)
+            # NOTE(review): the value is stored under "livetime_days" while the
+            # example config added in this change describes livetimes in years -- confirm the unit.
+            blueice_config["livetime_days"] = self.parameters[
+                blueice_config["livetime_parameter"]].nominal_value
+            ll = likelihood_object(blueice_config)
+            # Set rate parameters
+            for source in config["sources"]:
+                for param_name in source["parameters"]:
+                    if self.parameters[param_name].type != "rate":
+                        continue
+                    # TODO: Check that only one rate per source is set?
+                    if not param_name.endswith(rate_suffix):
+                        raise NotImplementedError(
+                            f"Rate parameter {param_name!r} must end with "
+                            f"{rate_suffix!r}; other rate parameters are "
+                            "not supported yet.")
+                    # strip only the trailing suffix to get the blueice name
+                    ll.add_rate_parameter(
+                        param_name[:-len(rate_suffix)], log_prior=None)
+            # TODO: Set shape parameters
+
+            ll.prepare()
+            lls.append(ll)
+        # Ancillary likelihood
+        ll = CustomAncillaryLikelihood(self.parameters.with_uncertainty)
+        lls.append(ll)
+
+        # TODO: Include likelihood_weights
+        return LogLikelihoodSum(lls, likelihood_weights=None)
+
+    def _build_data_generators(self):
+        """Create one ``BlueiceDataGenerator`` per science likelihood term."""
+        # last one is AncillaryLikelihood
+        # IDEA: Also implement data generator for ancillary ll term.
+        return [BlueiceDataGenerator(ll_term) for ll_term in self._likelihood.likelihood_list[:-1]]
+
+# Build wrapper to conveniently define a constraint likelihood term
+
+
+class CustomAncillaryLikelihood(LogAncillaryLikelihood):
+    """
+    Ancillary (constraint) likelihood term.
+
+    Holds one frozen ``scipy.stats.norm`` distribution per parameter, centred
+    on the nominal value (or on the ancillary measurement once ``set_data``
+    was called) with the parameter's uncertainty as width. The likelihood
+    value is the sum of the log-pdfs of these distributions.
+    """
+    # TODO: Make sure the functions and terms are properly implemented now.
+
+    def __init__(self, parameters):
+        """
+        :param parameters: parameters object; every parameter in it must have
+            an uncertainty that is not None
+        """
+        self.parameters = parameters
+        # check that there are no None values in the uncertainties dict
+        assert set(self.parameters.uncertainties.keys()) == set(self.parameters.names), \
+            "All parameters of the ancillary likelihood need an uncertainty."
+        parameter_list = self.parameters.names
+
+        # without arguments the constraints are centred on the values
+        # returned by calling the parameters object with no overrides
+        self.constraint_functions = self._get_constraint_functions()
+        super().__init__(func=self.ancillary_likelihood_sum,
+                         parameter_list=parameter_list,
+                         config=self.parameters.nominal_values)
+
+    @property
+    def constraint_terms(self):
+        """Dict mapping parameter name to the ``logpdf`` of its constraint."""
+        return {name: func.logpdf for name, func in self.constraint_functions.items()}
+
+    def set_data(self, d: dict):
+        """
+        Re-centre the constraint functions on a set of ancillary measurements.
+
+        :param d: dict mapping every parameter name to its measured value
+        """
+        # data in this case is a set of ancillary measurements.
+        # This results in shifted constraint terms.
+        assert set(d.keys()) == set(self.parameters.names), \
+            "Ancillary measurements must be given for exactly the constrained parameters."
+        self.constraint_functions = self._get_constraint_functions(**d)
+
+    def _get_constraint_functions(self, **generate_values) -> dict:
+        """
+        Build one frozen normal distribution per parameter.
+
+        :param generate_values: central values overriding the defaults
+            returned by calling the parameters object
+        :return: dict mapping parameter name to a frozen ``scipy.stats.norm``
+        :raises NotImplementedError: if an uncertainty is not an int or float
+        """
+        central_values = self.parameters(**generate_values)
+        constraint_functions = {}
+        for name, uncertainty in self.parameters.uncertainties.items():
+            # check the type before scaling so unsupported (e.g. str)
+            # uncertainties fail with a clear error
+            if not isinstance(uncertainty, (int, float)):
+                # TODO: Implement str-type uncertainties
+                raise NotImplementedError(
+                    "Only numeric (int or float) uncertainties are supported "
+                    f"at the moment; parameter {name!r} has {uncertainty!r}.")
+            param = self.parameters[name]
+            if param.relative_uncertainty:
+                # relative uncertainties are given as a fraction of the nominal value
+                uncertainty *= param.nominal_value
+            constraint_functions[name] = stats.norm(central_values[name],
+                                                    uncertainty)
+        return constraint_functions
+
+    def ancillary_likelihood_sum(self, evaluate_at: dict):
+        """
+        Sum the constraint log-pdfs evaluated at the given parameter values.
+
+        :param evaluate_at: dict mapping parameter name to the value at which
+            its constraint term is evaluated
+        """
+        # NOTE(review): confirm how the blueice base class invokes ``func``
+        # (a single dict vs. keyword arguments).
+        return np.sum([term(evaluate_at[name]) for name, term in self.constraint_terms.items()])
diff --git a/alea/examples/gaussian_model.py b/alea/examples/gaussian_model.py
@@ -7,16 +7,17 @@
 
 
 class GaussianModel(StatisticalModel):
-    def __init__(self, parameter_definition: Optional[dict or list] = None):
+    def __init__(self, parameter_definition: Optional[dict or list] = None,
+                 **kwargs):
         """
         Initialise a model of a gaussian measurement (hatmu),
         where the model has parameters mu and sigma
         For illustration, we show how required nominal parameters can be added to the init
         sigma is fixed in this example.
         """
         if parameter_definition is None:
-            parameter_definition = ['mu', 'sigma']
-        super().__init__(parameter_definition=parameter_definition)
+            parameter_definition = ["mu", "sigma"]
+        super().__init__(parameter_definition=parameter_definition, **kwargs)
 
     def _ll(self, mu=None, sigma=None):
         hat_mu = self.data[0]['hat_mu'][0]

diff --git a/alea/examples/unbinned_wimp_running.yaml b/alea/examples/unbinned_wimp_running.yaml
@@ -0,0 +1,54 @@
+# Placeholder for now: this file only sketches the intended structure of a running configuration.
+statistical_model_config: unbinned_wimp_statistical_model.yaml
+
+poi: wimp_rate_multiplier
+
+computation:
+  discovery_power:
+    parameters_to_zip: {}
+    parameters_to_vary:
+      {poi_expectation: "np.linspace(0, 30, 10)", wimp_mass: [10, 50, 200] }
+    parameters_in_common:
+      {
+        hypotheses: ["true", "null", "free"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        n_mc: 5000,
+        n_batch: 40,
+      }
+    toydata_mode: "generate"
+
+  threshold:
+    parameters_to_zip: {}
+    parameters_to_vary: { wimp_mass: [10, 50, 200] }
+    parameters_in_common:
+      {
+        hypotheses: ["true", "null", "free"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        n_mc: 5000,
+        n_batch: 40,
+      }
+    limit_threshold: "thresholds.hdf5"
+    toydata_mode: "generate"
+    parameters_as_wildcards: ["poi_expectation", "n_mc", "n_batch"]
+
+  sensitivity:
+    parameters_to_zip: {}
+    parameters_to_vary: { poi_expectation: [0.], wimp_mass: [10, 50, 200] }
+    parameters_in_common:
+      {
+        hypotheses: ["true", "null", "free"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        n_mc: 5000,
+        n_batch: 40,
+        compute_confidence_interval: True,
+        limit_threshold: "thresholds.hdf5",
+      }
+    toydata_mode: "generate"
+
+midway_path: null
+OSG_path: null
+OSG_parameters:
+  request_memory: "8000Mb"
+  singularity_container: null
+
+outputfolder: null
diff --git a/alea/examples/unbinned_wimp_statistical_model.yaml b/alea/examples/unbinned_wimp_statistical_model.yaml
@@ -0,0 +1,107 @@
+parameter_definition:
+  wimp_mass:
+    nominal_value: 50
+    fittable: false
+    description: WIMP mass in GeV/c^2
+
+  livetime_0:
+    nominal_value: 0.2
+    fittable: false
+    description: Livetime of SR0 in years
+
+  livetime_1:
+    nominal_value: 1.0
+    fittable: false
+    description: Livetime of SR1 in years
+
+  wimp_rate_multiplier:
+    nominal_value: 1.0
+    ptype: rate
+    fittable: true
+    fit_limits:
+      - 0
+      - null
+
+  er_rate_multiplier:
+    nominal_value: 1.0
+    ptype: rate
+    uncertainty: 0.2
+    relative_uncertainty: true
+    fittable: true
+    fit_limits:
+      - 0
+      - null
+    fit_guess: 1.0
+
+  # er_band_shift:
+  #   nominal_value: 0
+  #   ptype: shape
+  #   uncertainty: scipy.stats.uniform(loc=-1, scale=2)
+  #   relative_uncertainty: false
+  #   fittable: true
+  #   blueice_anchors:
+  #     - -1
+  #     - 0
+  #     - 1
+  #   fit_limits:
+  #     - -1
+  #     - 1
+  #   description: ER band shape parameter (shifts the ER band up and down)
+
+
+likelihood_terms:
+# SR0
+  - name: sr0
+    default_source_class: alea.template_source.TemplateSource
+    likelihood_type: blueice.likelihood.UnbinnedLogLikelihood
+    analysis_space:
+      - "cs1": 'np.arange(0, 102, 2)'
+      - "cs2": 'np.geomspace(100, 100000, 51)'
+    in_events_per_bin: true
+    livetime_parameter: livetime_0
+    slice_args: {}
+    sources:
+    - name: er
+      histname: er_template  # TODO: implement a default histname based on the source name
+      parameters:
+        - er_rate_multiplier
+        # - er_band_shift
+      templatepath: examples/er_template.h5
+      histogram_scale_factor: 1
+
+    - name: wimp
+      histname: wimp_template
+      parameters:
+        - wimp_rate_multiplier
+        - wimp_mass
+      templatepath: examples/wimp50gev_template.h5
+      apply_efficiency: False
+      efficiency_name: 'signal_eff'  # TODO: Check
+
+# SR1
+  - name: sr1
+    default_source_class: alea.template_source.TemplateSource
+    likelihood_type: blueice.likelihood.UnbinnedLogLikelihood
+    analysis_space:
+      - "cs1": 'np.arange(0, 102, 2)'
+      - "cs2": 'np.geomspace(100, 100000, 51)'
+    in_events_per_bin: true
+    livetime_parameter: livetime_1
+    slice_args: {}
+    sources:
+    - name: er
+      histname: er_template
+      parameters:
+        - er_rate_multiplier
+        # - er_band_shift
+      templatepath: examples/er_template.h5
+      histogram_scale_factor: 2
+
+    - name: wimp
+      histname: wimp_template
+      parameters:
+        - wimp_rate_multiplier
+        - wimp_mass
+      templatepath: examples/wimp50gev_template.h5
+      apply_efficiency: False
+      efficiency_name: 'wimp_eff'  # TODO: Check
diff --git a/alea/parameters.py b/alea/parameters.py
@@ -213,6 +213,25 @@ def not_fittable(self) -> List[str]:
         """
         return [name for name, param in self.parameters.items() if not param.fittable]
 
+    @property
+    def uncertainties(self) -> dict:
+        """
+        Map each parameter name to its uncertainty, skipping parameters
+        whose uncertainty is None.
+        """
+        name_to_uncertainty = {}
+        for name, param in self.parameters.items():
+            if param.uncertainty is not None:
+                name_to_uncertainty[name] = param.uncertainty
+        return name_to_uncertainty
+
+    @property
+    def with_uncertainty(self) -> "Parameters":
+        """
+        Build a new Parameters object containing only the parameters whose
+        uncertainty is not None.
+        The contained parameter objects are shared with this instance, not copied.
+        """
+        constrained = [param for param in self.parameters.values()
+                       if param.uncertainty is not None]
+        selection = Parameters()
+        for param in constrained:
+            selection.add_parameter(param)
+        return selection
+
     @property
     def nominal_values(self) -> dict:
         """