Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add first tests module and file indexing system #54

Merged
merged 24 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a259c9a
Add tests of blueice_extended_model
dachengx Jul 22, 2023
905b09f
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 22, 2023
9fc8e43
Add docstrings
dachengx Jul 22, 2023
ef1d713
Add iminuit
dachengx Jul 22, 2023
7feabb4
Set data before get expectation values
dachengx Jul 22, 2023
863ef67
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
33fe8ab
Change import path
dachengx Jul 25, 2023
ecd10ee
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
59a0122
Add a function get_file_path to get file path
dachengx Jul 25, 2023
ca81ac2
Make Parameters deepcopyable
dachengx Jul 25, 2023
82e858d
Happier code style
dachengx Jul 25, 2023
166033e
Move template_folder_list outside the loop
dachengx Jul 25, 2023
e769327
Raise error when initialize StatisticalModel directly
dachengx Jul 25, 2023
3c27ec7
Do not need to be too cautious because 166033e9a3157d40bc41f009d2e919…
dachengx Jul 25, 2023
d3a3723
Merge remote-tracking branch 'origin/main' into init_unittest
dachengx Jul 25, 2023
a1654dc
This is why we want to accelerate the unittest
dachengx Jul 25, 2023
37f00ff
Directly call ll of model
dachengx Jul 25, 2023
5273334
some more fitting tests
kdund Jul 25, 2023
264e663
rd drudgery
kdund Jul 25, 2023
59a3902
Merge pull request #59 from XENONnT/knuttest
dachengx Jul 25, 2023
258a15b
Use setUp instead of __init__ of TestCase
dachengx Jul 25, 2023
7da8d53
Remove url_base, add get_template_folder_list function
dachengx Jul 26, 2023
0250340
Add TODO comment on get_expectation_values
dachengx Jul 26, 2023
34d1c44
Minor change
dachengx Jul 26, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion alea/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from scipy.optimize import brentq
from iminuit import Minuit
from iminuit.util import make_func_code
from blueice.likelihood import _needs_data
dachengx marked this conversation as resolved.
Show resolved Hide resolved
from inference_interface import toydata_to_file

from alea.parameters import Parameters
Expand Down Expand Up @@ -58,7 +59,10 @@ def __init__(
confidence_interval_kind: str = "central", # one of central, upper, lower
confidence_interval_threshold: Callable[[float], float] = None,
):
self._data = data
# following https://github.com/JelleAalbers/blueice/blob/7c10222a13227e78dc7224b1a7e56ff91e4a8043/blueice/likelihood.py#L97
dachengx marked this conversation as resolved.
Show resolved Hide resolved
self.is_data_set = False
kdund marked this conversation as resolved.
Show resolved Hide resolved
if data is not None:
self.data = data
self._confidence_level = confidence_level
self._confidence_interval_kind = confidence_interval_kind
self.confidence_interval_threshold = confidence_interval_threshold
Expand Down Expand Up @@ -93,6 +97,7 @@ def _generate_data(self, **kwargs):
"You must write a data-generation method (_generate_data) for your statistical model"
" or use a subclass where it is written for you")

@_needs_data
def ll(self, **kwargs) -> float:
"""
Likelihood function, returns the loglikelihood for the given parameters.
Expand Down Expand Up @@ -143,6 +148,7 @@ def data(self, data):
representing the data-sets of one or more likelihood terms.
"""
self._data = data
self.is_data_set = True

def store_data(
self, file_name, data_list, data_name_list=None, metadata = None):
Expand Down Expand Up @@ -205,6 +211,7 @@ def cost(args):

return cost

@_needs_data
def fit(self, verbose=False, **kwargs) -> Tuple[dict, float]:
"""
Fit the model to the data by maximizing the likelihood
Expand Down
2 changes: 1 addition & 1 deletion alea/model_configs/unbinned_wimp_statistical_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ parameter_definition:

likelihood_config:
likelihood_weights: [1, 1, 1]
template_folder: alea/templates
template_folder: []
likelihood_terms:
# SR0
- name: sr0
Expand Down
26 changes: 18 additions & 8 deletions alea/models/blueice_extended_model.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from pydoc import locate # to lookup likelihood class
from typing import List
from copy import deepcopy
from pydoc import locate

import yaml
import numpy as np
import scipy.stats as stats
from blueice.likelihood import LogAncillaryLikelihood, LogLikelihoodSum

from alea.model import StatisticalModel
from alea.parameters import Parameters
from alea.simulators import BlueiceDataGenerator
from alea.utils import adapt_likelihood_config_for_blueice
from alea.parameters import Parameters


class BlueiceExtendedModel(StatisticalModel):
Expand All @@ -32,7 +33,9 @@ def __init__(self, parameter_definition: dict, likelihood_config: dict):
likelihood_config (dict): A dictionary defining the likelihood.
"""
super().__init__(parameter_definition=parameter_definition)
self._likelihood = self._build_ll_from_config(likelihood_config)
# deepcopy likelihood_config to prevent it to be
# changed by adapt_likelihood_config_for_blueice
self._likelihood = self._build_ll_from_config(deepcopy(likelihood_config))
self.likelihood_names = [t["name"] for t in likelihood_config["likelihood_terms"]]
self.likelihood_names.append("ancillary_likelihood")
self.data_generators = self._build_data_generators()
Expand Down Expand Up @@ -72,22 +75,27 @@ def data(self, data: list):
ll_term.set_data(d)

self._data = data
self.is_data_set = True

def get_expectation_values(self, **kwargs) -> dict:
"""
Return total expectation values (summed over all likelihood terms with the same name)
given a number of named parameters (kwargs)
"""
ret = dict()
# ancillary likelihood does not contribute
for ll in self._likelihood.likelihood_list[:-1]:

ll_pars = list(ll.rate_parameters.keys()) + list(ll.shape_parameters.keys())
# calling ll need data to be set
self_copy = deepcopy(self)
self_copy.data = self_copy.generate_data()

# ancillary likelihood does not contribute
for ll_term in self_copy._likelihood.likelihood_list[:-1]:
ll_pars = list(ll_term.rate_parameters.keys()) + list(ll_term.shape_parameters.keys())
dachengx marked this conversation as resolved.
Show resolved Hide resolved
kdund marked this conversation as resolved.
Show resolved Hide resolved
ll_pars += ["livetime_days"]
call_args = {k: i for k, i in kwargs.items() if k in ll_pars}

mus = ll(full_output=True, **call_args)[1]
for n, mu in zip(ll.source_name_list, mus):
mus = ll_term(full_output=True, **call_args)[1]
for n, mu in zip(ll_term.source_name_list, mus):
ret[n] = ret.get(n, 0) + mu
return ret

Expand All @@ -103,6 +111,8 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum":
# Iterate through each likelihood term in the configuration
for config in likelihood_config["likelihood_terms"]:
likelihood_object = locate(config["likelihood_type"])
if "template_folder" not in likelihood_config:
likelihood_config["template_folder"] = []
dachengx marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(likelihood_config["template_folder"], str):
template_folder_list = [likelihood_config["template_folder"]]
elif isinstance(likelihood_config["template_folder"], list):
Expand Down
11 changes: 6 additions & 5 deletions alea/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,9 @@ def __call__(
values[name] = new_val if new_val is not None else param.nominal_value
if any(i is None for k, i in values.items()):
emptypars = ", ".join([k for k, i in values.items() if i is None])
raise AssertionError("All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
raise AssertionError(
"All parameters must be set explicitly, or have a nominal value,"
" encountered for: " + emptypars)
return values

def __getattr__(self, name: str) -> Parameter:
Expand All @@ -291,9 +292,9 @@ def __getattr__(self, name: str) -> Parameter:
Raises:
AttributeError: If the attribute is not found.
"""
if name in self.parameters:
return self.parameters[name]
else:
try:
return super().__getattribute__('parameters')[name]
dachengx marked this conversation as resolved.
Show resolved Hide resolved
except KeyError:
hammannr marked this conversation as resolved.
Show resolved Hide resolved
raise AttributeError(f"Attribute '{name}' not found.")

def __getitem__(self, name: str) -> Parameter:
Expand Down
82 changes: 61 additions & 21 deletions alea/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import yaml
import pkg_resources
from pydoc import locate
from warnings import warn

import numpy as np
import alea


def get_analysis_space(analysis_space: dict) -> list:
Expand Down Expand Up @@ -31,29 +33,12 @@ def adapt_likelihood_config_for_blueice(

Args:
likelihood_config (dict): likelihood config dict
template_folder_list (list): list of possible base folders where
templates are located. If a folder starts with alea/,
the alea folder is used as base.
template_folder_list (list): list of possible base folders.
dachengx marked this conversation as resolved.
Show resolved Hide resolved
Ordered by priority.

Returns:
dict: adapted likelihood config
"""
template_folder = None
for template_folder in template_folder_list:
# if template folder starts with alea: get location of alea
if template_folder.startswith("alea/"):
alea_dir = os.path.dirname(os.path.abspath(alea.__file__))
template_folder = os.path.join(alea_dir, template_folder.replace("alea/", ""))
# check if template folder exists
if not os.path.isdir(template_folder):
template_folder = None
else:
break

# raise error if no template folder is found
if template_folder is None:
raise FileNotFoundError("No template folder found. Please provide a valid template folder.")

likelihood_config["analysis_space"] = get_analysis_space(
likelihood_config["analysis_space"])
Expand All @@ -62,6 +47,61 @@ def adapt_likelihood_config_for_blueice(
likelihood_config["default_source_class"])

for source in likelihood_config["sources"]:
source["templatename"] = os.path.join(
template_folder, source["template_filename"])
source["templatename"] = get_file_path(
source["template_filename"], template_folder_list)
return likelihood_config


def load_yaml(file_name: str):
    """Read a YAML file and return its parsed content.

    Args:
        file_name (str): name or path of the YAML file. It is resolved
            through get_file_path before being opened.

    Returns:
        The object produced by yaml.safe_load for the file content.
    """
    resolved_path = get_file_path(file_name)
    with open(resolved_path, 'r') as stream:
        return yaml.safe_load(stream)


def _get_abspath(file_name):
    """Locate file_name inside the package sub folders.

    The sub folders 'model_configs', 'runner_configs' and 'templates' are
    checked in that order and the first existing path is returned.

    Raises:
        FileNotFoundError: if file_name exists in none of the sub folders.
    """
    # generator, so each candidate path is only built right before its check
    candidates = (
        os.path.join(_package_path(sub_dir), file_name)
        for sub_dir in ('model_configs', 'runner_configs', 'templates')
    )
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError(f'Cannot find {file_name}')


def _package_path(sub_directory):
    """Return the path of the requested sub folder of the alea package.

    The path is whatever pkg_resources.resource_filename reports for the
    'alea' package and the given sub folder name.
    """
    resource_name = f'{sub_directory}'
    return pkg_resources.resource_filename('alea', resource_name)


def get_file_path(fname, folder_list=None):
    """Find the full path to the resource file.

    Try 3 methods in the following order

    #. fname begins with '/', return fname unchanged as absolute path
    #. fname exists in one of the folders of folder_list that begin with '/'
       (searched in the given order), return the joined path
    #. can get file from _get_abspath, return alea internal file path

    Args:
        fname (str): file name or absolute path of the resource file.
        folder_list (list, optional): candidate base folders, ordered by
            priority. Folders that do not begin with '/' are skipped.
            Defaults to None, meaning that no extra folder is searched.

    Returns:
        str: path to the resource file.

    Raises:
        RuntimeError: if the file can not be found by any of the methods.
    """
    # None instead of a mutable [] default, so no list object is shared
    # between calls
    if folder_list is None:
        folder_list = []

    # 1. From absolute path
    if fname.startswith('/'):
        return fname

    # 2. From local folder
    for folder in folder_list:
        # only absolute folders are used as prefix
        if folder.startswith('/'):
            fpath = os.path.join(folder, fname)
            if os.path.exists(fpath):
                warn(f'Load {fname} successfully from {fpath}')
                return fpath

    # 3. From alea internal files
    try:
        return _get_abspath(fname)
    except FileNotFoundError as error:
        # raise error when can not find corresponding file,
        # keeping the original lookup failure as the cause
        raise RuntimeError(
            f'Can not find {fname}, please check your file system') from error
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
atomicwrites
git+https://github.com/JelleAalbers/blueice
h5py
iminuit
git+https://github.com/XENONnT/inference_interface
matplotlib
mergedeep
Expand Down
71 changes: 71 additions & 0 deletions tests/test_blueice_extended_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from unittest import TestCase

from blueice.likelihood import LogLikelihoodSum
from alea.utils import load_yaml
from alea.models import BlueiceExtendedModel, CustomAncillaryLikelihood


class TestBlueiceExtendedModel(TestCase):
    """Test of the BlueiceExtendedModel class"""

    def setUp(self):
        """Load the model configuration and build a fresh model.

        unittest calls setUp right before every test method, so each test
        works on its own model. This replaces the overridden __init__ of
        TestCase, which is not the intended hook for test fixtures.
        """
        self.config = load_yaml('unbinned_wimp_statistical_model.yaml')
        self.n_likelihood_terms = len(self.config['likelihood_config']['likelihood_terms'])
        self.set_new_model()

    def set_new_model(self):
        """Build a new BlueiceExtendedModel from the loaded configuration"""
        self.model = BlueiceExtendedModel(
            parameter_definition=self.config['parameter_definition'],
            likelihood_config=self.config['likelihood_config'],
        )

    def test_expectation_values(self):
        """Test of the get_expectation_values method"""
        self.set_new_model()
        expectation_values = self.model.get_expectation_values()
        self.assertIsInstance(expectation_values, dict)

        # should avoid accidentally set data
        # (the last likelihood term is left out of this check)
        for ll_term in self.model._likelihood.likelihood_list[:-1]:
            self.assertFalse(
                ll_term.is_data_set,
                'Data should not be set after get_expectation_values.')

        # TODO: assert expectation values after test template source

    def test_generate_data(self):
        """Test of the generate_data method"""
        data = self.model.generate_data()
        self.assertEqual(
            len(data), self.n_likelihood_terms + 2)
        # the last two entries are left out of the source check
        for d in data[:-2]:
            self.assertIn(
                'source', d.dtype.names,
                'Data does not contain source information.')

    def test_likelihood(self):
        """Test of the _likelihood attribute"""
        self.assertIsInstance(self.model._likelihood, LogLikelihoodSum)
        self.assertIsInstance(self.model._likelihood.likelihood_list[-1], CustomAncillaryLikelihood)
        self.assertEqual(
            len(self.model._likelihood.likelihood_list),
            self.n_likelihood_terms + 1)
        # data must be set before the likelihood is called
        self.model.data = self.model.generate_data()
        self.model._likelihood()

    def test_fit(self):
        """Test of the fit method"""
        self.model.data = self.model.generate_data()
        # unpacking checks that fit returns a pair
        fit_result, max_llh = self.model.fit()
        # TODO: check whether all parameters are in fit_result
        # and whether fittable parameters are fitted


class TestCustomAncillaryLikelihood(TestCase):
    """Test of the CustomAncillaryLikelihood class"""

    def test_ancillary_likelihood(self):
        """Test of the ancillary_likelihood method"""
        # TODO: implement this test. Until then it is reported as skipped,
        # so the missing coverage is visible instead of counting as passed.
        self.skipTest('Test of CustomAncillaryLikelihood is not implemented yet.')
33 changes: 33 additions & 0 deletions tests/test_gaussian_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from unittest import TestCase

from alea.models import GaussianModel


class TestGaussianModel(TestCase):
    """Test of the GaussianModel class"""

    def test_gaussian_model(self):
        """Test of generate_data, fit and store_data method of the GaussianModel class"""
        # local imports keep this test self-contained
        import os
        from tempfile import TemporaryDirectory

        parameter_definition = {
            'mu': {
                'fit_guess': 0.,
                'fittable': True,
                'nominal_value': 0.,
            },
            'sigma': {
                'fit_guess': 1.,
                'fit_limits': [
                    0.,
                    None,
                ],
                'fittable': True,
                'nominal_value': 1.,
            },
        }
        simple_model = GaussianModel(
            parameter_definition=parameter_definition)
        simple_model.data = simple_model.generate_data(mu=0, sigma=2)
        # unpacking checks that fit returns a pair
        fit_result, max_llh = simple_model.fit()

        # write the toy data into a temporary folder, so running the tests
        # does not leave simple_data.hdf5 behind in the working directory
        with TemporaryDirectory() as tmp_dir:
            toydata_file = os.path.join(tmp_dir, 'simple_data.hdf5')
            simple_model.store_data(toydata_file, [simple_model.data])
20 changes: 20 additions & 0 deletions tests/test_parameter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from copy import deepcopy
from unittest import TestCase

from alea.utils import load_yaml
from alea.parameters import Parameters
dachengx marked this conversation as resolved.
Show resolved Hide resolved

dachengx marked this conversation as resolved.
Show resolved Hide resolved

class TestParameters(TestCase):
    """Test of the Parameters class"""

    def setUp(self):
        """Load the model configuration and build the Parameters instance.

        unittest calls setUp right before every test method. This replaces
        the overridden __init__ of TestCase, which is not the intended hook
        for test fixtures.
        """
        self.config = load_yaml('unbinned_wimp_statistical_model.yaml')
        self.parameters = Parameters.from_config(self.config['parameter_definition'])

    def test_deep_copyable(self):
        """Test of whether Parameters instance can be deepcopied"""
        # a failed comparison is reported as a test failure, not as an error
        self.assertEqual(
            deepcopy(self.parameters), self.parameters,
            'Parameters instance cannot be correctly deepcopied.')
2 changes: 0 additions & 2 deletions tests/test_statistical_model.py

This file was deleted.

Loading