diff --git a/alea/__init__.py b/alea/__init__.py
index 9ee77721..3a59b33b 100644
--- a/alea/__init__.py
+++ b/alea/__init__.py
@@ -4,6 +4,8 @@
 
 from .models import *
 
+from .runner import *
+
 from .utils import *
 
 from .parameters import *
diff --git a/alea/examples/configs/unbinned_wimp_running.yaml b/alea/examples/configs/unbinned_wimp_running.yaml
index d8115d52..31b29678 100644
--- a/alea/examples/configs/unbinned_wimp_running.yaml
+++ b/alea/examples/configs/unbinned_wimp_running.yaml
@@ -1,3 +1,4 @@
+statistical_model: alea.models.BlueiceExtendedModel
 statistical_model_config: unbinned_wimp_statistical_model.yaml
 
 poi: wimp_rate_multiplier
@@ -12,8 +13,8 @@ computation:
       }
 
     parameters_in_common: {
-        hypotheses: ["true", "null", "free"],
-        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        hypotheses: ["free", "null", "true"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.h5",
         n_mc: 5000,
         n_batch: 40,
       }
@@ -24,12 +25,12 @@ computation:
     parameters_to_vary: {wimp_mass: [10, 50, 200]}
 
     parameters_in_common: {
-        hypotheses: ["true", "null", "free"],
-        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        hypotheses: ["free", "null", "true"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.h5",
         n_mc: 5000,
         n_batch: 40,
       }
-    limit_threshold: "thresholds.hdf5"
+    limit_threshold: "thresholds.h5"
     toydata_mode: "generate"
     parameters_as_wildcards: ["poi_expectation", "n_mc", "n_batch"]
 
@@ -38,12 +39,12 @@ computation:
     parameters_to_vary: {poi_expectation: [0.], wimp_mass: [10, 50, 200]}
 
    parameters_in_common: {
-        hypotheses: ["true", "null", "free"],
-        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.hdf5",
+        hypotheses: ["free", "null", "true"],
+        output_filename: "toymc_power_wimp_mass_{wimp_mass:d}_poi_expectation_{poi_expectation:.2f}.h5",
         n_mc: 5000,
         n_batch: 40,
         compute_confidence_interval: True,
-        limit_threshold: "thresholds.hdf5",
+        limit_threshold: "thresholds.h5",
       }
     toydata_mode: "generate"
diff --git a/alea/examples/configs/unbinned_wimp_statistical_model.yaml b/alea/examples/configs/unbinned_wimp_statistical_model.yaml
index cd4f3f37..5176345a 100644
--- a/alea/examples/configs/unbinned_wimp_statistical_model.yaml
+++ b/alea/examples/configs/unbinned_wimp_statistical_model.yaml
@@ -21,6 +21,9 @@ parameter_definition:
     fit_limits:
       - 0
       - null
+    parameter_interval_bounds:
+      - 0
+      - null
 
   er_rate_multiplier:
     nominal_value: 1.0
@@ -48,7 +51,7 @@ parameter_definition:
   er_band_shift:
     nominal_value: 0
     ptype: shape
-    uncertainty: null  # scipy.stats.uniform(loc=-1, scale=2)
+    uncertainty: null  # 'scipy.stats.uniform(loc=-1, scale=2)'
     # relative_uncertainty: false
     fittable: true
     blueice_anchors:
diff --git a/alea/model.py b/alea/model.py
index 742cab37..e33a9c47 100644
--- a/alea/model.py
+++ b/alea/model.py
@@ -1,5 +1,6 @@
 import inspect
 import warnings
+from copy import deepcopy
 from typing import Dict, List, Tuple, Callable, Optional
 
 import numpy as np
@@ -10,6 +11,7 @@
 from inference_interface import toydata_to_file
 
 from alea.parameters import Parameters
+from alea.utils import within_limits, clip_limits
 
 
 class StatisticalModel:
@@ -71,6 +73,7 @@ def __init__(
         confidence_level: float = 0.9,
         confidence_interval_kind: str = "central",  # one of central, upper, lower
         confidence_interval_threshold: Callable[[float], float] = None,
+        **kwargs,
     ):
         """Initialize a statistical model"""
model""" if type(self) == StatisticalModel: @@ -183,7 +186,8 @@ def store_data( data_name_list: Optional[List] = None, metadata: Optional[Dict] = None): """ - Store a list of datasets (each on the form of a list of one or more structured arrays) + Store a list of datasets. + (each on the form of a list of one or more structured arrays or dicts) Using inference_interface, but included here to allow over-writing. The structure would be: [[datasets1], [datasets2], ..., [datasetsn]], where each of datasets is a list of structured arrays. @@ -198,14 +202,26 @@ def store_data( metadata (dict, optional (default=None)): metadata to store with the data. If None, no metadata is stored. """ + if all([isinstance(d, dict) for d in data_list]): + _data_list = [list(d.values()) for d in data_list] + elif all([isinstance(d, list) for d in data_list]): + _data_list = data_list + else: + raise ValueError( + 'Unsupported mixed toydata format! ' + 'toydata should be a list of dict or a list of list',) + if data_name_list is None: if hasattr(self, "likelihood_names"): data_name_list = self.likelihood_names else: - data_name_list = ["{:d}".format(i) for i in range(len(data_list[0]))] + data_name_list = ["{:d}".format(i) for i in range(len(_data_list[0]))] kw = {'metadata': metadata} if metadata is not None else dict() - toydata_to_file(file_name, data_list, data_name_list, **kw) + if len(_data_list[0]) != len(data_name_list): + raise ValueError( + "The number of data sets and data names must be the same") + toydata_to_file(file_name, _data_list, data_name_list, **kw) def get_expectation_values(self, **parameter_values): """ @@ -257,6 +273,7 @@ def cost(args): call_kwargs = {} for i, k in enumerate(self.parameters.names): call_kwargs[k] = args[i] + # for optimization, we want to minimize the negative log-likelihood return self.ll(**call_kwargs) * -1 return cost @@ -338,10 +355,10 @@ def _confidence_interval_checks( if parameter_interval_bounds is None: parameter_interval_bounds = parameter_of_interest.parameter_interval_bounds - if parameter_interval_bounds is None: - raise ValueError( - "You must set parameter_interval_bounds in the parameter config" - " or when calling confidence_interval") + else: + value = parameter_interval_bounds + parameter_of_interest._check_parameter_interval_bounds(value) + parameter_interval_bounds = clip_limits(value) if parameter_of_interest.ptype == "rate": try: @@ -350,6 +367,7 @@ def _confidence_interval_checks( else: source_name = poi_name mu_parameter = self.get_expectation_values(**kwargs)[source_name] + # update parameter_interval_bounds because poi is rate_multiplier parameter_interval_bounds = ( parameter_interval_bounds[0] / mu_parameter, parameter_interval_bounds[1] / mu_parameter) @@ -378,7 +396,9 @@ def confidence_interval( parameter_interval_bounds: Tuple[float, float] = None, confidence_level: float = None, confidence_interval_kind: str = None, - **kwargs) -> Tuple[float, float]: + best_fit_args: dict = None, + confidence_interval_args: dict = None, + ) -> Tuple[float, float]: """ Uses self.fit to compute confidence intervals for a certain named parameter. If the parameter is a rate parameter, and the model has expectation values implemented, @@ -401,36 +421,52 @@ def confidence_interval( If None, the default kind of the model is used. 
 
         Keyword Args:
-            kwargs: the parameters for get_expectation_values and fit
+            best_fit_args: the parameters used **only** to get the global best-fit likelihood
+            confidence_interval_args: the parameters used to get the profile likelihood,
+                the best-fit parameters of the profile likelihood,
+                parameter_interval_bounds, and the confidence interval
         """
+        if best_fit_args is None:
+            best_fit_args = {}
+        if confidence_interval_args is None:
+            confidence_interval_args = {}
         ci_objects = self._confidence_interval_checks(
             poi_name,
             parameter_interval_bounds,
             confidence_level,
             confidence_interval_kind,
-            **kwargs)
+            **confidence_interval_args)
         confidence_interval_kind, confidence_interval_threshold, parameter_interval_bounds = ci_objects
 
-        # find best-fit:
-        best_result, best_ll = self.fit(**kwargs)
+        # best_fit_args only provides the best-fit likelihood
+        _, best_ll = self.fit(**best_fit_args)
+        # the profile-likelihood optimization under
+        # confidence_interval_args provides the best_parameter
+        best_result, _ = self.fit(**confidence_interval_args)
         best_parameter = best_result[poi_name]
-        mask = (parameter_interval_bounds[0] < best_parameter)
-        mask &= (best_parameter < parameter_interval_bounds[1])
-        assert mask, ("the best-fit is outside your confidence interval"
-                      " search limits in parameter_interval_bounds")
-        # log-likelihood - critical value:
+        mask = within_limits(best_parameter, parameter_interval_bounds)
+        if not mask:
+            raise ValueError(
+                f"The best-fit {best_parameter} is outside your confidence interval "
+                f"search limits in parameter_interval_bounds {parameter_interval_bounds}.")
 
         # define intersection between likelihood ratio curve and the critical curve:
-        def t(hypothesis):
+        def t(hypothesis_value):
             # define the intersection
             # between the profile-log-likelihood curve and the rejection threshold
-            kwargs[poi_name] = hypothesis
-            _, ll = self.fit(**kwargs)  # ll is + log-likelihood here
+            _confidence_interval_args = deepcopy(confidence_interval_args)
+            _confidence_interval_args[poi_name] = hypothesis_value
+            _, ll = self.fit(**_confidence_interval_args)  # ll is + log-likelihood here
             ret = 2. * (best_ll - ll)  # likelihood curve "right way up" (smiling)
             # if positive, hypothesis is excluded
-            return ret - confidence_interval_threshold(hypothesis)
+            return ret - confidence_interval_threshold(hypothesis_value)
+
+        t_best_parameter = t(best_parameter)
+
+        if t_best_parameter > 0:
+            warnings.warn(f"Confidence interval calculation failed, given the fixed parameters {confidence_interval_args}.")
 
-        if confidence_interval_kind in {"upper", "central"}:
+        if confidence_interval_kind in {"upper", "central"} and t_best_parameter < 0:
             if t(parameter_interval_bounds[1]) > 0:
                 ul = brentq(t, best_parameter, parameter_interval_bounds[1])
             else:
@@ -438,7 +474,7 @@ def t(hypothesis):
         else:
             ul = np.nan
 
-        if confidence_interval_kind in {"lower", "central"}:
+        if confidence_interval_kind in {"lower", "central"} and t_best_parameter < 0:
             if t(parameter_interval_bounds[0]) > 0:
                 dl = brentq(t, parameter_interval_bounds[0], best_parameter)
             else:
diff --git a/alea/models/blueice_extended_model.py b/alea/models/blueice_extended_model.py
index f12a0bd3..69d84581 100644
--- a/alea/models/blueice_extended_model.py
+++ b/alea/models/blueice_extended_model.py
@@ -38,9 +38,14 @@ class BlueiceExtendedModel(StatisticalModel):
         (assert that all sources have the same analysis_space)
     """
 
-    def __init__(self, parameter_definition: dict, likelihood_config: dict):
-        """Initializes the statistical model."""
-        super().__init__(parameter_definition=parameter_definition)
+    def __init__(self, parameter_definition: dict, likelihood_config: dict, **kwargs):
+        """Initializes the statistical model.
+
+        Args:
+            parameter_definition (dict): A dictionary defining the model parameters.
+            likelihood_config (dict): A dictionary defining the likelihood.
+        """
+        super().__init__(parameter_definition=parameter_definition, **kwargs)
         self._likelihood = self._build_ll_from_config(likelihood_config)
         self.likelihood_names = [t["name"] for t in likelihood_config["likelihood_terms"]]
         self.likelihood_names.append("ancillary_likelihood")
@@ -66,14 +71,23 @@ def data(self) -> dict:
         return super().data
 
     @data.setter
-    def data(self, data: dict):
+    def data(self, data: dict or list):
         """
         Overrides default setter. Will also set the data of the blueice ll.
         Data-sets are expected to be in the form of a list of one or more
         structured arrays representing the data-sets of one or more likelihood terms.
+
+        Args:
+            data (dict or list): Data of the statistical model.
+                If data is a list, it must be a list of length len(self.likelihood_names) + 1.
""" # iterate through all likelihood terms and set the science data in the blueice ll - # except the generate_values + # last entry in data are the generate_values + if isinstance(data, list): + if len(data) != len(self.likelihood_names) + 1: + raise ValueError( + f"Data must be a list of length {len(self.likelihood_names) + 1}") + data = dict(zip(self.likelihood_names + ["generate_values"], data)) for i, (dataset_name, d) in enumerate(data.items()): if dataset_name != "generate_values": ll_term = self._likelihood.likelihood_list[i] @@ -159,7 +173,7 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": for i, source in enumerate(config["sources"]): parameters_to_ignore: List[str] = [ p.name for p in self.parameters if ( - p.ptype == "shape") & (p.name not in source["parameters"])] + p.ptype == "shape") and (p.name not in source["parameters"])] # no efficiency affects PDF: parameters_to_ignore += [p.name for p in self.parameters if (p.ptype == "efficiency")] parameters_to_ignore += source.get("extra_dont_hash_settings", []) @@ -179,6 +193,7 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": rate_parameter = rate_parameters[0] if rate_parameter.endswith("_rate_multiplier"): rate_parameter = rate_parameter.replace("_rate_multiplier", "") + # The ancillary term is handled in CustomAncillaryLikelihood ll.add_rate_parameter(rate_parameter, log_prior=None) else: raise NotImplementedError( @@ -196,6 +211,7 @@ def _build_ll_from_config(self, likelihood_config: dict) -> "LogLikelihoodSum": anchors = self.parameters[p].blueice_anchors if anchors is None: raise ValueError(f"Shape parameter {p} does not have any anchors.") + # The ancillary term is handled in CustomAncillaryLikelihood ll.add_shape_parameter(p, anchors=anchors, log_prior=None) ll.prepare() @@ -245,12 +261,20 @@ def _generate_data(self, **generate_values) -> dict: data["generate_values"] = dict_to_structured_array(generate_values) return data + def store_data(self, file_name, data_list, data_name_list=None, metadata=None): + """ + Store data in a file. + Append the generate_values to the data_name_list. 
+ """ + if data_name_list is None: + data_name_list = self.likelihood_names + ["generate_values"] + super().store_data(file_name, data_list, data_name_list, metadata) + def _generate_science_data(self, **generate_values) -> dict: - """Generate data for all science data terms terms.""" + """Generate the science data for all likelihood terms except the ancillary likelihood.""" science_data = [ - gen.simulate(**generate_values) - for gen in self.data_generators] - return dict(zip(self.likelihood_names, science_data)) + gen.simulate(**generate_values) for gen in self.data_generators] + return dict(zip(self.likelihood_names[:-1], science_data)) def _generate_ancillary_measurements(self, **generate_values) -> dict: """ diff --git a/alea/parameters.py b/alea/parameters.py index 4825bcbc..5de3ef10 100644 --- a/alea/parameters.py +++ b/alea/parameters.py @@ -1,5 +1,12 @@ +import warnings from typing import Any, Dict, List, Optional, Tuple +# These imports are needed to evaluate the uncertainty string +import numpy # noqa: F401 +import scipy # noqa: F401 + +from alea.utils import within_limits, clip_limits + class Parameter: """ @@ -43,12 +50,12 @@ def __init__( self.nominal_value = nominal_value self.fittable = fittable self.ptype = ptype - self._uncertainty = uncertainty + self.uncertainty = uncertainty self.relative_uncertainty = relative_uncertainty self.blueice_anchors = blueice_anchors self.fit_limits = fit_limits self.parameter_interval_bounds = parameter_interval_bounds - self._fit_guess = fit_guess + self.fit_guess = fit_guess self.description = description def __repr__(self) -> str: @@ -66,7 +73,7 @@ def uncertainty(self) -> float or Any: If the uncertainty is a string, it can be evaluated as a numpy or scipy function. """ if isinstance(self._uncertainty, str): - # Evaluate the uncertainty if it's a string + # Evaluate the uncertainty if it's a string starting with "scipy." or "numpy." 
             if self._uncertainty.startswith("scipy.") or self._uncertainty.startswith("numpy."):
                 return eval(self._uncertainty)
             else:
@@ -94,6 +101,22 @@ def fit_guess(self) -> float:
     def fit_guess(self, value: float) -> None:
         self._fit_guess = value
 
+    @property
+    def parameter_interval_bounds(self) -> float:
+        # make sure to only return parameter_interval_bounds if fittable
+        if self._parameter_interval_bounds is not None and not self.fittable:
+            raise ValueError(
+                f"Parameter {self.name} is not fittable, but has parameter_interval_bounds set.")
+        else:
+            # warn when value contains None
+            value = self._parameter_interval_bounds
+            self._check_parameter_interval_bounds(value)
+            return clip_limits(value)
+
+    @parameter_interval_bounds.setter
+    def parameter_interval_bounds(self, value: Optional[List]) -> None:
+        self._parameter_interval_bounds = value
+
     def __eq__(self, other: object) -> bool:
         """Return True if all attributes are equal"""
         if isinstance(other, Parameter):
@@ -102,15 +125,20 @@ def __eq__(self, other: object) -> bool:
             return False
 
     def value_in_fit_limits(self, value: float) -> bool:
-        """Return True if value is within fit_limits"""
-        if self.fit_limits is None:
-            return True
-        elif self.fit_limits[0] is None:
-            return value <= self.fit_limits[1]
-        elif self.fit_limits[1] is None:
-            return value >= self.fit_limits[0]
-        else:
-            return self.fit_limits[0] <= value <= self.fit_limits[1]
+        """Returns True if value is within fit_limits"""
+        return within_limits(value, self.fit_limits)
+
+    def _check_parameter_interval_bounds(self, value):
+        """Check if parameter_interval_bounds is within fit_limits and is not None."""
+        if (value is None) or (value[0] is None) or (value[1] is None):
+            warnings.warn(
+                f"parameter_interval_bounds not defined for parameter {self.name}. "
+                "This may cause numerical overflow when calculating the confidence interval.")
+        value = clip_limits(value)
+        if not (self.value_in_fit_limits(value[0]) and self.value_in_fit_limits(value[1])):
+            raise ValueError(
+                f"parameter_interval_bounds {value} not within "
+                f"fit_limits {self.fit_limits} for parameter {self.name}.")
 
 
 class Parameters:
@@ -288,8 +316,8 @@ def __call__(
         if any(i is None for k, i in values.items()):
             emptypars = ", ".join([k for k, i in values.items() if i is None])
             raise AssertionError(
-                "All parameters must be set explicitly, or have a nominal value,"
-                " encountered for: " + emptypars)
+                "All parameters must be set explicitly, or have a nominal value, "
+                "not satisfied for: " + emptypars)
         return values
 
     def __getattr__(self, name: str) -> Parameter:
diff --git a/alea/runner.py b/alea/runner.py
new file mode 100644
index 00000000..7e4a600e
--- /dev/null
+++ b/alea/runner.py
@@ -0,0 +1,275 @@
+from copy import deepcopy
+from typing import Callable, Optional
+from datetime import datetime
+from pydoc import locate
+import inspect
+import warnings
+
+from tqdm import tqdm
+import numpy as np
+
+from inference_interface import toydata_from_file, numpy_to_toyfile
+from alea.model import StatisticalModel
+
+
+class Runner:
+    """
+    Runner manipulates the statistical model and toydata.
+        - initialize the statistical model
+        - generate or read toy data
+        - save toy data if needed
+        - fit fittable parameters
+        - write the output file
+    One toyfile can contain multiple toydata, but all of them are from the same generate_values.
+
+    Attributes:
+        statistical_model: statistical model
+        poi: parameter of interest
+        hypotheses (list): list of hypotheses
+        common_hypothesis (dict): common hypothesis, the values are copied to each hypothesis
+        generate_values (dict): generate values for toydata
+        _compute_confidence_interval (bool): whether to compute the confidence interval
+        _n_mc (int): number of Monte Carlo iterations
+        _toydata_file (str): toydata filename
+        _toydata_mode (str): toydata mode, 'read', 'generate', 'generate_and_write', 'no_toydata'
+        _metadata (dict): metadata, if None, it is set to {}
+        _output_file (str): output filename
+        _result_names (list): list of result names
+        _result_dtype (list): list of result dtypes
+        _hypotheses_values (list): list of values for hypotheses
+
+    Args:
+        statistical_model (str): statistical model class name
+        poi (str): parameter of interest
+        hypotheses (list): list of hypotheses
+        n_mc (int): number of Monte Carlo iterations
+        statistical_model_args (dict, optional (default={})): arguments for statistical model
+        parameter_definition (dict or list, optional (default=None)): parameter definition
+        likelihood_config (dict, optional (default=None)): likelihood configuration
+        compute_confidence_interval (bool, optional (default=False)):
+            whether to compute the confidence interval
+        confidence_level (float, optional (default=0.9)): confidence level
+        confidence_interval_kind (str, optional (default='central')):
+            kind of confidence interval, choice from 'central', 'upper' or 'lower'
+        confidence_interval_threshold (Callable[[float], float], optional (default=None)):
+            confidence interval threshold of likelihood ratio
+        common_hypothesis (dict, optional (default=None)):
+            common hypothesis, the values are copied to each hypothesis
+        generate_values (dict, optional (default=None)):
+            generate values of toydata. If None, the nominal values of the statistical model are used.
+        toydata_mode (str, optional (default='generate_and_write')):
+            toydata mode, choice from 'read', 'generate', 'generate_and_write', 'no_toydata'
+        toydata_file (str, optional (default=None)): toydata filename
+        metadata (dict, optional (default=None)): metadata
+        output_file (str, optional (default='test_toymc.h5')): output filename
+    """
+
+    def __init__(
+            self,
+            statistical_model: str,
+            poi: str,
+            hypotheses: list,
+            n_mc: int,
+            common_hypothesis: dict = None,
+            generate_values: dict = None,
+            statistical_model_args: dict = None,
+            parameter_definition: Optional[dict or list] = None,
+            likelihood_config: dict = None,
+            compute_confidence_interval: bool = False,
+            confidence_level: float = 0.9,
+            confidence_interval_kind: str = 'central',
+            confidence_interval_threshold: Callable[[float], float] = None,
+            toydata_mode: str = 'generate_and_write',
+            toydata_file: str = None,
+            metadata: dict = None,
+            output_file: str = 'test_toymc.h5',
+    ):
+        """
+        Initialize the statistical model,
+        parameter list, and generate values list
+        """
+        statistical_model_class = locate(statistical_model)
+        if statistical_model_class is None:
+            raise ValueError(f'Could not find {statistical_model}!')
+        if not inspect.isclass(statistical_model_class):
+            raise ValueError(f'{statistical_model_class} is not a class!')
+        if not issubclass(statistical_model_class, StatisticalModel):
+            raise ValueError(f'{statistical_model_class} is not a subclass of StatisticalModel!')
+
+        # likelihood_config is a keyword argument because not all statistical models need it
+        if statistical_model_args is None:
+            statistical_model_args = {}
+        statistical_model_args['likelihood_config'] = likelihood_config
+        self.statistical_model = statistical_model_class(
+            parameter_definition=parameter_definition,
+            confidence_level=confidence_level,
+            confidence_interval_kind=confidence_interval_kind,
+            confidence_interval_threshold=confidence_interval_threshold,
+            **(statistical_model_args if statistical_model_args else {}),
+        )
+
+        self.poi = poi
+        self.hypotheses = hypotheses if hypotheses else []
+        self.common_hypothesis = common_hypothesis if common_hypothesis else {}
+        self.generate_values = generate_values if generate_values else {}
+        self._compute_confidence_interval = compute_confidence_interval
+        self._n_mc = n_mc
+        self._toydata_file = toydata_file
+        self._toydata_mode = toydata_mode
+        self._output_file = output_file
+        self._metadata = metadata if metadata else {}
+
+        self._result_names, self._result_dtype = self._get_parameter_list()
+
+        self._hypotheses_values = self._get_hypotheses()
+
+    def _get_parameter_list(self):
+        """Get parameter list and result list from statistical model"""
+        parameter_list = sorted(self.statistical_model.get_parameter_list())
+        # add likelihood, lower limit, and upper limit
+        result_names = parameter_list + ['ll', 'dl', 'ul']
+        result_dtype = [(n, float) for n in parameter_list]
+        result_dtype += [(n, float) for n in ['ll', 'dl', 'ul']]
+        return result_names, result_dtype
+
+    def _get_hypotheses(self):
+        """Get generate values list from hypotheses"""
+        hypotheses_values = []
+        hypotheses = deepcopy(self.hypotheses)
+        if 'free' not in hypotheses and self._compute_confidence_interval:
+            raise ValueError('free hypothesis is needed for confidence interval calculation!')
+        if 'free' in hypotheses and hypotheses.index('free') != 0:
+            raise ValueError('free hypothesis should be the first hypothesis!')
+
+        for hypothesis in hypotheses:
+            if hypothesis == 'null':
+                # there is no signal component
+                hypothesis = {self.poi: 0.}
+            elif hypothesis == 'true':
+                # the true signal component is used
+                if self.poi not in self.generate_values:
+                    raise ValueError(
+                        f'{self.poi} should be provided in generate_values',
+                    )
+                hypothesis = {
+                    self.poi: self.generate_values.get(self.poi),
+                }
+            elif hypothesis == 'free':
+                hypothesis = {}
+
+            array = deepcopy(self.common_hypothesis)
+            array.update(hypothesis)
+            hypotheses_values.append(array)
+        return hypotheses_values
+
+    def write_output(self, results):
+        """Write output file with metadata"""
+        metadata = deepcopy(self._metadata)
+
+        result_names = [f'{i:d}' for i in range(len(self._hypotheses_values))]
+        for i, ea in enumerate(self.hypotheses):
+            if ea in {'free', 'null', 'true'}:
+                result_names[i] = ea
+
+        metadata['date'] = datetime.now().strftime('%Y%m%d_%H:%M:%S')
+        metadata['poi'] = self.poi
+        metadata['common_hypothesis'] = self.common_hypothesis
+        metadata['generate_values'] = self.generate_values
+
+        array_metadatas = [{'hypotheses_values': ea} for ea in self._hypotheses_values]
+        numpy_arrays_and_names = [(r, rn) for r, rn in zip(results, result_names)]
+
+        print(f'Saving {self._output_file}')
+        numpy_to_toyfile(
+            self._output_file,
+            numpy_arrays_and_names=numpy_arrays_and_names,
+            metadata=metadata,
+            array_metadatas=array_metadatas)
+
+    def read_toydata(self):
+        """Read toydata from file"""
+        toydata, toydata_names = toydata_from_file(self._toydata_file)
+        return toydata, toydata_names
+
+    def write_toydata(self, toydata, toydata_names):
+        """
+        Write toydata to file.
+        If toydata is a list of dicts, convert it to a list of lists.
+        """
+        self.statistical_model.store_data(self._toydata_file, toydata, toydata_names)
+
+    def data_generator(self):
+        """Generate, save or read toydata"""
+        # check toydata mode
+        if self._toydata_mode not in {
+            'read', 'generate', 'generate_and_write', 'no_toydata',
+        }:
+            raise ValueError(f'Unknown toydata mode: {self._toydata_mode}')
+        # check the number of stored toydata
+        if self._toydata_mode == 'read':
+            toydata, toydata_names = self.read_toydata()
+            if len(toydata) < self._n_mc:
+                raise ValueError(
+                    f'Number of stored toydata {len(toydata)} is '
+                    f'less than the number of Monte Carlo iterations {self._n_mc}!')
+            elif len(toydata) > self._n_mc:
+                warnings.warn(
+                    f'Number of stored toydata {len(toydata)} is '
+                    f'larger than the number of Monte Carlo iterations {self._n_mc}.')
+        else:
+            toydata = []
+            toydata_names = None
+        # generate toydata
+        for i_mc in range(self._n_mc):
+            if self._toydata_mode == 'generate' or self._toydata_mode == 'generate_and_write':
+                data = self.statistical_model.generate_data(
+                    **self.generate_values)
+                if self._toydata_mode == 'generate_and_write':
+                    # append toydata
+                    toydata.append(data)
+            elif self._toydata_mode == 'read':
+                data = toydata[i_mc]
+            elif self._toydata_mode == 'no_toydata':
+                data = None
+            yield data
+        # save toydata
+        if self._toydata_mode == 'generate_and_write':
+            self.write_toydata(toydata, toydata_names)
+
+    def toy_simulation(self):
+        """
+        For each Monte Carlo iteration:
+            - run a toy simulation with the specified toydata mode and generate values.
+            - loop over hypotheses.
+
+        Todo:
+            Implement per-hypothesis switching on whether to compute confidence intervals
+        """
+        results = [np.zeros(self._n_mc, dtype=self._result_dtype) for _ in self._hypotheses_values]
+        for i_mc, data in tqdm(enumerate(self.data_generator()), total=self._n_mc):
+            self.statistical_model.data = data
+            fit_results = []
+            for hypothesis_values in self._hypotheses_values:
+                fit_result, max_llh = self.statistical_model.fit(**hypothesis_values)
+                fit_result['ll'] = max_llh
+                if self._compute_confidence_interval and (self.poi not in hypothesis_values):
+                    dl, ul = self.statistical_model.confidence_interval(
+                        poi_name=self.poi,
+                        best_fit_args=self._hypotheses_values[0],
+                        confidence_interval_args=hypothesis_values)
+                else:
+                    dl, ul = np.nan, np.nan
+                fit_result['dl'] = dl
+                fit_result['ul'] = ul
+
+                fit_results.append(fit_result)
+            # assign fitting results
+            for fit_result, result_array in zip(fit_results, results):
+                result_array[i_mc] = tuple(fit_result[pn] for pn in self._result_names)
+        return results
+
+    def run(self):
+        """Run toy simulation"""
+        results = self.toy_simulation()
+
+        self.write_output(results)
diff --git a/alea/template_source.py b/alea/template_source.py
index b1322c95..9ff75ef8 100644
--- a/alea/template_source.py
+++ b/alea/template_source.py
@@ -169,10 +169,10 @@ class CombinedSource(blueice.HistogramPdfSource):
     but takes in lists of histogram names.
     :param weights: weights
    :param histnames: list of filenames containing the histograms
-    :param templatenames: list of names of histograms within the hdf5 files
+    :param templatenames: list of names of histograms within the h5 files
     :param named_parameters : list of names of weights to be applied to histograms.
         Must be 1 shorter than histnames, templatenames
-    :param histogram_parameters: names of parameters that should be put in the hdf5/histogram names,
+    :param histogram_parameters: names of parameters that should be put in the h5/histogram names,
     """
 
     def build_histogram(self):
@@ -305,7 +305,7 @@ def build_histogram(self):
             logging.debug("h.bin_edges type", type(h.bin_edges))
             if len(seen_bin_edges) != len(expected_bin_edges):
                 raise ValueError(
-                    "Axis %d of histogram %s in hdf5 file %s has %d bin edges, but expected %d"
+                    "Axis %d of histogram %s in h5 file %s has %d bin edges, but expected %d"
                     % (
                         axis_i, histname, templatename, len(seen_bin_edges),
                         len(expected_bin_edges)))
@@ -315,7 +315,7 @@ def build_histogram(self):
                     expected_bin_edges, decimal=4)
             except AssertionError:
                 warnings.warn(
-                    "Axis %d of histogram %s in hdf5 file %s has bin edges %s, "
+                    "Axis %d of histogram %s in h5 file %s has bin edges %s, "
                     "but expected %s. Since length matches, setting it expected values..."
                     % (
                         axis_i, histname, templatename, seen_bin_edges,
diff --git a/alea/utils.py b/alea/utils.py
index 409b78a6..f468a617 100644
--- a/alea/utils.py
+++ b/alea/utils.py
@@ -1,7 +1,7 @@
 import os
 import re
 import yaml
-import pkg_resources
+import importlib_resources
 from glob import glob
 from copy import deepcopy
 from pydoc import locate
@@ -12,6 +12,9 @@
 logging.basicConfig(level=logging.INFO)
 
 
+MAX_FLOAT = np.sqrt(np.finfo(np.float32).max)
+
+
 def get_analysis_space(analysis_space: dict) -> list:
     """Convert analysis_space to a list of tuples with evaluated values."""
     eval_analysis_space = []
@@ -79,7 +82,7 @@ def _get_abspath(file_name):
 
 def _package_path(sub_directory):
     """Get the abs path of the requested sub folder"""
-    return pkg_resources.resource_filename('alea', f'{sub_directory}')
+    return importlib_resources.files('alea') / sub_directory
 
 
 def formatted_to_asterisked(formatted):
@@ -161,3 +164,30 @@ def get_template_folder_list(likelihood_config):
         raise ValueError(
             "template_folder must be either a string or a list of strings.")
     return template_folder_list
+
+
+def within_limits(value, limits):
+    """Returns True if value is within limits"""
+    if limits is None:
+        return True
+    elif limits[0] is None:
+        return value <= limits[1]
+    elif limits[1] is None:
+        return value >= limits[0]
+    else:
+        return limits[0] <= value <= limits[1]
+
+
+def clip_limits(value):
+    """
+    Clip limits to be within [-MAX_FLOAT, MAX_FLOAT]
+    by converting None to -MAX_FLOAT and MAX_FLOAT.
+    """
+    if value is None:
+        value = [-MAX_FLOAT, MAX_FLOAT]
+    else:
+        if value[0] is None:
+            value[0] = -MAX_FLOAT
+        if value[1] is None:
+            value[1] = MAX_FLOAT
+    return value
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8f5ebe83..7ca4be5f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -38,7 +38,7 @@ Alea is a tool to perform toyMC-based inference constructions.
    :maxdepth: 1
    :caption: Example notebooks
 
-   notebooks/placeholders.ipynb
+   notebooks/gaussian_model.ipynb
 
 .. toctree::
    :maxdepth: 2
diff --git a/docs/source/notebooks/gaussian_model.ipynb b/docs/source/notebooks/gaussian_model.ipynb
new file mode 120000
index 00000000..3de578be
--- /dev/null
+++ b/docs/source/notebooks/gaussian_model.ipynb
@@ -0,0 +1 @@
+../../../notebooks/gaussian_model.ipynb
\ No newline at end of file
diff --git a/docs/source/notebooks/placeholder.ipynb b/docs/source/notebooks/placeholder.ipynb
deleted file mode 120000
index c5208c92..00000000
--- a/docs/source/notebooks/placeholder.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-../../../notebooks/placeholder.ipynb
\ No newline at end of file
diff --git a/docs/source/reference/alea.rst b/docs/source/reference/alea.rst
index 82980323..9f487fbc 100644
--- a/docs/source/reference/alea.rst
+++ b/docs/source/reference/alea.rst
@@ -29,6 +29,14 @@ alea.parameters module
     :undoc-members:
     :show-inheritance:
 
+alea.runner module
+------------------
+
+.. automodule:: alea.runner
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 alea.simulators module
 ----------------------
diff --git a/notebooks/placeholder.ipynb b/notebooks/gaussian_model.ipynb
similarity index 98%
rename from notebooks/placeholder.ipynb
rename to notebooks/gaussian_model.ipynb
index fe4e9a0d..d636c3e0 100644
--- a/notebooks/placeholder.ipynb
+++ b/notebooks/gaussian_model.ipynb
@@ -2,10 +2,10 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "76edc00b-f220-47e2-914b-0781c288594d",
+   "id": "766ddb65-2b9c-4c08-85db-96fdae472503",
    "metadata": {},
    "source": [
-    "# Please delete this when new meaningful notebooks are added."
+    "# Example of GaussianModel"
    ]
   },
   {
@@ -33,17 +33,17 @@
    "        'fit_guess': 0.,\n",
    "        'fittable': True,\n",
    "        'nominal_value': 0.,\n",
+    "        'parameter_interval_bounds': [\n",
+    "            -10,\n",
+    "            10,\n",
+    "        ],\n",
    "    },\n",
    "    'sigma': {\n",
-    "        'fit_guess': 1.,\n",
-    "        'fit_limits': [\n",
-    "            0.,\n",
-    "            None,\n",
-    "        ],\n",
-    "        'fittable': True,\n",
+    "        'fittable': False,\n",
    "        'nominal_value': 1.,\n",
    "    },\n",
    "}\n",
+    "\n",
    "model = GaussianModel(parameter_definition=parameter_definition)"
   ]
  },
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_gaussian_model.py b/tests/test_gaussian_model.py
index ea21309e..9d007cd9 100644
--- a/tests/test_gaussian_model.py
+++ b/tests/test_gaussian_model.py
@@ -7,30 +7,31 @@
 from alea.examples import GaussianModel
 
 
+gaussian_model_parameter_definition = {
+    'mu': {
+        'fit_guess': 0.,
+        'fittable': True,
+        'nominal_value': 0.,
+        'parameter_interval_bounds': [
+            -10,
+            10,
+        ],
+    },
+    'sigma': {
+        'fittable': False,
+        'nominal_value': 1.,
+    },
+}
+
+
 class TestGaussianModel(TestCase):
     """Test of the GaussianModel class"""
 
     @classmethod
     def setUp(cls):
         """Initialise the GaussianModel instance"""
-        parameter_definition = {
-            'mu': {
-                'fit_guess': 0.,
-                'fittable': True,
-                'nominal_value': 0.,
-            },
-            'sigma': {
-                'fit_guess': 1.,
-                'fit_limits': [
-                    0.,
-                    None,
-                ],
-                'fittable': True,
-                'nominal_value': 1.,
-            },
-        }
         cls.model = GaussianModel(
-            parameter_definition=parameter_definition)
+            parameter_definition=gaussian_model_parameter_definition)
 
     def test_data_generation(self):
         """Test generation of data"""
@@ -38,7 +39,7 @@ def test_data_generation(self):
 
     def test_data_storage(self):
         """Test storage of data to file and retrieval of data from file"""
-        toydata_file = 'simple_data.hdf5'
+        toydata_file = 'simple_data.h5'
         self.model.data = self.model.generate_data(mu=0, sigma=2)
         self.model.store_data(toydata_file, [self.model.data])
         stored_data = inference_interface.toydata_from_file(toydata_file)
diff --git a/tests/test_runner.py b/tests/test_runner.py
new file mode 100644
index 00000000..2e4c78f8
--- /dev/null
+++ b/tests/test_runner.py
@@ -0,0 +1,77 @@
+from os import remove
+import pytest
+from unittest import TestCase
+
+import numpy as np
+
+from alea.utils import load_yaml
+from alea.runner import Runner
+from inference_interface import toyfiles_to_numpy
+from .test_gaussian_model import gaussian_model_parameter_definition
+
+
+@pytest.mark.usefixtures('rm_cache')
+class TestRunner(TestCase):
+    """Test of the Runner class"""
+
+    @classmethod
+    def setUp(cls):
+        """Initialise the Runner instance"""
+        cls.runner_config = load_yaml('unbinned_wimp_running.yaml')
+        cls.model_config = load_yaml(cls.runner_config['statistical_model_config'])
+        cls.toydata_file = 'simple_data.h5'
+        cls.output_file = 'test_toymc.h5'
+        cls.n_mc = 3
+
+    def set_gaussian_runner(self, toydata_mode='generate_and_write'):
+        """Set a new runner instance with GaussianModel"""
+        self.runner = Runner(
+            statistical_model='alea.examples.gaussian_model.GaussianModel',
+            poi='mu',
+            hypotheses=['free', 'null', 'true'],
+            n_mc=self.n_mc,
+            generate_values={'mu': 1., 'sigma': 1.},
+            parameter_definition=gaussian_model_parameter_definition,
+            compute_confidence_interval=True,
+            toydata_mode=toydata_mode,
+            toydata_file=self.toydata_file,
+            output_file=self.output_file,
+        )
+
+    def set_blueice_runner(self, toydata_mode='generate_and_write'):
+        """Set a new runner instance with BlueiceExtendedModel"""
+        # TODO: interpret the config file after submitter class is implemented
+        parameter_zvc = self.runner_config['computation']['discovery_power']
+        self.runner = Runner(
+            statistical_model=self.runner_config['statistical_model'],
+            poi=self.runner_config['poi'],
+            hypotheses=parameter_zvc['parameters_in_common']['hypotheses'],
+            n_mc=self.n_mc,
+            generate_values={'wimp_rate_multiplier': 1.0},
+            parameter_definition=self.model_config['parameter_definition'],
+            likelihood_config=self.model_config['likelihood_config'],
+            compute_confidence_interval=True,
+            toydata_mode=toydata_mode,
+            toydata_file=self.toydata_file,
+            output_file=self.output_file,
+        )
+
+    def test_runners(self):
+        """Test of the toy_simulation and write_output methods"""
+        set_runners = [self.set_gaussian_runner, self.set_blueice_runner]
+        for set_runner in set_runners:
+            # test toydata_mode generate_and_write
+            set_runner()
+            self.runner.run()
+            remove(self.output_file)
+
+            # test toydata_mode read
+            set_runner(toydata_mode='read')
+            self.runner.run()
+            remove(self.toydata_file)
+
+            # check confidence interval computation
+            results = toyfiles_to_numpy(self.runner._output_file)
+            if np.any(np.isnan(results['free']['dl'])) or np.any(np.isnan(results['free']['ul'])):
+                raise ValueError('Confidence interval computation failed!')
+            remove(self.output_file)
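
A minimal usage sketch of the new Runner, mirroring tests/test_runner.py above; every identifier comes from this diff, and the file names are the ones the tests use:

# Minimal sketch: drive the new Runner end to end with the GaussianModel.
from alea.runner import Runner

# same parameter definition as gaussian_model_parameter_definition in the tests
parameter_definition = {
    'mu': {
        'fit_guess': 0.,
        'fittable': True,
        'nominal_value': 0.,
        'parameter_interval_bounds': [-10, 10],
    },
    'sigma': {'fittable': False, 'nominal_value': 1.},
}

runner = Runner(
    statistical_model='alea.examples.gaussian_model.GaussianModel',
    poi='mu',
    hypotheses=['free', 'null', 'true'],  # 'free' must come first
    n_mc=3,
    generate_values={'mu': 1., 'sigma': 1.},
    parameter_definition=parameter_definition,
    compute_confidence_interval=True,
    toydata_mode='generate_and_write',
    toydata_file='simple_data.h5',
    output_file='test_toymc.h5',
)
runner.run()  # generate toydata, fit each hypothesis, write test_toymc.h5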
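
And a hedged sketch of the split fit arguments in the revised StatisticalModel.confidence_interval, mirroring the call in Runner.toy_simulation (the GaussianModel and parameter_definition are reused from the sketch above):

from alea.examples import GaussianModel

model = GaussianModel(parameter_definition=parameter_definition)
model.data = model.generate_data(mu=1., sigma=1.)

# best_fit_args is used only for the global best-fit likelihood (the 'free' fit);
# confidence_interval_args constrains the conditional (profile) fits and resolves
# parameter_interval_bounds.
dl, ul = model.confidence_interval(
    poi_name='mu',
    best_fit_args={},
    confidence_interval_args={},
)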