UGentBiomath · stijnvanhoey · Jun 8, 2020 · Jun 8, 2020 · Jun 8, 2020 · Jun 8, 2020
diff --git a/notebooks/0.2-stijnvanhoey-refactor-optimization.ipynb b/notebooks/0.2-stijnvanhoey-refactor-optimization.ipynb
diff --git a/src/covid19model/data/polymod.py b/src/covid19model/data/polymod.py
@@ -36,7 +36,6 @@ def get_interaction_matrices():
     -----------
     initN, Nc_home, Nc_work, Nc_schools, Nc_transport, Nc_leisure, Nc_others, Nc_total = get_interaction_matrices()
     """
-
     abs_dir = os.path.dirname(__file__)
     polymod_path = os.path.join(abs_dir, "../../../data/raw/polymod/")
 

diff --git a/src/covid19model/optimization/objective_fcns.py b/src/covid19model/optimization/objective_fcns.py
@@ -1,5 +1,76 @@
 import numpy as np
 
+
+def sse(theta, model, data, model_parameters, lag_time=None):
+    """Calculate the sum of squared errors from data and model instance
+
+    The function assumes the number of values in the theta array corresponds
+    to the number of defined parameters, but can be extended with a lag_time
+    parameters to make this adjustable as well by an optimization algorithm.
+
+
+    Parameters
+    ----------
+    theta : np.array
+        Array with N (if lag_time is defined) or N+1 (if lag_time is not
+        defined) values.
+    model : covidmodel.model instance
+        Covid model instance
+    data : xarray.DataArray
+        Xarray DataArray with time as a dimension and the name corresponding
+        to an existing model output state.
+    model_parameters : list of str
+        Parameter names of the model parameters to adjust in order to get the
+        SSE.
+    lag_time : int
+        Warming up period before comparing the data with the model output.
+        e.g. if 40; comparison of data only starts after 40 days.
+
+
+    Notes
+    -----
+    Assumes daily time step(!) # TODO: need to generalize this
+
+    Examples
+    --------
+    >>> data_to_fit = xr.DataArray([ 54,  79, 100, 131, 165, 228, 290],
+                                   coords={"time": range(1, 8)}, name="ICU",
+                                   dims=['time'])
+    >>> sse([1.6, 0.025, 42], sir_model, data_to_fit, ['sigma', 'beta'], lag_time=None)
+    >>> # previous is equivalent to
+    >>> sse([1.6, 0.025], sir_model, data_to_fit, ['sigma', 'beta'], lag_time=42)
+    >>> # but the latter will use a fixed lag_time,
+    >>> # whereas the first one can be used inside optimization
+    """
+
+    if data.name not in model.state_names:
+        raise Exception("Data variable to fit is not available as model output")
+
+    # define new parameters in model
+    for i, parameter in enumerate(model_parameters):
+        model.parameters.update({parameter: theta[i]})
+
+    # extract additional parameter # TODO - check alternatives for generalisation here
+    if not lag_time:
+        lag_time = int(round(theta[-1]))
+    else:
+        lag_time = int(round(lag_time))
+
+    # run model
+    time = len(data.time) + lag_time # at least this length
+    output = model.sim(time)
+
+    # extract the variable of interest
+    subset_output = output.sum(dim="stratification")[data.name]
+    # adjust to fix lag time to extract start of comparison
+    output_to_compare = subset_output.sel(time=slice(lag_time, lag_time - 1 + len(data)))
+
+    # calculate sse -> we could maybe enable more function options on this level?
+    sse = np.sum((data.values - output_to_compare.values)**2)
+
+    return sse
+
+
 def SSE(thetas,BaseModel,data,states,parNames,weights,checkpoints=None):
 
     """
@@ -30,7 +101,7 @@ def SSE(thetas,BaseModel,data,states,parNames,weights,checkpoints=None):
     -----------
     SSE = SSE(model,thetas,data,parNames,positions,weights)
     """
-    
+
     # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     # assign estimates to correct variable
     # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -46,7 +117,7 @@ def SSE(thetas,BaseModel,data,states,parNames,weights,checkpoints=None):
     # Run simulation
     # ~~~~~~~~~~~~~~
     # number of dataseries
-    n = len(data) 
+    n = len(data)
     # Compute simulation time
     data_length =[]
     for i in range(n):
@@ -124,7 +195,7 @@ def MLE(thetas,BaseModel,data,states,parNames,checkpoints=None):
     # Run simulation
     # ~~~~~~~~~~~~~~
     # number of dataseries
-    n = len(data) 
+    n = len(data)
     # Compute simulation time
     data_length =[]
     for i in range(n):

diff --git a/src/covid19model/optimization/MCMC.py → src/covid19model/optimization/optimize.py b/src/covid19model/optimization/MCMC.py → src/covid19model/optimization/optimize.py
@@ -3,6 +3,59 @@
 from covid19model.optimization import objective_fcns
 from covid19model.optimization import pso
 
+
+def fit(func, model, data, model_parameters, bounds,
+        disp=True, maxiter=30, popsize=10):
+    """Fit a model using a defined SSE function
+
+    Parameters
+    ----------
+    func : objective function
+        The arguments ``model``, ``data``
+        and ``model_parameters`` should be the
+        other arguments of the optimization func
+    model : covidmodel.model instance
+        Covid19model instance
+    data : xarray.DataArray
+        Xarray DataArray with time as a dimension and the name corresponding
+        to an existing model output state.
+    model_parameters : list of str
+        Parameter names of the model parameters to adjust in order to get the
+        SSE.
+    bounds : list of (min, max)
+        For each parameter (model parameter or others), define the min and
+        max boundaries as tuples. If func has non-model parameters to
+        optimize as well, len(bounds) > len(model_parameters).
+    disp: boolean
+        display the pso output stream
+    maxiter: float or int
+        maximum number of pso iterations
+    popsize: float or int
+        population size of particle swarm
+        increasing this variable lowers the chance of finding local minima but slows down calculations
+    """
+    # TODO - add checks on inputs,...
+
+    original_parameters = model.parameters.copy()
+
+    (theta_hat, obj_fun_val, pars_final_swarm,
+         obj_fun_val_final_swarm) = pso.optim(func,
+                                              bounds,
+                                              args=(model, data, model_parameters),
+                                              swarmsize=popsize,
+                                              maxiter=maxiter,
+                                              processes=mp.cpu_count()-1,
+                                              minfunc=1e-9,
+                                              minstep=1e-9,
+                                              debug=True,
+                                              particle_output=True)
+
+    # reset the model parameters to original values
+    model.parameters = original_parameters
+
+    return theta_hat
+
+
 def fit_pso(BaseModel,data,parNames,states,bounds,checkpoints=None,disp=True,maxiter=30,popsize=10):
     """
     A function to compute the mimimum of the absolute value of the maximum likelihood estimator using a particle swarm optimization
@@ -12,7 +65,7 @@ def fit_pso(BaseModel,data,parNames,states,bounds,checkpoints=None,disp=True,max
     BaseModel: model object
         correctly initialised model to be fitted to the dataset
     data: array
-        list containing dataseries        
+        list containing dataseries
     parNames: array
         list containing the names of the parameters to be fitted
     states: array
@@ -38,7 +91,7 @@ def fit_pso(BaseModel,data,parNames,states,bounds,checkpoints=None,disp=True,max
 
     Notes
     -----------
-    Use all available cores minus one by default (optimal number of processors for 2-,4- or 6-core PC's with an OS). 
+    Use all available cores minus one by default (optimal number of processors for 2-,4- or 6-core PC's with an OS).
 
     Example use
     -----------