diff --git a/.travis.yml b/.travis.yml
index b463bc3..15bca0d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,11 @@
 sudo: false
 
+os:
+- linux
+
+git:
+  depth: 200
+
 env:
   global:
     - BINSTAR_USER: menpo
@@ -8,11 +14,11 @@ env:
     - PYTHON_VERSION: 3.4
 
 install:
-  - wget https://raw.githubusercontent.com/menpo/condaci/v0.4.2/condaci.py -O condaci.py
-  - python condaci.py setup
+- wget https://raw.githubusercontent.com/menpo/condaci/v0.4.6/condaci.py -O condaci.py
+- python condaci.py setup
 
 script:
-  - ~/miniconda/bin/python condaci.py build ./conda
+- ~/miniconda/bin/python condaci.py build ./conda
 
 notifications:
   slack:
diff --git a/appveyor.yml b/appveyor.yml
index 2f668bb..14fa231 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -10,18 +10,21 @@ environment:
     - PYTHON_VERSION: 3.4
 
 matrix:
-   fast_finish: true
+  fast_finish: true
 
 platform:
-  - x86
-  - x64
+- x86
+- x64
 
 init:
-  - ps: Start-FileDownload 'https://raw.githubusercontent.com/menpo/condaci/v0.4.2/condaci.py' C:\\condaci.py; echo "Done"
-  - cmd: python C:\\condaci.py setup
+- ps: Start-FileDownload 'https://raw.githubusercontent.com/menpo/condaci/v0.4.6/condaci.py' C:\\condaci.py; echo "Done"
+- cmd: python C:\\condaci.py setup
 
 install:
-  - cmd: C:\\Miniconda\\python C:\\condaci.py build ./conda
+- cmd: set tmp_cmd=python C:\\condaci.py miniconda_dir
+- cmd: "FOR /F %%i IN (' %tmp_cmd% ') DO SET APPV_MINICONDA_DIR=%%i"
+- cmd: set APPV_MINICONDA_EXE="%APPV_MINICONDA_DIR%\python.exe"
+- cmd: "%APPV_MINICONDA_EXE% C:\\condaci.py build conda"
 
 notifications:
   - provider: Slack
@@ -31,3 +34,4 @@ notifications:
     on_build_status_changed: true
     on_build_success: false
     on_build_failure: false
+
diff --git a/menpofit/aam/__init__.py b/menpofit/aam/__init__.py
index 37bd629..cbc979c 100644
--- a/menpofit/aam/__init__.py
+++ b/menpofit/aam/__init__.py
@@ -1,3 +1,14 @@
-from .base import AAM, PatchBasedAAM
-from .builder import AAMBuilder, PatchBasedAAMBuilder
-from .fitter import LucasKanadeAAMFitter
+from .base import HolisticAAM, LinearAAM, LinearMaskedAAM, PatchAAM, MaskedAAM
+from .fitter import (
+    LucasKanadeAAMFitter, SupervisedDescentAAMFitter,
+    holistic_sampling_from_scale, holistic_sampling_from_step)
+from .algorithm import (
+    ProjectOutForwardCompositional, ProjectOutInverseCompositional,
+    SimultaneousForwardCompositional, SimultaneousInverseCompositional,
+    AlternatingForwardCompositional, AlternatingInverseCompositional,
+    ModifiedAlternatingForwardCompositional,
+    ModifiedAlternatingInverseCompositional,
+    WibergForwardCompositional, WibergInverseCompositional,
+    MeanTemplateNewton, MeanTemplateGaussNewton,
+    ProjectOutNewton, ProjectOutGaussNewton,
+    AppearanceWeightsNewton, AppearanceWeightsGaussNewton)
diff --git a/menpofit/aam/algorithm/__init__.py b/menpofit/aam/algorithm/__init__.py
new file mode 100644
index 0000000..636c758
--- /dev/null
+++ b/menpofit/aam/algorithm/__init__.py
@@ -0,0 +1,14 @@
+from .lk import (
+    ProjectOutForwardCompositional, ProjectOutInverseCompositional,
+    SimultaneousForwardCompositional, SimultaneousInverseCompositional,
+    AlternatingForwardCompositional, AlternatingInverseCompositional,
+    ModifiedAlternatingForwardCompositional,
+    ModifiedAlternatingInverseCompositional,
+    WibergForwardCompositional, WibergInverseCompositional)
+from .sd import (
+    MeanTemplateNewton,
+    MeanTemplateGaussNewton,
+    ProjectOutNewton,
+    ProjectOutGaussNewton,
+    AppearanceWeightsNewton,
+    AppearanceWeightsGaussNewton)
diff --git a/menpofit/aam/algorithm/lk.py b/menpofit/aam/algorithm/lk.py
new file mode 100644
index 0000000..3dd6960
--- /dev/null
+++ b/menpofit/aam/algorithm/lk.py
@@ -0,0 +1,948 @@
+from __future__ import division
+import numpy as np
+from menpo.image import Image
+from menpo.feature import gradient as fast_gradient, no_op
+from ..result import AAMAlgorithmResult, LinearAAMAlgorithmResult
+
+
+def _solve_all_map(H, J, e, Ja_prior, c, Js_prior, p, m, n):
+    r"""Joint MAP Gauss-Newton step; returns ``(dq[:m], dq[m:])``."""
+    if n != H.shape[0] - m:
+        # Bidirectional Compositional case: duplicate the shape prior and p
+        Js_prior = np.hstack((Js_prior, Js_prior))
+        p = np.hstack((p, p))
+    J_prior = np.hstack((Ja_prior, Js_prior))
+    H = H + np.diag(J_prior)  # out of place: leave the caller's H untouched
+    Je = J_prior * np.hstack((c, p)) + J.T.dot(e)
+    dq = - np.linalg.solve(H, Je)
+    return dq[:m], dq[m:]
+
+
+def _solve_all_ml(H, J, e, m):
+    # ML step: dq = -H^{-1} J^T e, split into its first m and remaining entries
+    dq = - np.linalg.solve(H, J.T.dot(e))
+    return dq[:m], dq[m:]
+
+
+# TODO document me!
+class LucasKanadeBaseInterface(object):
+    r"""Base Lucas-Kanade interface: stores the transform and template and
+    provides masked warping, gradient, Jacobian and shape-solver helpers."""
+    def __init__(self, transform, template, sampling=None):
+        self.transform = transform
+        self.template = template
+
+        self._build_sampling_mask(sampling)
+
+    def _build_sampling_mask(self, sampling):
+        r"""Build the index masks that select the sampled template pixels."""
+        n_true_pixels = self.template.n_true_pixels()
+        n_channels = self.template.n_channels
+        n_parameters = self.transform.n_parameters
+        sampling_mask = np.zeros(n_true_pixels, dtype=bool)  # not np.bool
+
+        if sampling is None:
+            sampling = range(0, n_true_pixels, 1)
+        elif isinstance(sampling, (int, np.integer)):
+            sampling = range(0, n_true_pixels, sampling)
+
+        sampling_mask[sampling] = 1
+
+        self.i_mask = np.nonzero(np.tile(
+            sampling_mask[None, ...], (n_channels, 1)).flatten())[0]
+        self.dW_dp_mask = np.nonzero(np.tile(
+            sampling_mask[None, ..., None], (2, 1, n_parameters)))
+        self.nabla_mask = np.nonzero(np.tile(
+            sampling_mask[None, None, ...], (2, n_channels, 1)))
+        self.nabla2_mask = np.nonzero(np.tile(
+            sampling_mask[None, None, None, ...], (2, 2, n_channels, 1)))
+
+    @property
+    def shape_model(self):
+        return self.transform.pdm.model
+
+    @property
+    def n(self):
+        return self.transform.n_parameters
+
+    @property
+    def true_indices(self):
+        return self.template.mask.true_indices()
+
+    def warp_jacobian(self):
+        dW_dp = np.rollaxis(self.transform.d_dp(self.true_indices), -1)
+        return dW_dp[self.dW_dp_mask].reshape((dW_dp.shape[0], -1,
+                                               dW_dp.shape[2]))
+
+    def warp(self, image):
+        return image.warp_to_mask(self.template.mask, self.transform)
+
+    def gradient(self, img):
+        nabla = fast_gradient(img)
+        nabla.set_boundary_pixels()
+        return nabla.as_vector().reshape((2, img.n_channels, -1))
+
+    def steepest_descent_images(self, nabla, dW_dp):
+        # reshape gradient
+        # nabla: n_dims x n_channels x n_pixels
+        nabla = nabla[self.nabla_mask].reshape(nabla.shape[:2] + (-1,))
+        # compute steepest descent images
+        # nabla: n_dims x n_channels x n_pixels
+        # warp_jacobian: n_dims x            x n_pixels x n_params
+        # sdi:            n_channels x n_pixels x n_params
+        sdi = 0
+        a = nabla[..., None] * dW_dp[:, None, ...]
+        for d in a:
+            sdi += d
+        # reshape steepest descent images
+        # sdi: (n_channels x n_pixels) x n_params
+        return sdi.reshape((-1, sdi.shape[2]))
+
+    @classmethod
+    def solve_shape_map(cls, H, J, e, J_prior, p):
+        if p.shape[0] != H.shape[0]:
+            # Bidirectional Compositional case
+            J_prior = np.hstack((J_prior, J_prior))
+            p = np.hstack((p, p))
+        # MAP solution; H is not modified, so cached Hessians stay valid
+        H = H + np.diag(J_prior)
+        Je = J_prior * p + J.T.dot(e)
+        return - np.linalg.solve(H, Je)
+
+    @classmethod
+    def solve_shape_ml(cls, H, J, e):
+        # compute and return ML solution
+        return -np.linalg.solve(H, J.T.dot(e))
+
+
+class LucasKanadeStandardInterface(LucasKanadeBaseInterface):
+    r"""Base interface plus an appearance model and joint-solve helpers."""
+    def __init__(self, appearance_model, transform, template, sampling=None):
+        super(LucasKanadeStandardInterface, self).__init__(transform, template,
+                                                           sampling=sampling)
+        self.appearance_model = appearance_model
+
+    @property
+    def m(self):
+        return self.appearance_model.n_active_components
+
+    def solve_all_map(self, H, J, e, Ja_prior, c, Js_prior, p):
+        return _solve_all_map(H, J, e, Ja_prior, c, Js_prior, p,
+                              self.m, self.n)
+
+    def solve_all_ml(self, H, J, e):
+        return _solve_all_ml(H, J, e, self.m)
+
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         appearance_parameters=None, gt_shape=None):
+        return AAMAlgorithmResult(
+            image, self, shape_parameters,
+            cost_functions=cost_functions,
+            appearance_parameters=appearance_parameters, gt_shape=gt_shape)
+
+
+# TODO document me!
+class LucasKanadeLinearInterface(LucasKanadeStandardInterface):
+    r"""Standard interface whose shape model is read from ``transform.model``
+    and whose results are built as ``LinearAAMAlgorithmResult``."""
+    @property
+    def shape_model(self):
+        return self.transform.model
+
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         appearance_parameters=None, gt_shape=None):
+        return LinearAAMAlgorithmResult(
+            image, self, shape_parameters,
+            cost_functions=cost_functions,
+            appearance_parameters=appearance_parameters, gt_shape=gt_shape)
+
+
+# TODO document me!
+class LucasKanadePatchBaseInterface(LucasKanadeBaseInterface):
+    r"""Patch-based interface: warps by extracting (optionally normalised)
+    patches around ``transform.target`` instead of warping to a mask."""
+    def __init__(self, transform, template, sampling=None,
+                 patch_size=(17, 17), patch_normalisation=no_op):
+        self.patch_size = patch_size
+        self.patch_normalisation = patch_normalisation
+        # set above first: the base __init__ calls _build_sampling_mask
+        super(LucasKanadePatchBaseInterface, self).__init__(
+            transform, template, sampling=sampling)
+
+    def _build_sampling_mask(self, sampling):
+        r"""Build patch index masks; ``None`` samples every patch pixel."""
+        if sampling is None:
+            sampling = np.ones(self.patch_size, dtype=bool)  # not np.bool
+        image_shape = self.template.pixels.shape
+        image_mask = np.tile(sampling[None, None, None, ...],
+                             image_shape[:3] + (1, 1))
+        self.i_mask = np.nonzero(image_mask.flatten())[0]
+        self.gradient_mask = np.nonzero(np.tile(
+            image_mask[None, ...], (2, 1, 1, 1, 1, 1)))
+        self.gradient2_mask = np.nonzero(np.tile(
+            image_mask[None, None, ...], (2, 2, 1, 1, 1, 1, 1)))
+
+    @property
+    def shape_model(self):
+        return self.transform.model
+
+    def warp_jacobian(self):
+        return np.rollaxis(self.transform.d_dp(None), -1)
+
+    def warp(self, image):
+        parts = image.extract_patches(self.transform.target,
+                                      patch_size=self.patch_size,
+                                      as_single_array=True)
+        parts = self.patch_normalisation(parts)
+        return Image(parts, copy=False)
+
+    def gradient(self, image):
+        pixels = image.pixels
+        nabla = fast_gradient(pixels.reshape((-1,) + self.patch_size))
+        # NOTE(review): an earlier comment said the between-parts gradient is
+        # removed here, but the code only reshapes to (2,) + pixels.shape
+        return nabla.reshape((2,) + pixels.shape)
+
+    def steepest_descent_images(self, nabla, dw_dp):
+        # reshape nabla
+        # nabla: dims x parts x off x ch x (h x w)
+        nabla = nabla[self.gradient_mask].reshape(
+            nabla.shape[:-2] + (-1,))
+        # compute steepest descent images
+        # nabla: dims x parts x off x ch x (h x w)
+        # ds_dp:    dims x parts x                             x params
+        # sdi:             parts x off x ch x (h x w) x params
+        sdi = 0
+        a = nabla[..., None] * dw_dp[..., None, None, None, :]
+        for d in a:
+            sdi += d
+
+        # reshape steepest descent images
+        # sdi: (parts x offsets x ch x w x h) x params
+        return sdi.reshape((-1, sdi.shape[-1]))
+
+
+# TODO document me!
+class LucasKanadePatchInterface(LucasKanadePatchBaseInterface):
+    r"""Patch base interface plus an appearance model, joint-solve helpers
+    and construction of ``AAMAlgorithmResult`` objects."""
+    def __init__(self, appearance_model, transform, template, sampling=None,
+                 patch_size=(17, 17), patch_normalisation=no_op):
+        self.appearance_model = appearance_model
+
+        super(LucasKanadePatchInterface, self).__init__(
+            transform, template, patch_size=patch_size,
+            patch_normalisation=patch_normalisation, sampling=sampling)
+
+    @property
+    def m(self):
+        return self.appearance_model.n_active_components
+
+    def solve_all_map(self, H, J, e, Ja_prior, c, Js_prior, p):
+        return _solve_all_map(H, J, e, Ja_prior, c, Js_prior, p,
+                              self.m, self.n)
+
+    def solve_all_ml(self, H, J, e):
+        return _solve_all_ml(H, J, e, self.m)
+
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         appearance_parameters=None, gt_shape=None):
+        return AAMAlgorithmResult(
+            image, self, shape_parameters,
+            cost_functions=cost_functions,
+            appearance_parameters=appearance_parameters, gt_shape=gt_shape)
+
+
+# TODO document me!
+class LucasKanade(object):
+    r"""Base class of the AAM Lucas-Kanade algorithms: stores the interface
+    and threshold ``eps`` and precomputes quantities shared by subclasses."""
+    def __init__(self, aam_interface, eps=10**-5):
+        self.eps = eps
+        self.interface = aam_interface
+        self._precompute()
+
+    @property
+    def appearance_model(self):
+        return self.interface.appearance_model
+
+    @property
+    def transform(self):
+        return self.interface.transform
+
+    @property
+    def template(self):
+        return self.interface.template
+
+    def _precompute(self):
+        # grab number of shape and appearance parameters
+        self.n = self.transform.n_parameters
+        self.m = self.appearance_model.n_active_components
+
+        # grab appearance model components
+        self.A = self.appearance_model.components
+        # mask them (rows of A.T selected by the interface's i_mask)
+        self.A_m = self.A.T[self.interface.i_mask, :]
+        # compute their pseudoinverse
+        self.pinv_A_m = np.linalg.pinv(self.A_m)
+
+        # grab appearance model mean
+        self.a_bar = self.appearance_model.mean()
+        # vectorize it and mask it
+        self.a_bar_m = self.a_bar.as_vector()[self.interface.i_mask]
+
+        # compute warp jacobian
+        self.dW_dp = self.interface.warp_jacobian()
+
+        # shape prior: 4 leading ones (presumably global similarity) + s2 / L
+        # TODO: Is this correct? It's like modelling no noise at all
+        sm_noise_variance = self.interface.shape_model.noise_variance() or 1
+        s2 = self.appearance_model.noise_variance() / sm_noise_variance
+        L = self.interface.shape_model.eigenvalues
+        self.s2_inv_L = np.hstack((np.ones((4,)), s2 / L))
+        # compute appearance model prior
+        S = self.appearance_model.eigenvalues
+        self.s2_inv_S = s2 / S
+
+
+# TODO: Document me!
+class ProjectOut(LucasKanade):
+    r"""
+    Abstract Interface for Project-out AAM algorithms
+    """
+    def project_out(self, J):
+        # project-out appearance bases from a particular vector or matrix
+        return J - self.A_m.dot(self.pinv_A_m.dot(J))
+
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        r"""Fit ``image`` starting from ``initial_shape``; returns a result."""
+        # define cost closure
+        def cost_closure(x, f):
+            return lambda: x.T.dot(f(x))
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon (np.inf: np.Inf is gone)
+        k, eps = 0, np.inf
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # vectorize it and mask it
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # compute masked error
+        self.e_m = i_m - self.a_bar_m
+
+        # update cost_functions
+        cost_functions = [cost_closure(self.e_m, self.project_out)]
+
+        while k < max_iters and eps > self.eps:
+            # solve for increments on the shape parameters
+            self.dp = self._solve(map_inference)
+
+            # update warp (NOTE(review): s_k assumes points is not mutated)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # vectorize it and mask it
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # compute masked error
+            self.e_m = i_m - self.a_bar_m
+
+            # update cost
+            cost_functions.append(cost_closure(self.e_m, self.project_out))
+
+            # test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions, gt_shape=gt_shape)
+
+
+# TODO: Document me!
+class ProjectOutForwardCompositional(ProjectOut):
+    r"""
+    Project-out Forward Compositional (PFC) Gauss-Newton algorithm.
+    The Jacobian and Hessian are rebuilt from the warped image each step."""
+    def _solve(self, map_inference):
+        # compute warped image gradient
+        nabla_i = self.interface.gradient(self.i)
+        # compute masked forward Jacobian
+        J_m = self.interface.steepest_descent_images(nabla_i, self.dW_dp)
+        # project out appearance model from it
+        QJ_m = self.project_out(J_m)
+        # compute masked forward Hessian
+        JQJ_m = QJ_m.T.dot(J_m)
+        # solve for increments on the shape parameters (MAP adds the prior)
+        if map_inference:
+            return self.interface.solve_shape_map(
+                JQJ_m, QJ_m, self.e_m,  self.s2_inv_L,
+                self.transform.as_vector())
+        else:
+            return self.interface.solve_shape_ml(JQJ_m, QJ_m, self.e_m)
+
+    def _update_warp(self):
+        # forward update: add the increment to the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() + self.dp)
+
+
+# TODO: Document me!
+class ProjectOutInverseCompositional(ProjectOut):
+    r"""
+    Project-out Inverse Compositional (PIC) Gauss-Newton algorithm.
+    The Jacobian, Hessian and pseudo-inverse are computed once, up front."""
+    def _precompute(self):
+        # call super method
+        super(ProjectOutInverseCompositional, self)._precompute()
+        # compute appearance model mean gradient
+        nabla_a = self.interface.gradient(self.a_bar)
+        # compute masked inverse Jacobian (built from the negated gradient)
+        J_m = self.interface.steepest_descent_images(-nabla_a, self.dW_dp)
+        # project out appearance model from it
+        self.QJ_m = self.project_out(J_m)
+        # compute masked inverse Hessian
+        self.JQJ_m = self.QJ_m.T.dot(J_m)
+        # compute masked Jacobian pseudo-inverse
+        self.pinv_QJ_m = np.linalg.solve(self.JQJ_m, self.QJ_m.T)
+
+    def _solve(self, map_inference):
+        # NOTE(review): the cached self.JQJ_m is handed to solve_shape_map,
+        # which therefore must not modify its H argument in place
+        if map_inference:
+            return self.interface.solve_shape_map(
+                self.JQJ_m, self.QJ_m, self.e_m, self.s2_inv_L,
+                self.transform.as_vector())
+        else:
+            return -self.pinv_QJ_m.dot(self.e_m)
+
+    def _update_warp(self):
+        # inverse update: subtract the increment from the parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() - self.dp)
+
+
+# TODO: Document me!
+class Simultaneous(LucasKanade):
+    r"""
+    Abstract Interface for Simultaneous AAM algorithms
+    """
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        r"""Fit ``image`` starting from ``initial_shape``; returns a result."""
+        # define cost closure
+        def cost_closure(x):
+            return lambda: x.T.dot(x)
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon (np.inf: np.Inf is gone)
+        k, eps = 0, np.inf
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # mask warped image
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # initialize appearance parameters by projecting masked image
+        # onto masked appearance model
+        self.c = self.pinv_A_m.dot(i_m - self.a_bar_m)
+        self.a = self.appearance_model.instance(self.c)
+        a_m = self.a.as_vector()[self.interface.i_mask]
+        c_list = [self.c]
+
+        # compute masked error
+        self.e_m = i_m - a_m
+
+        # update cost
+        cost_functions = [cost_closure(self.e_m)]
+
+        while k < max_iters and eps > self.eps:
+            # solve for increments on the appearance and shape parameters
+            # simultaneously
+            dc, self.dp = self._solve(map_inference)
+
+            # update appearance parameters
+            self.c = self.c + dc
+            self.a = self.appearance_model.instance(self.c)
+            a_m = self.a.as_vector()[self.interface.i_mask]
+            c_list.append(self.c)
+
+            # update warp (NOTE(review): s_k assumes points is not mutated)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # mask warped image
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # compute masked error
+            self.e_m = i_m - a_m
+
+            # update cost
+            cost_functions.append(cost_closure(self.e_m))
+
+            # test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions,
+            appearance_parameters=c_list, gt_shape=gt_shape)
+
+    def _solve(self, map_inference):
+        # compute masked Jacobian
+        J_m = self._compute_jacobian()
+        # assemble masked simultaneous Jacobian
+        J_sim_m = np.hstack((-self.A_m, J_m))
+        # compute masked Hessian
+        H_sim_m = J_sim_m.T.dot(J_sim_m)
+        # solve for increments on the appearance and shape parameters
+        # simultaneously
+        if map_inference:
+            return self.interface.solve_all_map(
+                H_sim_m, J_sim_m, self.e_m, self.s2_inv_S, self.c,
+                self.s2_inv_L, self.transform.as_vector())
+        else:
+            return self.interface.solve_all_ml(H_sim_m, J_sim_m, self.e_m)
+
+
+# TODO: Document me!
+class SimultaneousForwardCompositional(Simultaneous):
+    r"""
+    Simultaneous Forward Compositional (SFC) Gauss-Newton algorithm
+    """
+    def _compute_jacobian(self):
+        # compute the gradient of the currently warped image, self.i
+        nabla_i = self.interface.gradient(self.i)
+        # return forward Jacobian
+        return self.interface.steepest_descent_images(nabla_i, self.dW_dp)
+
+    def _update_warp(self):
+        # forward update: add the increment to the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() + self.dp)
+
+
+# TODO: Document me!
+class SimultaneousInverseCompositional(Simultaneous):
+    r"""
+    Simultaneous Inverse Compositional (SIC) Gauss-Newton algorithm
+    """
+    def _compute_jacobian(self):
+        # compute the gradient of the current appearance instance, self.a
+        nabla_a = self.interface.gradient(self.a)
+        # return inverse Jacobian (built from the negated gradient)
+        return self.interface.steepest_descent_images(-nabla_a, self.dW_dp)
+
+    def _update_warp(self):
+        # inverse update: subtract the increment from the parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() - self.dp)
+
+
+# TODO: Document me!
+class Alternating(LucasKanade):
+    r"""
+    Abstract Interface for Alternating AAM algorithms
+    """
+    def _precompute(self, **kwargs):
+        # call super method
+        super(Alternating, self)._precompute()
+        # compute MAP appearance Hessian
+        self.AA_m_map = self.A_m.T.dot(self.A_m) + np.diag(self.s2_inv_S)
+
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        r"""Fit ``image`` starting from ``initial_shape``; returns a result."""
+        # define cost closure
+        def cost_closure(x):
+            return lambda: x.T.dot(x)
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon (np.inf: np.Inf is gone)
+        k, eps = 0, np.inf
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # mask warped image
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # initialize appearance parameters by projecting masked image
+        # onto masked appearance model
+        c = self.pinv_A_m.dot(i_m - self.a_bar_m)
+        self.a = self.appearance_model.instance(c)
+        a_m = self.a.as_vector()[self.interface.i_mask]
+        c_list = [c]
+        Jdp = 0
+
+        # compute masked error
+        e_m = i_m - a_m
+
+        # update cost
+        cost_functions = [cost_closure(e_m)]
+
+        while k < max_iters and eps > self.eps:
+            # solve for increment on the appearance parameters
+            if map_inference:
+                Ae_m_map = - self.s2_inv_S * c + self.A_m.T.dot(e_m + Jdp)
+                dc = np.linalg.solve(self.AA_m_map, Ae_m_map)
+            else:
+                dc = self.pinv_A_m.dot(e_m + Jdp)
+
+            # compute masked Jacobian
+            J_m = self._compute_jacobian()
+            # compute masked Hessian
+            H_m = J_m.T.dot(J_m)
+            # solve for increments on the shape parameters; both branches
+            # subtract the appearance update A_m.dot(dc) from the residual
+            if map_inference:
+                self.dp = self.interface.solve_shape_map(
+                    H_m, J_m, e_m - self.A_m.dot(dc), self.s2_inv_L,
+                    self.transform.as_vector())
+            else:
+                self.dp = self.interface.solve_shape_ml(H_m, J_m,
+                                                        e_m - self.A_m.dot(dc))
+            # update appearance parameters
+            c = c + dc
+            self.a = self.appearance_model.instance(c)
+            a_m = self.a.as_vector()[self.interface.i_mask]
+            c_list.append(c)
+
+            # update warp (NOTE(review): s_k assumes points is not mutated)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # mask warped image
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # compute Jdp
+            Jdp = J_m.dot(self.dp)
+
+            # compute masked error
+            e_m = i_m - a_m
+
+            # update cost
+            cost_functions.append(cost_closure(e_m))
+
+            # test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions,
+            appearance_parameters=c_list, gt_shape=gt_shape)
+
+
+# TODO: Document me!
+class AlternatingForwardCompositional(Alternating):
+    r"""
+    Alternating Forward Compositional (AFC) Gauss-Newton algorithm
+    """
+    def _compute_jacobian(self):
+        # compute the gradient of the currently warped image, self.i
+        nabla_i = self.interface.gradient(self.i)
+        # return forward Jacobian
+        return self.interface.steepest_descent_images(nabla_i, self.dW_dp)
+
+    def _update_warp(self):
+        # forward update: add the increment to the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() + self.dp)
+
+
+# TODO: Document me!
+class AlternatingInverseCompositional(Alternating):
+    r"""
+    Alternating Inverse Compositional (AIC) Gauss-Newton algorithm
+    """
+    def _compute_jacobian(self):
+        # compute the gradient of the current appearance instance, self.a
+        nabla_a = self.interface.gradient(self.a)
+        # return inverse Jacobian (built from the negated gradient)
+        return self.interface.steepest_descent_images(-nabla_a, self.dW_dp)
+
+    def _update_warp(self):
+        # inverse update: subtract the increment from the parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() - self.dp)
+
+
+# TODO: Document me!
+class ModifiedAlternating(Alternating):
+    r"""
+    Abstract Interface for Modified Alternating AAM algorithms
+    """
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        r"""Fit ``image`` starting from ``initial_shape``; returns a result."""
+        # define cost closure
+        def cost_closure(x):
+            return lambda: x.T.dot(x)
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon (np.inf: np.Inf is gone)
+        a_m = self.a_bar_m
+        c_list = []
+        k, eps = 0, np.inf
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # mask warped image
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # initialize appearance parameters by projecting masked image
+        # onto masked appearance model
+        c = self.pinv_A_m.dot(i_m - a_m)
+        self.a = self.appearance_model.instance(c)
+        a_m = self.a.as_vector()[self.interface.i_mask]
+        c_list.append(c)
+
+        # compute masked error
+        e_m = i_m - a_m
+
+        # update cost
+        cost_functions = [cost_closure(e_m)]
+
+        while k < max_iters and eps > self.eps:
+            # compute masked Jacobian
+            J_m = self._compute_jacobian()
+            # compute masked Hessian
+            H_m = J_m.T.dot(J_m)
+            # solve for increments on the shape parameters
+            if map_inference:
+                self.dp = self.interface.solve_shape_map(
+                    H_m, J_m, e_m, self.s2_inv_L, self.transform.as_vector())
+            else:
+                self.dp = self.interface.solve_shape_ml(H_m, J_m, e_m)
+
+            # update warp (NOTE(review): s_k assumes points is not mutated)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # mask warped image
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # update appearance parameters by re-projecting the new warp
+            c = self.pinv_A_m.dot(i_m - self.a_bar_m)
+            self.a = self.appearance_model.instance(c)
+            a_m = self.a.as_vector()[self.interface.i_mask]
+            c_list.append(c)
+
+            # compute masked error
+            e_m = i_m - a_m
+
+            # update cost
+            cost_functions.append(cost_closure(e_m))
+
+            # test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions,
+            appearance_parameters=c_list, gt_shape=gt_shape)
+
+
+# TODO: Document me!
+class ModifiedAlternatingForwardCompositional(ModifiedAlternating):
+    r"""
+    Modified Alternating Forward Compositional (MAFC) Gauss-Newton algorithm
+    """
+    def _compute_jacobian(self):
+        # compute the gradient of the currently warped image, self.i
+        nabla_i = self.interface.gradient(self.i)
+        # return forward Jacobian
+        return self.interface.steepest_descent_images(nabla_i, self.dW_dp)
+
+    def _update_warp(self):
+        # forward update: add the increment to the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() + self.dp)
+
+
+# TODO: Document me!
+class ModifiedAlternatingInverseCompositional(ModifiedAlternating):
+    r"""
+    Modified Alternating Gauss-Newton algorithm, Inverse Compositional (MAIC)
+    """
+    def _compute_jacobian(self):
+        # inverse Jacobian from the negated gradient of ``self.a``
+        interface = self.interface
+        return interface.steepest_descent_images(
+            -interface.gradient(self.a), self.dW_dp)
+
+    def _update_warp(self):
+        # inverse composition: subtract the increment from the parameters
+        updated_p = self.transform.as_vector() - self.dp
+        self.transform.from_vector_inplace(updated_p)
+
+
+# TODO: Document me!
+class Wiberg(LucasKanade):
+    r"""
+    Abstract Interface for Wiberg AAM algorithms
+    """
+    def project_out(self, J):
+        # project-out appearance bases from a particular vector or matrix
+        return J - self.A_m.dot(self.pinv_A_m.dot(J))
+
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        """Iterate from ``initial_shape`` and return the algorithm result."""
+        def cost_closure(x, f):  # lazy cost: evaluates x^T f(x) when called
+            return lambda: x.T.dot(f(x))
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf  # ``np.Inf`` alias was removed in NumPy 2.0
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # mask warped image
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # initialize appearance parameters by projecting masked image
+        # onto masked appearance model
+        c = self.pinv_A_m.dot(i_m - self.a_bar_m)
+        self.a = self.appearance_model.instance(c)
+        a_m = self.a.as_vector()[self.interface.i_mask]
+        c_list = [c]
+
+        # compute masked error
+        e_m = i_m - self.a_bar_m
+
+        # update cost (the error with the appearance bases projected out)
+        cost_functions = [cost_closure(e_m, self.project_out)]
+
+        # stop after ``max_iters`` iterations or once eps drops to self.eps
+        while k < max_iters and eps > self.eps:
+            # compute masked Jacobian
+            J_m = self._compute_jacobian()
+            # project out appearance models
+            QJ_m = self.project_out(J_m)
+            # compute masked Hessian
+            JQJ_m = QJ_m.T.dot(J_m)
+            # solve for increments on the shape parameters
+            if map_inference:
+                self.dp = self.interface.solve_shape_map(
+                    JQJ_m, QJ_m, e_m, self.s2_inv_L,
+                    self.transform.as_vector())
+            else:
+                self.dp = self.interface.solve_shape_ml(JQJ_m, QJ_m, e_m)
+
+            # update warp (s_k is read before the update, for the eps test)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # mask warped image
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # update appearance parameters
+            dc = self.pinv_A_m.dot(i_m - a_m + J_m.dot(self.dp))
+            c = c + dc
+            self.a = self.appearance_model.instance(c)
+            a_m = self.a.as_vector()[self.interface.i_mask]
+            c_list.append(c)
+
+            # compute masked error
+            e_m = i_m - self.a_bar_m
+
+            # update cost
+            cost_functions.append(cost_closure(e_m, self.project_out))
+
+            # test convergence: norm of the change in the target points
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions,
+            appearance_parameters=c_list, gt_shape=gt_shape)
+
+
+# TODO: Document me!
+class WibergForwardCompositional(Wiberg):
+    r"""
+    Wiberg Gauss-Newton algorithm, Forward Compositional (WFC) variant
+    """
+    def _compute_jacobian(self):
+        # forward Jacobian from the gradient of the warped image ``self.i``
+        interface = self.interface
+        return interface.steepest_descent_images(
+            interface.gradient(self.i), self.dW_dp)
+
+    def _update_warp(self):
+        # forward composition: add the increment to the current parameters
+        updated_p = self.transform.as_vector() + self.dp
+        self.transform.from_vector_inplace(updated_p)
+
+
+# TODO: Document me!
+class WibergInverseCompositional(Wiberg):
+    r"""
+    Wiberg Gauss-Newton algorithm, Inverse Compositional (WIC) variant
+    """
+    def _compute_jacobian(self):
+        # inverse Jacobian from the negated gradient of ``self.a``
+        interface = self.interface
+        return interface.steepest_descent_images(
+            -interface.gradient(self.a), self.dW_dp)
+
+    def _update_warp(self):
+        # inverse composition: subtract the increment from the parameters
+        updated_p = self.transform.as_vector() - self.dp
+        self.transform.from_vector_inplace(updated_p)
diff --git a/menpofit/aam/algorithm/sd.py b/menpofit/aam/algorithm/sd.py
new file mode 100644
index 0000000..762386e
--- /dev/null
+++ b/menpofit/aam/algorithm/sd.py
@@ -0,0 +1,433 @@
+from __future__ import division
+from functools import partial
+import numpy as np
+from menpo.image import Image
+from menpo.feature import no_op
+from menpo.visualize import print_dynamic
+from menpofit.math import IRLRegression, IIRLRegression
+from menpofit.result import compute_normalise_point_to_point_error
+from menpofit.sdm.algorithm import SupervisedDescentAlgorithm
+from menpofit.visualize import print_progress
+from ..result import AAMAlgorithmResult, LinearAAMAlgorithmResult
+
+
+# TODO document me!
+class SupervisedDescentStandardInterface(object):
+    r"""
+    Exposes the appearance model, transform and template of an AAM to the
+    supervised descent algorithms, together with a pixel sampling mask.
+    """
+    def __init__(self, appearance_model, transform, template, sampling=None):
+        self.appearance_model = appearance_model
+        self.transform = transform
+        self.template = template
+        # stores the indices of the sampled pixels in ``self.i_mask``
+        self._build_sampling_mask(sampling)
+
+    def _build_sampling_mask(self, sampling):
+        n_true_pixels = self.template.n_true_pixels()
+        n_channels = self.template.n_channels
+        sampling_mask = np.zeros(n_true_pixels, dtype=bool)
+        if sampling is None:
+            sampling = 1
+        # strided slice instead of ``xrange``, which does not exist on Python 3
+        sampling_mask[::sampling] = True
+        # repeat the mask for every channel and keep the selected flat indices
+        self.i_mask = np.nonzero(np.tile(
+            sampling_mask[None, ...], (n_channels, 1)).flatten())[0]
+
+    @property
+    def shape_model(self):
+        return self.transform.pdm.model
+
+    @property
+    def n(self):
+        return self.transform.n_parameters
+
+    @property
+    def m(self):
+        return self.appearance_model.n_active_components
+
+    def warp(self, image):
+        return image.warp_to_mask(self.template.mask, self.transform)
+
+    def algorithm_result(self, image, shape_parameters,
+                         appearance_parameters=None, gt_shape=None):
+        return AAMAlgorithmResult(
+            image, self, shape_parameters,
+            appearance_parameters=appearance_parameters, gt_shape=gt_shape)
+
+
+# TODO document me!
+class SupervisedDescentLinearInterface(SupervisedDescentStandardInterface):
+    r"""Interface whose shape model is read from ``transform.model``."""
+    @property
+    def shape_model(self):
+        return self.transform.model
+
+    def algorithm_result(self, image, shape_parameters,
+                         appearance_parameters=None, gt_shape=None):
+        kw = dict(appearance_parameters=appearance_parameters,
+                  gt_shape=gt_shape)
+        return LinearAAMAlgorithmResult(
+            image, self, shape_parameters, **kw)
+
+
+# TODO document me!
+class SupervisedDescentPatchInterface(SupervisedDescentStandardInterface):
+    r"""
+    Patch-based interface: ``warp`` extracts patches at the transform target.
+    """
+    def __init__(self, appearance_model, transform, template, sampling=None,
+                 patch_size=(17, 17), patch_normalisation=no_op):
+        self.patch_size = patch_size
+        self.patch_normalisation = patch_normalisation
+        # the base constructor builds the mask, which reads ``patch_size``
+        super(SupervisedDescentPatchInterface, self).__init__(
+            appearance_model, transform, template, sampling=sampling)
+
+    def _build_sampling_mask(self, sampling):
+        if sampling is None:
+            # builtin ``bool``: the ``np.bool`` alias is gone in NumPy >= 1.24
+            sampling = np.ones(self.patch_size, dtype=bool)
+        image_shape = self.template.pixels.shape
+        image_mask = np.tile(sampling[None, None, None, ...],
+                             image_shape[:3] + (1, 1))
+        self.i_mask = np.nonzero(image_mask.flatten())[0]
+
+    @property
+    def shape_model(self):
+        return self.transform.model
+
+    def warp(self, image):
+        parts = image.extract_patches(self.transform.target,
+                                      patch_size=self.patch_size,
+                                      as_single_array=True)
+        return Image(self.patch_normalisation(parts), copy=False)
+
+
+def _weights_for_target(transform, target):
+    transform.set_target(target)  # called on the caller's object, not a copy
+    return transform.as_vector()  # vector form of the transform afterwards
+
+
+# TODO document me!
+def obtain_parametric_delta_x(gt_shapes, current_shapes, transform):
+    # one row per (image, perturbation) pair; the number of perturbations
+    # is taken from the first image
+    n_rows = len(gt_shapes) * len(current_shapes[0])
+    gt_params = np.empty((n_rows, transform.n_parameters))
+    delta_params = np.empty_like(gt_params)
+
+    row = 0
+    for gt_shape, perturbed_shapes in zip(gt_shapes, current_shapes):
+        # ground truth parameters are computed once per image
+        image_gt_params = _weights_for_target(transform, gt_shape)
+        for shape in perturbed_shapes:
+            gt_params[row] = image_gt_params
+            # offset from the perturbed parameters to the ground truth ones
+            delta_params[row] = (image_gt_params -
+                                 _weights_for_target(transform, shape))
+            row += 1
+
+    return delta_params, gt_params
+
+
+class ParametricSupervisedDescentAlgorithm(SupervisedDescentAlgorithm):
+    r"""Cascaded regression on the transform parameters of an AAM interface.
+    ``_compute_features`` and ``_regressor_cls`` are not defined here."""
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5):
+        super(ParametricSupervisedDescentAlgorithm, self).__init__()
+
+        self.interface = aam_interface
+        self.n_iterations = n_iterations
+        self.eps = eps  # stored; not read within this class
+
+        self._compute_error = compute_error
+        self._precompute()
+
+    @property
+    def appearance_model(self):
+        return self.interface.appearance_model
+
+    @property
+    def transform(self):
+        return self.interface.transform
+
+    def _precompute(self):
+        # Grab appearance model mean
+        a_bar = self.appearance_model.mean()
+        # Vectorise it and mask it
+        self.a_bar_m = a_bar.as_vector()[self.interface.i_mask]
+
+    def _train(self, images, gt_shapes, current_shapes, increment=False,
+               prefix='', verbose=False):
+
+        if not increment:
+            # Reset the regressors
+            self.regressors = []
+
+        n_perturbations = len(current_shapes[0])
+        template_shape = gt_shapes[0]
+
+        # obtain delta_x and gt_x (parameters rather than shapes)
+        delta_x, gt_x = obtain_parametric_delta_x(gt_shapes, current_shapes,
+                                                  self.transform)
+
+        # Cascaded Regression loop
+        for k in range(self.n_iterations):
+            # generate regression data
+            features = self._generate_features(
+                images, current_shapes,
+                prefix='{}(Iteration {}) - '.format(prefix, k),
+                verbose=verbose)
+
+            if verbose:
+                print_dynamic('{}(Iteration {}) - Performing regression'.format(
+                    prefix, k))
+
+            if not increment:
+                r = self._regressor_cls()  # factory assigned by the subclasses
+                r.train(features, delta_x)
+                self.regressors.append(r)
+            else:
+                self.regressors[k].increment(features, delta_x)
+
+            # Estimate delta parameters with the regressor of this iteration
+            estimated_delta_x = self.regressors[k].predict(features)
+            if verbose:
+                self._print_regression_info(template_shape, gt_shapes,
+                                            n_perturbations, delta_x,
+                                            estimated_delta_x, k,
+                                            prefix=prefix)
+
+            j = 0
+            for shapes in current_shapes:
+                for s in shapes:
+                    # Estimate parameters
+                    edx = estimated_delta_x[j]
+                    # Current parameters plus the estimated increment
+                    cx = _weights_for_target(self.transform, s) + edx
+
+                    # Uses less memory to find updated target shape
+                    self.transform.from_vector_inplace(cx)
+                    # Update current shape inplace
+                    s.from_vector_inplace(self.transform.target.as_vector())
+
+                    delta_x[j] = gt_x[j] - cx
+                    j += 1
+
+        return current_shapes
+
+    def _generate_features(self, images, current_shapes, prefix='',
+                           verbose=False):
+        # Initialize features array - since current_shapes is a list of lists
+        # we need to know the total size
+        n_samples = len(images) * len(current_shapes[0])
+        features = np.empty((n_samples,) + self.a_bar_m.shape)
+
+        wrap = partial(print_progress,
+                       prefix='{}Computing features'.format(prefix),
+                       end_with_newline=not prefix, verbose=verbose)
+
+        # initialize sample counter
+        k = 0
+        for img, img_shapes in wrap(zip(images, current_shapes)):
+            for s in img_shapes:
+                self.transform.set_target(s)
+                # Assumes that the transform is correctly set
+                features[k] = self._compute_features(img)
+
+                k += 1
+
+        return features
+
+    def run(self, image, initial_shape, gt_shape=None, **kwargs):
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # Cascaded Regression loop
+        for r in self.regressors:
+            # Assumes that the transform is correctly set
+            features = self._compute_features(image)
+
+            # solve for increments on the shape parameters
+            dx = r.predict(features)
+
+            # We need to update the transform to set the state for the warping
+            # of the image in the next iteration's feature computation.
+            new_x = p_list[-1] + dx
+            self.transform.from_vector_inplace(new_x)
+            p_list.append(new_x)
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, gt_shape=gt_shape)
+
+    def _print_regression_info(self, template_shape, gt_shapes, n_perturbations,
+                               delta_x, estimated_delta_x, level_index,
+                               prefix=''):
+        print_dynamic('{}(Iteration {}) - Calculating errors'.format(
+            prefix, level_index))
+        errors = []
+        for j, (dx, edx) in enumerate(zip(delta_x, estimated_delta_x)):
+            self.transform.from_vector_inplace(dx)
+            s1 = self.transform.target  # NOTE(review): may alias s2 - confirm
+            self.transform.from_vector_inplace(edx)
+            s2 = self.transform.target
+
+            gt_s = gt_shapes[np.floor_divide(j, n_perturbations)]
+            errors.append(self._compute_error(s1, s2, gt_s))
+        mean = np.mean(errors)
+        std = np.std(errors)
+        median = np.median(errors)
+        print_dynamic('{}(Iteration {}) - Training error -> '
+                      'mean: {:.4f}, std: {:.4f}, median: {:.4f}.\n'.
+                      format(prefix, level_index, mean, std, median))
+
+
+# TODO: document me!
+class MeanTemplate(ParametricSupervisedDescentAlgorithm):
+    r"""Features are the masked warped image minus the masked mean."""
+    def _compute_features(self, image):
+        interface = self.interface
+        # warp, vectorise and keep only the sampled pixels
+        masked_pixels = interface.warp(image).as_vector()[interface.i_mask]
+        return masked_pixels - self.a_bar_m
+
+
+# TODO: document me!
+class MeanTemplateNewton(MeanTemplate):
+    r"""
+    :map:`MeanTemplate` features regressed with :map:`IRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, bias=True):
+        parent = super(MeanTemplateNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(IRLRegression, alpha=alpha, bias=bias)
+
+
+# TODO: document me!
+class MeanTemplateGaussNewton(MeanTemplate):
+    r"""
+    :map:`MeanTemplate` features regressed with :map:`IIRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, alpha2=0, bias=True):
+        parent = super(MeanTemplateGaussNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(
+            IIRLRegression, alpha=alpha, alpha2=alpha2, bias=bias)
+
+
+# TODO: document me!
+class ProjectOut(ParametricSupervisedDescentAlgorithm):
+    r"""
+    Mean-centred masked image with the appearance bases projected out.
+    """
+    def _precompute(self):
+        super(ProjectOut, self)._precompute()
+        components = self.appearance_model.components
+        self.A_m = components.T[self.interface.i_mask, :]
+        self.pinv_A_m = np.linalg.pinv(self.A_m)
+
+    def project_out(self, J):
+        # remove from J its projection onto the masked appearance bases
+        basis_coefficients = self.pinv_A_m.dot(J)
+        return J - self.A_m.dot(basis_coefficients)
+
+    def _compute_features(self, image):
+        interface = self.interface
+        masked_pixels = interface.warp(image).as_vector()[interface.i_mask]
+        # TODO: This project out could actually be cached at test time -
+        # but we need to think about the best way to implement this and still
+        # allow incrementing
+        return self.project_out(masked_pixels - self.a_bar_m)
+
+
+# TODO: document me!
+class ProjectOutNewton(ProjectOut):
+    r"""
+    :map:`ProjectOut` features regressed with :map:`IRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, bias=True):
+        parent = super(ProjectOutNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(IRLRegression, alpha=alpha, bias=bias)
+
+
+# TODO: document me!
+class ProjectOutGaussNewton(ProjectOut):
+    r"""
+    :map:`ProjectOut` features regressed with :map:`IIRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, alpha2=0, bias=True):
+        parent = super(ProjectOutGaussNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(
+            IIRLRegression, alpha=alpha, alpha2=alpha2, bias=bias)
+
+# TODO: document me!
+class AppearanceWeights(ParametricSupervisedDescentAlgorithm):
+    r"""
+    Features are the appearance model weights of the masked warped image.
+    """
+    def _precompute(self):
+        super(AppearanceWeights, self)._precompute()
+        components = self.appearance_model.components
+        self.pinv_A_m = np.linalg.pinv(components.T[self.interface.i_mask, :])
+
+    def project(self, J):
+        # weights of the mean-centred J on the appearance bases
+        centred = J - self.a_bar_m
+        return self.pinv_A_m.dot(centred)
+
+    def _compute_features(self, image):
+        interface = self.interface
+        masked_pixels = interface.warp(image).as_vector()[interface.i_mask]
+        # project the sampled pixels onto the appearance model
+        return self.project(masked_pixels)
+
+
+# TODO: document me!
+class AppearanceWeightsNewton(AppearanceWeights):
+    r"""
+    :map:`AppearanceWeights` features regressed with :map:`IRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, bias=True):
+        parent = super(AppearanceWeightsNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(
+            IRLRegression, alpha=alpha, bias=bias)
+
+
+# TODO: document me!
+class AppearanceWeightsGaussNewton(AppearanceWeights):
+    r"""
+    :map:`AppearanceWeights` features regressed with :map:`IIRLRegression`.
+    """
+    def __init__(self, aam_interface, n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, alpha2=0, bias=True):
+        parent = super(AppearanceWeightsGaussNewton, self)
+        parent.__init__(aam_interface, n_iterations=n_iterations,
+                        compute_error=compute_error, eps=eps)
+        self._regressor_cls = partial(
+            IIRLRegression, alpha=alpha, alpha2=alpha2, bias=bias)
diff --git a/menpofit/aam/base.py b/menpofit/aam/base.py
index 68ef24f..47cee1e 100644
--- a/menpofit/aam/base.py
+++ b/menpofit/aam/base.py
@@ -1,33 +1,32 @@
 from __future__ import division
-
+from copy import deepcopy
+import warnings
 import numpy as np
-from menpo.shape import TriMesh
-
-from menpofit.base import DeformableModel, name_of_callable
-from .builder import build_patch_reference_frame, build_reference_frame
-
-
-class AAM(DeformableModel):
+from menpo.feature import no_op
+from menpo.visualize import print_dynamic
+from menpo.model import PCAModel
+from menpo.transform import Scale
+from menpo.shape import mean_pointcloud
+from menpofit import checks
+from menpofit.transform import (DifferentiableThinPlateSplines,
+                                DifferentiablePiecewiseAffine)
+from menpofit.base import name_of_callable, batch
+from menpofit.builder import (
+    build_reference_frame, build_patch_reference_frame,
+    compute_features, scale_images, build_shape_model, warp_images,
+    align_shapes, rescale_images_to_reference_shape, densify_shapes,
+    extract_patches, MenpoFitBuilderWarning, compute_reference_shape)
+
+
+# TODO: document me!
+class AAM(object):
     r"""
     Active Appearance Model class.
 
     Parameters
-    -----------
-    shape_models : :map:`PCAModel` list
-        A list containing the shape models of the AAM.
-
-    appearance_models : :map:`PCAModel` list
-        A list containing the appearance models of the AAM.
-
-    n_training_images : `int`
-        The number of training images used to build the AAM.
-
-    transform : :map:`PureAlignmentTransform`
-        The transform used to warp the images from which the AAM was
-        constructed.
-
+    ----------
     features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
+        If list of length ``n_scales``, feature extraction is performed at
         each level after downscaling of the image.
         The first element of the list specifies the features to be extracted at
         the lowest pyramidal level and so on.
@@ -39,49 +38,333 @@ class AAM(DeformableModel):
         Note that from our experience, this approach of extracting features
         once and then creating a pyramid on top tends to lead to better
         performing AAMs.
+    transform : :map:`PureAlignmentTransform`, optional
+        The :map:`PureAlignmentTransform` that will be
+        used to warp the images.
+    trilist : ``(t, 3)`` `ndarray`, optional
+        Triangle list that will be used to build the reference frame. If
+        ``None``, defaults to performing Delaunay triangulation on the points.
+    diagonal : `int` >= ``20``, optional
+        During building an AAM, all images are rescaled to ensure that the
+        scale of their landmarks matches the scale of the mean shape.
+
+        If `int`, it ensures that the mean shape is scaled so that the diagonal
+        of the bounding box containing it matches the diagonal value.
+
+        If ``None``, the mean shape is not rescaled.
+
+        Note that, because the reference frame is computed from the mean
+        landmarks, this kwarg also specifies the diagonal length of the
+        reference frame (provided that features computation does not change
+        the image size).
+    scales : `int` or `float` or list of those, optional
+    max_shape_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
+        If list of length ``n_scales``, then a number of shape components is
+        defined per level. The first element of the list specifies the number
+        of components of the lowest pyramidal level and so on.
+
+        If not a list or a list with length ``1``, then the specified number of
+        shape components will be used for all levels.
+
+        Per level:
+            If `int`, it specifies the exact number of components to be
+            retained.
+
+            If `float`, it specifies the percentage of variance to be retained.
+
+            If ``None``, all the available components are kept
+            (100% of variance).
+    max_appearance_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
+        If list of length ``n_scales``, then a number of appearance components
+        is defined per level. The first element of the list specifies the number
+        of components of the lowest pyramidal level and so on.
+
+        If not a list or a list with length ``1``, then the specified number of
+        appearance components will be used for all levels.
+
+        Per level:
+            If `int`, it specifies the exact number of components to be
+            retained.
+
+            If `float`, it specifies the percentage of variance to be retained.
+
+            If ``None``, all the available components are kept
+            (100% of variance).
+
+    Returns
+    -------
+    aam : :map:`AAMBuilder`
+        The AAM Builder object
+
+    Raises
+    -------
+    ValueError
+        ``diagonal`` must be >= ``20``.
+    ValueError
+        ``scales`` must be `int` or `float` or list of those.
+    ValueError
+        ``features`` must be a `function` or a list of those
+        containing ``1`` or ``len(scales)`` elements
+    ValueError
+        ``max_shape_components`` must be ``None`` or an `int` > 0 or
+        a ``0`` <= `float` <= ``1`` or a list of those containing 1 or
+        ``len(scales)`` elements
+    ValueError
+        ``max_appearance_components`` must be ``None`` or an `int` > ``0`` or a
+        ``0`` <= `float` <= ``1`` or a list of those containing 1 or
+        ``len(scales)`` elements
+    """
+    def __init__(self, images, group=None, verbose=False, reference_shape=None,
+                 holistic_features=no_op,
+                 transform=DifferentiablePiecewiseAffine, diagonal=None,
+                 scales=(0.5, 1.0), max_shape_components=None,
+                 max_appearance_components=None, batch_size=None):
+
+        checks.check_diagonal(diagonal)
+        scales = checks.check_scales(scales)
+        n_scales = len(scales)
+        holistic_features = checks.check_features(holistic_features, n_scales)
+        max_shape_components = checks.check_max_components(
+            max_shape_components, n_scales, 'max_shape_components')
+        max_appearance_components = checks.check_max_components(
+            max_appearance_components, n_scales, 'max_appearance_components')
+
+        self.holistic_features = holistic_features
+        self.transform = transform
+        self.diagonal = diagonal
+        self.scales = scales
+        self.max_shape_components = max_shape_components
+        self.max_appearance_components = max_appearance_components
+        self.reference_shape = reference_shape
+        self.shape_models = []
+        self.appearance_models = []
 
-    reference_shape : :map:`PointCloud`
-        The reference shape that was used to resize all training images to a
-        consistent object size.
+        # Train AAM
+        self._train(images, increment=False, group=group, verbose=verbose,
+                    batch_size=batch_size)
 
-    downscale : `float`
-        The downscale factor that was used to create the different pyramidal
-        levels.
+    def _train(self, images, increment=False, group=None,
+               shape_forgetting_factor=1.0, appearance_forgetting_factor=1.0,
+               verbose=False, batch_size=None):
+        r"""Train on ``images``, optionally split into ``batch_size`` batches;
+        every batch after the first increments the existing models."""
+        # If batch_size is not None, then we may have a generator, else we
+        # assume we have a list.
+        # Either way ``image_batches`` ends up as an iterable of batches, so
+        # the loop below handles both cases uniformly.
+        if batch_size is not None:
+            # Create a generator of fixed sized batches. Will still work even
+            # on an infinite list.
+            image_batches = batch(images, batch_size)
+        else:
+            image_batches = [list(images)]
+
+        for k, image_batch in enumerate(image_batches):
+            if k == 0:
+                if self.reference_shape is None:
+                    # If no reference shape was given, use the mean of the first
+                    # batch
+                    if batch_size is not None:
+                        warnings.warn('No reference shape was provided. The '
+                                      'mean of the first batch will be the '
+                                      'reference shape. If the batch mean is '
+                                      'not representative of the true mean, '
+                                      'this may cause issues.',
+                                      MenpoFitBuilderWarning)
+                    checks.check_landmark_trilist(image_batch[0],
+                                                  self.transform, group=group)
+                    self.reference_shape = compute_reference_shape(
+                        [i.landmarks[group].lms for i in image_batch],
+                        self.diagonal, verbose=verbose)
+
+            # After the first batch, we are incrementing the model
+            if k > 0:
+                increment = True
+
+            if verbose:
+                print('Computing batch {}'.format(k))
+
+            # Train each batch
+            self._train_batch(
+                image_batch, increment=increment, group=group,
+                shape_forgetting_factor=shape_forgetting_factor,
+                appearance_forgetting_factor=appearance_forgetting_factor,
+                verbose=verbose)
+
+    def _train_batch(self, image_batch, increment=False, group=None,
+                     verbose=False, shape_forgetting_factor=1.0,
+                     appearance_forgetting_factor=1.0):
+        r"""
+        Builds an Active Appearance Model from a list of landmarked images.
+
+        Parameters
+        ----------
+        images : list of :map:`MaskedImage`
+            The set of landmarked images from which to build the AAM.
+        group : `string`, optional
+            The key of the landmark set that should be used. If ``None``,
+            and if there is only one set of landmarks, this set will be used.
+        verbose : `boolean`, optional
+            Flag that controls information and progress printing.
 
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames are the mean shapes of each pyramid
-        level, so the shape models are scaled.
+        Returns
+        -------
+        aam : :map:`AAM`
+            The AAM object. Shape and appearance models are stored from
+            lowest to highest scale
+        """
+        # Rescale to existing reference shape
+        image_batch = rescale_images_to_reference_shape(
+            image_batch, group, self.reference_shape,
+            verbose=verbose)
+
+        # build models at each scale
+        if verbose:
+            print_dynamic('- Building models\n')
+
+        feature_images = []
+        # for each scale (low --> high)
+        for j in range(self.n_scales):
+            if verbose:
+                if len(self.scales) > 1:
+                    scale_prefix = '  - Scale {}: '.format(j)
+                else:
+                    scale_prefix = '  - '
+            else:
+                scale_prefix = None
+
+            # Handle holistic features
+            if j == 0 and self.holistic_features[j] == no_op:
+                # Saves a lot of memory
+                feature_images = image_batch
+            elif j == 0 or self.holistic_features[j] is not self.holistic_features[j - 1]:
+                # Compute features only if this is the first pass through
+                # the loop or the features at this scale are different from
+                # the features at the previous scale
+                feature_images = compute_features(image_batch,
+                                                  self.holistic_features[j],
+                                                  prefix=scale_prefix,
+                                                  verbose=verbose)
+            # handle scales
+            if self.scales[j] != 1:
+                # Scale feature images only if scale is different than 1
+                scaled_images = scale_images(feature_images, self.scales[j],
+                                             prefix=scale_prefix,
+                                             verbose=verbose)
+            else:
+                scaled_images = feature_images
 
-        If ``False``, the reference frames of all levels are the mean shape of
-        the highest level, so the shape models are not scaled; they have the
-        same size.
+            # Extract potentially rescaled shapes
+            scale_shapes = [i.landmarks[group].lms for i in scaled_images]
 
-        Note that from our experience, if scaled_shape_models is ``False``, AAMs
-        tend to have slightly better performance.
+            # Build the shape model
+            if verbose:
+                print_dynamic('{}Building shape model'.format(scale_prefix))
 
-    """
-    def __init__(self, shape_models, appearance_models, n_training_images,
-                 transform, features, reference_shape, downscale,
-                 scaled_shape_models):
-        DeformableModel.__init__(self, features)
-        self.n_training_images = n_training_images
-        self.shape_models = shape_models
-        self.appearance_models = appearance_models
-        self.transform = transform
-        self.reference_shape = reference_shape
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
+            if not increment:
+                if j == 0:
+                    shape_model = self._build_shape_model(
+                        scale_shapes, j)
+                    self.shape_models.append(shape_model)
+                else:
+                    self.shape_models.append(deepcopy(shape_model))
+            else:
+                self._increment_shape_model(
+                    scale_shapes,  self.shape_models[j],
+                    forgetting_factor=shape_forgetting_factor)
+
+            # Obtain warped images - we use a scaled version of the
+            # reference shape, computed here. This is because the mean
+            # moves when we are incrementing, and we need a consistent
+            # reference frame.
+            scaled_reference_shape = Scale(self.scales[j], n_dims=2).apply(
+                self.reference_shape)
+            warped_images = self._warp_images(scaled_images, scale_shapes,
+                                              scaled_reference_shape,
+                                              j, scale_prefix, verbose)
+
+            # obtain appearance model
+            if verbose:
+                print_dynamic('{}Building appearance model'.format(
+                    scale_prefix))
+
+            if not increment:
+                appearance_model = PCAModel(warped_images)
+                # trim appearance model if required
+                if self.max_appearance_components is not None:
+                    appearance_model.trim_components(
+                        self.max_appearance_components[j])
+                # add appearance model to the list
+                self.appearance_models.append(appearance_model)
+            else:
+                # increment appearance model
+                self.appearance_models[j].increment(
+                    warped_images,
+                    forgetting_factor=appearance_forgetting_factor)
+                # trim appearance model if required
+                if self.max_appearance_components is not None:
+                    self.appearance_models[j].trim_components(
+                        self.max_appearance_components[j])
+
+            if verbose:
+                print_dynamic('{}Done\n'.format(scale_prefix))
+
+        # Because we just copy the shape model, we need to wait to trim
+        # it after building each model. This ensures we can have a different
+        # number of components per level
+        for j, sm in enumerate(self.shape_models):
+            max_sc = self.max_shape_components[j]
+            if max_sc is not None:
+                sm.trim_components(max_sc)
+
+    def increment(self, images, group=None, verbose=False,
+                  shape_forgetting_factor=1.0, appearance_forgetting_factor=1.0,
+                  batch_size=None):
+        # Literally just to fit under 80 characters, but maintain the sensible
+        # parameter name
+        aff = appearance_forgetting_factor
+        return self._train(images, increment=True, group=group,
+                           verbose=verbose,
+                           shape_forgetting_factor=shape_forgetting_factor,
+                           appearance_forgetting_factor=aff,
+                           batch_size=batch_size)
+
+    def _build_shape_model(self, shapes, scale_index):
+        return build_shape_model(shapes)
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        # Compute aligned shapes
+        aligned_shapes = align_shapes(shapes)
+        # Increment shape model
+        shape_model.increment(aligned_shapes,
+                              forgetting_factor=forgetting_factor)
+
+    def _warp_images(self, images, shapes, reference_shape, scale_index,
+                     prefix, verbose):
+        reference_frame = build_reference_frame(reference_shape)
+        return warp_images(images, shapes, reference_frame, self.transform,
+                           prefix=prefix, verbose=verbose)
 
     @property
-    def n_levels(self):
+    def n_scales(self):
         """
-        The number of multi-resolution pyramidal levels of the AAM.
+        The number of scales of the AAM.
 
         :type: `int`
         """
-        return len(self.appearance_models)
+        return len(self.scales)
+
+    @property
+    def _str_title(self):
+        r"""
+        Returns a string containing the name of the model.
+        :type: `string`
+        """
+        return 'Holistic Active Appearance Model'
 
-    def instance(self, shape_weights=None, appearance_weights=None, level=-1):
+    def instance(self, shape_weights=None, appearance_weights=None,
+                 scale_index=-1):
         r"""
         Generates a novel AAM instance given a set of shape and appearance
         weights. If no weights are provided, the mean AAM instance is
@@ -93,22 +376,20 @@ def instance(self, shape_weights=None, appearance_weights=None, level=-1):
             Weights of the shape model that will be used to create
             a novel shape instance. If ``None``, the mean shape
             ``(shape_weights = [0, 0, ..., 0])`` is used.
-
         appearance_weights : ``(n_weights,)`` `ndarray` or `float` list
             Weights of the appearance model that will be used to create
             a novel appearance instance. If ``None``, the mean appearance
             ``(appearance_weights = [0, 0, ..., 0])`` is used.
-
-        level : `int`, optional
-            The pyramidal level to be used.
+        scale_index : `int`, optional
+            The scale to be used.
 
         Returns
         -------
         image : :map:`Image`
             The novel AAM instance.
         """
-        sm = self.shape_models[level]
-        am = self.appearance_models[level]
+        sm = self.shape_models[scale_index]
+        am = self.appearance_models[scale_index]
 
         # TODO: this bit of logic should to be transferred down to PCAModel
         if shape_weights is None:
@@ -122,24 +403,24 @@ def instance(self, shape_weights=None, appearance_weights=None, level=-1):
         appearance_weights *= am.eigenvalues[:n_appearance_weights] ** 0.5
         appearance_instance = am.instance(appearance_weights)
 
-        return self._instance(level, shape_instance, appearance_instance)
+        return self._instance(scale_index, shape_instance, appearance_instance)
 
-    def random_instance(self, level=-1):
+    def random_instance(self, scale_index=-1):
         r"""
         Generates a novel random instance of the AAM.
 
         Parameters
         -----------
-        level : `int`, optional
-            The pyramidal level to be used.
+        scale_index : `int`, optional
+            The scale to be used.
 
         Returns
         -------
         image : :map:`Image`
             The novel AAM instance.
         """
-        sm = self.shape_models[level]
-        am = self.appearance_models[level]
+        sm = self.shape_models[scale_index]
+        am = self.appearance_models[scale_index]
 
         # TODO: this bit of logic should to be transferred down to PCAModel
         shape_weights = (np.random.randn(sm.n_active_components) *
@@ -149,14 +430,13 @@ def random_instance(self, level=-1):
                               am.eigenvalues[:am.n_active_components]**0.5)
         appearance_instance = am.instance(appearance_weights)
 
-        return self._instance(level, shape_instance, appearance_instance)
+        return self._instance(scale_index, shape_instance, appearance_instance)
 
-    def _instance(self, level, shape_instance, appearance_instance):
-        template = self.appearance_models[level].mean()
+    def _instance(self, scale_index, shape_instance, appearance_instance):
+        template = self.appearance_models[scale_index].mean()
         landmarks = template.landmarks['source'].lms
 
-        reference_frame = self._build_reference_frame(
-            shape_instance, landmarks)
+        reference_frame = build_reference_frame(shape_instance)
 
         transform = self.transform(
             reference_frame.landmarks['source'].lms, landmarks)
@@ -164,26 +444,9 @@ def _instance(self, level, shape_instance, appearance_instance):
         return appearance_instance.as_unmasked(copy=False).warp_to_mask(
             reference_frame.mask, transform, warp_landmarks=True)
 
-    def _build_reference_frame(self, reference_shape, landmarks):
-        if type(landmarks) == TriMesh:
-            trilist = landmarks.trilist
-        else:
-            trilist = None
-        return build_reference_frame(
-            reference_shape, trilist=trilist)
-
-    @property
-    def _str_title(self):
-        r"""
-        Returns a string containing name of the model.
-
-        :type: `string`
-        """
-        return 'Active Appearance Model'
-
-    def view_shape_models_widget(self, n_parameters=5, mode='multiple',
+    def view_shape_models_widget(self, n_parameters=5,
                                  parameters_bounds=(-3.0, 3.0),
-                                 figure_size=(10, 8), style='coloured'):
+                                 mode='multiple', figure_size=(10, 8)):
         r"""
         Visualizes the shape models of the AAM object using the
         `menpo.visualize.widgets.visualize_shape_model` widget.
@@ -191,232 +454,207 @@ def view_shape_models_widget(self, n_parameters=5, mode='multiple',
         Parameters
         -----------
         n_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the parameters
-            sliders. If `int`, then the number of sliders per level is the
-            minimum between `n_parameters` and the number of active components
-            per level. If `list` of `int`, then a number of sliders is defined
-            per level. If ``None``, all the active components per level will
-            have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+            The number of shape principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``single``, ``multiple``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_shape_model
-        visualize_shape_model(
-            self.shape_models, n_parameters=n_parameters,
-            parameters_bounds=parameters_bounds, figure_size=figure_size,
-            mode=mode, style=style)
+        visualize_shape_model(self.shape_models, n_parameters=n_parameters,
+                              parameters_bounds=parameters_bounds,
+                              figure_size=figure_size, mode=mode)
 
-    def view_appearance_models_widget(self, n_parameters=5, mode='multiple',
+    def view_appearance_models_widget(self, n_parameters=5,
                                       parameters_bounds=(-3.0, 3.0),
-                                      figure_size=(10, 8), style='coloured'):
+                                      mode='multiple', figure_size=(10, 8)):
         r"""
         Visualizes the appearance models of the AAM object using the
         `menpo.visualize.widgets.visualize_appearance_model` widget.
-
         Parameters
         -----------
         n_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the parameters
-            sliders. If `int`, then the number of sliders per level is the
-            minimum between `n_parameters` and the number of active components
-            per level. If `list` of `int`, then a number of sliders is defined
-            per level. If ``None``, all the active components per level will
-            have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+            The number of appearance principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``single``, ``multiple``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_appearance_model
-        visualize_appearance_model(
-            self.appearance_models, n_parameters=n_parameters,
-            parameters_bounds=parameters_bounds, figure_size=figure_size,
-            mode=mode, style=style)
+        visualize_appearance_model(self.appearance_models,
+                                   n_parameters=n_parameters,
+                                   parameters_bounds=parameters_bounds,
+                                   figure_size=figure_size, mode=mode)
 
+    # TODO: fix me!
     def view_aam_widget(self, n_shape_parameters=5, n_appearance_parameters=5,
-                        mode='multiple', parameters_bounds=(-3.0, 3.0),
-                        figure_size=(10, 8), style='coloured'):
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
         r"""
         Visualizes both the shape and appearance models of the AAM object using
         the `menpo.visualize.widgets.visualize_aam` widget.
-
         Parameters
         -----------
-        n_shape_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the shape
-            parameters sliders. If `int`, then the number of sliders per level
-            is the minimum between `n_parameters` and the number of active
-            components per level. If `list` of `int`, then a number of sliders
-            is defined per level. If ``None``, all the active components per
-            level will have a slider.
-        n_appearance_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the appearance
-            parameters sliders. If `int`, then the number of sliders per level
-            is the minimum between `n_parameters` and the number of active
-            components per level. If `list` of `int`, then a number of sliders
-            is defined per level. If ``None``, all the active components per
-            level will have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+        n_shape_parameters : `int` or `list` of `int` or None, optional
+            The number of shape principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
+        n_appearance_parameters : `int` or `list` of `int` or None, optional
+            The number of appearance principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``single``, ``multiple``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_aam
         visualize_aam(self, n_shape_parameters=n_shape_parameters,
                       n_appearance_parameters=n_appearance_parameters,
                       parameters_bounds=parameters_bounds,
-                      figure_size=figure_size, mode=mode, style=style)
+                      figure_size=figure_size, mode=mode)
 
     def __str__(self):
-        out = "{}\n - {} training images.\n".format(self._str_title,
-                                                    self.n_training_images)
-        # small strings about number of channels, channels string and downscale
-        n_channels = []
-        down_str = []
-        for j in range(self.n_levels):
-            n_channels.append(
-                self.appearance_models[j].template_instance.n_channels)
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(
-                name_of_callable(self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                feat_str.append("- Feature is {} with ".format(
-                    name_of_callable(self.features[j])))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        out = "{} - {} Warp.\n".format(out, name_of_callable(self.transform))
-        if self.n_levels > 1:
-            if self.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n".format(out, self.n_levels,
-                                                          self.downscale)
+        return _aam_str(self)
 
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n".format(out, self.n_levels,
-                                                 self.downscale)
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-                if not self.scaled_shape_models:
-                    out = "{}   - Reference frames of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                        out,
-                        self.appearance_models[0].n_features,
-                        self.appearance_models[0].template_instance.n_true_pixels(),
-                        n_channels[0],
-                        self.appearance_models[0].template_instance._str_shape,
-                        n_channels[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                if (self.scaled_shape_models or
-                        (not self.pyramid_on_features)):
-                    out = "{}     - Reference frame of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                        out, self.appearance_models[i].n_features,
-                        self.appearance_models[i].template_instance.n_true_pixels(),
-                        n_channels[i],
-                        self.appearance_models[i].template_instance._str_shape,
-                        n_channels[i])
-                out = "{0}     - {1} shape components ({2:.2f}% of " \
-                      "variance)\n     - {3} appearance components " \
-                      "({4:.2f}% of variance)\n".format(
-                    out, self.shape_models[i].n_components,
-                    self.shape_models[i].variance_ratio() * 100,
-                    self.appearance_models[i].n_components,
-                    self.appearance_models[i].variance_ratio() * 100)
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - Reference frame of length {4} ({5} x {6}C, " \
-                  "{7} x {8}C)\n   - {9} shape components ({10:.2f}% of " \
-                  "variance)\n   - {11} appearance components ({12:.2f}% of " \
-                  "variance)\n".format(
-                out, feat_str[0], n_channels[0], ch_str[0],
-                self.appearance_models[0].n_features,
-                self.appearance_models[0].template_instance.n_true_pixels(),
-                n_channels[0],
-                self.appearance_models[0].template_instance._str_shape,
-                n_channels[0], self.shape_models[0].n_components,
-                self.shape_models[0].variance_ratio() * 100,
-                self.appearance_models[0].n_components,
-                self.appearance_models[0].variance_ratio() * 100)
-        return out
-
-
-class PatchBasedAAM(AAM):
+
+# TODO: document me!
+class MaskedAAM(AAM):
     r"""
-    Patch Based Active Appearance Model class.
+    Masked Active Appearance Model class.
 
     Parameters
     -----------
     shape_models : :map:`PCAModel` list
         A list containing the shape models of the AAM.
-
     appearance_models : :map:`PCAModel` list
         A list containing the appearance models of the AAM.
+    reference_shape : :map:`PointCloud`
+        The reference shape that was used to resize all training images to a
+        consistent object size.
+    patch_size : tuple of `int`
+        The shape of the patches used to build the Masked AAM.
+    features : `callable` or ``[callable]``
+        If list of length ``n_scales``, feature extraction is performed at
+        each scale after downscaling of the image.
+        The first element of the list specifies the features to be extracted at
+        the lowest scale and so on.
 
-    n_training_images : `int`
-        The number of training images used to build the AAM.
+        If ``callable`` the specified feature will be applied to the original
+        image and pyramid generation will be performed on top of the feature
+        image. Also see the `pyramid_on_features` property.
 
-    patch_shape : tuple of `int`
-        The shape of the patches used to build the Patch Based AAM.
+        Note that from our experience, this approach of extracting features
+        once and then creating a pyramid on top tends to lead to better
+        performing AAMs.
+
+    scales : `int` or `float` or list of those
+    scale_shapes : `boolean`
+    """
+
+    def __init__(self, images, group=None, verbose=False,
+                 holistic_features=no_op, diagonal=None, scales=(0.5, 1.0),
+                 patch_size=(17, 17), max_shape_components=None,
+                 max_appearance_components=None, batch_size=None):
+        n_scales = len(checks.check_scales(scales))
+        self.patch_size = checks.check_patch_size(patch_size, n_scales)
+
+        super(MaskedAAM, self).__init__(
+            images, group=group, verbose=verbose,
+            holistic_features=holistic_features,
+            transform=DifferentiableThinPlateSplines, diagonal=diagonal,
+            scales=scales,  max_shape_components=max_shape_components,
+            max_appearance_components=max_appearance_components,
+            batch_size=batch_size)
+
+    def _warp_images(self, images, shapes, reference_shape, scale_index,
+                     prefix, verbose):
+        reference_frame = build_patch_reference_frame(
+            reference_shape, patch_size=self.patch_size[scale_index])
+        return warp_images(images, shapes, reference_frame, self.transform,
+                           prefix=prefix, verbose=verbose)
+
+    @property
+    def _str_title(self):
+        return 'Masked Active Appearance Model'
+
+    def _instance(self, scale_index, shape_instance, appearance_instance):
+        template = self.appearance_models[scale_index].mean()
+        landmarks = template.landmarks['source'].lms
+        # patch_size holds one entry per scale (see _warp_images)
+        reference_frame = build_patch_reference_frame(
+            shape_instance, patch_size=self.patch_size[scale_index])
+        # map the instance's reference frame onto the template landmarks
+        transform = self.transform(
+            reference_frame.landmarks['source'].lms, landmarks)
+        # warp the appearance into the mask of the new reference frame
+        return appearance_instance.as_unmasked().warp_to_mask(
+            reference_frame.mask, transform, warp_landmarks=True)
 
+    def view_appearance_models_widget(self, n_parameters=5,
+                                      parameters_bounds=(-3.0, 3.0),
+                                      mode='multiple', figure_size=(10, 8)):
+        from menpofit.visualize import visualize_appearance_model
+        visualize_appearance_model(self.appearance_models,
+                                   n_parameters=n_parameters,
+                                   parameters_bounds=parameters_bounds,
+                                   figure_size=figure_size, mode=mode)
+
+    def __str__(self):
+        return _aam_str(self)
+
+
+# TODO: document me!
+class LinearAAM(AAM):
+    r"""
+    Linear Active Appearance Model class.
+
+    Parameters
+    -----------
+    shape_models : :map:`PCAModel` list
+        A list containing the shape models of the AAM.
+    appearance_models : :map:`PCAModel` list
+        A list containing the appearance models of the AAM.
+    reference_shape : :map:`PointCloud`
+        The reference shape that was used to resize all training images to a
+        consistent object size.
     transform : :map:`PureAlignmentTransform`
         The transform used to warp the images from which the AAM was
         constructed.
-
     features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
+        If list of length ``n_scales``, feature extraction is performed at
         each level after downscaling of the image.
         The first element of the list specifies the features to be extracted at
         the lowest pyramidal level and so on.
@@ -429,51 +667,304 @@ class PatchBasedAAM(AAM):
         once and then creating a pyramid on top tends to lead to better
         performing AAMs.
 
+    scales : `int` or `float` or list of those
+    """
+
+    def __init__(self, images, group=None, verbose=False,
+                 holistic_features=no_op,
+                 transform=DifferentiableThinPlateSplines, diagonal=None,
+                 scales=(0.5, 1.0), max_shape_components=None,
+                 max_appearance_components=None, batch_size=None):
+        r"""Forward every argument unchanged to the base class constructor."""
+        super(LinearAAM, self).__init__(
+            images, group=group, verbose=verbose,
+            diagonal=diagonal, scales=scales,
+            holistic_features=holistic_features, transform=transform,
+            batch_size=batch_size,
+            max_shape_components=max_shape_components,
+            max_appearance_components=max_appearance_components)
+
+    @property
+    def _str_title(self):
+        r"""
+        Returns the title line used when this model is printed.
+        :type: `string`
+        """
+        return 'Linear Active Appearance Model'
+
+    def _build_shape_model(self, shapes, scale_index):
+        r"""Build the dense shape model and cache the reference frame."""
+        mean_shape = mean_pointcloud(align_shapes(shapes))
+        self.n_landmarks = mean_shape.n_points
+        self.reference_frame = build_reference_frame(mean_shape)
+        # densify the given shapes over the reference frame, then build the
+        # shape model from the dense result
+        dense = densify_shapes(shapes, self.reference_frame, self.transform)
+        return build_shape_model(dense)
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        r"""Increment ``shape_model`` with the densified, aligned shapes."""
+        dense = densify_shapes(align_shapes(shapes), self.reference_frame,
+                               self.transform)
+        # Nothing is returned: the model that was passed in is incremented
+        shape_model.increment(
+            dense, forgetting_factor=forgetting_factor)
+
+    def _warp_images(self, images, shapes, reference_shape, scale_index,
+                     prefix, verbose):
+        # reference_shape and scale_index are accepted but not used here
+        return warp_images(images, shapes, self.reference_frame,
+                           self.transform, prefix=prefix, verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, scale_index, shape_instance, appearance_instance):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_appearance_models_widget(self, n_parameters=5,
+                                      parameters_bounds=(-3.0, 3.0),
+                                      mode='multiple', figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_aam_widget(self, n_shape_parameters=5, n_appearance_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    def __str__(self):
+        return _aam_str(self)  # module-level summary formatter
+
+
+# TODO: document me!
+class LinearMaskedAAM(AAM):
+    r"""
+    Linear Masked Active Appearance Model class.
+
+    Parameters
+    -----------
+    shape_models : :map:`PCAModel` list
+        A list containing the shape models of the AAM.
+    appearance_models : :map:`PCAModel` list
+        A list containing the appearance models of the AAM.
     reference_shape : :map:`PointCloud`
         The reference shape that was used to resize all training images to a
         consistent object size.
+    patch_size : tuple of `int`
+        The shape of the patches used to build the Patch Based AAM.
+    features : `callable` or ``[callable]``
+        If list of length ``n_scales``, feature extraction is performed at
+        each level after downscaling of the image.
+        The first element of the list specifies the features to be extracted at
+        the lowest pyramidal level and so on.
 
-    downscale : `float`
-        The downscale factor that was used to create the different pyramidal
-        levels.
+        If ``callable`` the specified feature will be applied to the original
+        image and pyramid generation will be performed on top of the feature
+        image. Also see the `pyramid_on_features` property.
 
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames are the mean shapes of each pyramid
-        level, so the shape models are scaled.
+        Note that from our experience, this approach of extracting features
+        once and then creating a pyramid on top tends to lead to better
+        performing AAMs.
+
+    scales : `int` or `float` or list of those
+    """
+
+    def __init__(self, images, group=None, verbose=False,
+                 holistic_features=no_op, diagonal=None, scales=(0.5, 1.0),
+                 patch_size=(17, 17), max_shape_components=None,
+                 max_appearance_components=None, batch_size=None):
+        # Store the result of checks.check_patch_size before the base
+        # constructor runs
+        self.patch_size = checks.check_patch_size(
+            patch_size, len(checks.check_scales(scales)))
+        super(LinearMaskedAAM, self).__init__(
+            images, group=group, verbose=verbose, diagonal=diagonal,
+            holistic_features=holistic_features, scales=scales,
+            transform=DifferentiableThinPlateSplines, batch_size=batch_size,
+            max_shape_components=max_shape_components,
+            max_appearance_components=max_appearance_components)
+
+    @property
+    def _str_title(self):
+        r"""
+        Returns the title line used when this model is printed.
+        :type: `string`
+        """
+        return 'Linear Masked Active Appearance Model'
+
+    def _build_shape_model(self, shapes, scale_index):
+        r"""Build the dense shape model; caches the patch reference frame."""
+        mean_shape = mean_pointcloud(align_shapes(shapes))
+        self.n_landmarks = mean_shape.n_points
+        # the patch size is looked up per scale
+        self.reference_frame = build_patch_reference_frame(
+            mean_shape, patch_size=self.patch_size[scale_index])
+        # densify the given shapes over that frame and model the result
+        dense = densify_shapes(shapes, self.reference_frame, self.transform)
+        return build_shape_model(dense)
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        r"""Increment ``shape_model`` with the densified, aligned shapes."""
+        dense = densify_shapes(align_shapes(shapes), self.reference_frame,
+                               self.transform)
+        # Nothing is returned: the model that was passed in is incremented
+        shape_model.increment(
+            dense, forgetting_factor=forgetting_factor)
+
+    def _warp_images(self, images, shapes, reference_shape, scale_index,
+                     prefix, verbose):
+        # reference_shape and scale_index are accepted but not used here
+        return warp_images(images, shapes, self.reference_frame,
+                           self.transform, prefix=prefix, verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, scale_index, shape_instance, appearance_instance):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_appearance_models_widget(self, n_parameters=5,
+                                      parameters_bounds=(-3.0, 3.0),
+                                      mode='multiple', figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_aam_widget(self, n_shape_parameters=5, n_appearance_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    def __str__(self):
+        return _aam_str(self)  # module-level summary formatter
+
+
+# TODO: document me!
+# TODO: implement offsets support?
+class PatchAAM(AAM):
+    r"""
+    Patch-based Active Appearance Model class.
+
+    Parameters
+    -----------
+    shape_models : :map:`PCAModel` list
+        A list containing the shape models of the AAM.
+    appearance_models : :map:`PCAModel` list
+        A list containing the appearance models of the AAM.
+    reference_shape : :map:`PointCloud`
+        The reference shape that was used to resize all training images to a
+        consistent object size.
+    patch_size : tuple of `int`
+        The shape of the patches used to build the Patch Based AAM.
+    features : `callable` or ``[callable]``
+        If list of length ``n_scales``, feature extraction is performed at
+        each level after downscaling of the image.
+        The first element of the list specifies the features to be extracted at
+        the lowest pyramidal level and so on.
 
-        If ``False``, the reference frames of all levels are the mean shape of
-        the highest level, so the shape models are not scaled; they have the
-        same size.
+        If ``callable`` the specified feature will be applied to the original
+        image and pyramid generation will be performed on top of the feature
+        image. Also see the `pyramid_on_features` property.
 
-        Note that from our experience, if ``scaled_shape_models`` is ``False``,
-        AAMs tend to have slightly better performance.
+        Note that from our experience, this approach of extracting features
+        once and then creating a pyramid on top tends to lead to better
+        performing AAMs.
 
+    patch_normalisation : `callable`
+    scales : `int` or `float` or list of those
     """
-    def __init__(self, shape_models, appearance_models, n_training_images,
-                 patch_shape, transform, features, reference_shape,
-                 downscale, scaled_shape_models):
-        super(PatchBasedAAM, self).__init__(
-            shape_models, appearance_models, n_training_images, transform,
-            features, reference_shape, downscale, scaled_shape_models)
-        self.patch_shape = patch_shape
-
-    def _build_reference_frame(self, reference_shape, landmarks):
-        return build_patch_reference_frame(
-            reference_shape, patch_shape=self.patch_shape)
+
+    def __init__(self, images, group=None, verbose=False,
+                 holistic_features=no_op, patch_normalisation=no_op,
+                 diagonal=None, scales=(0.5, 1.0), patch_size=(17, 17),
+                 max_shape_components=None, max_appearance_components=None,
+                 batch_size=None):
+        # Patch settings are stored before the base constructor runs
+        self.patch_size = checks.check_patch_size(
+            patch_size, len(checks.check_scales(scales)))
+        self.patch_normalisation = patch_normalisation
+        # transform=None: no warping transform is handed to the base class
+        super(PatchAAM, self).__init__(
+            images, group=group, verbose=verbose, diagonal=diagonal,
+            holistic_features=holistic_features, scales=scales,
+            transform=None, batch_size=batch_size,
+            max_shape_components=max_shape_components,
+            max_appearance_components=max_appearance_components)
 
     @property
     def _str_title(self):
         r"""
         Returns a string containing name of the model.
-
         :type: `string`
         """
-        return 'Patch-Based Active Appearance Model'
+        return 'Patch-based Active Appearance Model'
+
+    def _warp_images(self, images, shapes, reference_shape, scale_index,
+                     prefix, verbose):
+        return extract_patches(  # reference_shape is accepted but unused
+            images, shapes, self.patch_size[scale_index], prefix=prefix,
+            normalise_function=self.patch_normalisation, verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, scale_index, shape_instance, appearance_instance):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_appearance_models_widget(self, n_parameters=5,
+                                      parameters_bounds=(-3.0, 3.0),
+                                      mode='multiple', figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
+
+    # TODO: implement me!
+    def view_aam_widget(self, n_shape_parameters=5, n_appearance_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # NotImplemented is not an exception
 
     def __str__(self):
-        out = super(PatchBasedAAM, self).__str__()
-        out_splitted = out.splitlines()
-        out_splitted[0] = self._str_title
-        out_splitted.insert(5, "   - Patch size is {}W x {}H.".format(
-            self.patch_shape[1], self.patch_shape[0]))
-        return '\n'.join(out_splitted)
+        return _aam_str(self)
+
+
+def _aam_str(aam):
+    r"""Return the printable, per-scale summary string of ``aam``."""
+    # Fall back to the reference shape's range when no diagonal was set
+    if aam.diagonal is not None:
+        diagonal = aam.diagonal
+    else:
+        y, x = aam.reference_shape.range()
+        diagonal = np.sqrt(x ** 2 + y ** 2)
+
+    # Compute scale info strings
+    scales_info = []
+    lvl_str_tmplt = r"""  - Scale {}
+   - Holistic feature: {}
+   - {} appearance components
+   - {} shape components"""
+    for k, s in enumerate(aam.scales):
+        scales_info.append(lvl_str_tmplt.format(
+            s, name_of_callable(aam.holistic_features[k]),
+            aam.appearance_models[k].n_components,
+            aam.shape_models[k].n_components))
+    # Patch based AAM
+    if hasattr(aam, 'patch_size'):
+        for k in range(len(scales_info)):
+            scales_info[k] += '\n   - Patch size: {}'.format(
+                aam.patch_size[k])
+    scales_info = '\n'.join(scales_info)
+
+    if aam.transform is not None:
+        # Insert the transform's name rather than an unfilled placeholder
+        transform_str = 'Images warped with {} transform'.format(
+            name_of_callable(aam.transform))
+    else:
+        transform_str = 'No image warping performed'
+
+    return r"""{class_title}
+ - Images scaled to diagonal: {diagonal:.2f}
+ - {transform}
+ - Scales: {scales}
+{scales_info}
+""".format(class_title=aam._str_title, transform=transform_str,
+           diagonal=diagonal, scales=aam.scales, scales_info=scales_info)
+
+HolisticAAM = AAM
diff --git a/menpofit/aam/builder.py b/menpofit/aam/builder.py
deleted file mode 100644
index 4179d19..0000000
--- a/menpofit/aam/builder.py
+++ /dev/null
@@ -1,664 +0,0 @@
-from __future__ import division
-import numpy as np
-
-from menpo.shape import TriMesh
-from menpo.image import MaskedImage
-from menpo.transform import Translation
-from menpo.feature import igo
-from menpo.model import PCAModel
-from menpo.visualize import print_dynamic, print_progress
-
-from menpofit import checks
-from menpofit.base import create_pyramid
-from menpofit.builder import (DeformableModelBuilder, build_shape_model,
-                              normalization_wrt_reference_shape)
-from menpofit.transform import (DifferentiablePiecewiseAffine,
-                                DifferentiableThinPlateSplines)
-
-
-class AAMBuilder(DeformableModelBuilder):
-    r"""
-    Class that builds Multilevel Active Appearance Models.
-
-    Parameters
-    ----------
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
-
-    transform : :map:`PureAlignmentTransform`, optional
-        The :map:`PureAlignmentTransform` that will be
-        used to warp the images.
-
-    trilist : ``(t, 3)`` `ndarray`, optional
-        Triangle list that will be used to build the reference frame. If
-        ``None``, defaults to performing Delaunay triangulation on the points.
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During building an AAM, all images are rescaled to ensure that the
-        scale of their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the normalization_diagonal
-        value.
-
-        If ``None``, the mean shape is not rescaled.
-
-        Note that, because the reference frame is computed from the mean
-        landmarks, this kwarg also specifies the diagonal length of the
-        reference frame (provided that features computation does not change
-        the image size).
-
-    n_levels : `int` > 0, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= ``1``, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(``n_levels``)
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames will be the mean shapes of
-        each pyramid level, so the shape models will be scaled.
-
-        If ``False``, the reference frames of all levels will be the mean shape
-        of the highest level, so the shape models will not be scaled; they will
-        have the same size.
-
-        Note that from our experience, if ``scaled_shape_models`` is ``False``,
-        AAMs tend to have slightly better performance.
-
-    max_shape_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of shape components is
-        defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        shape components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    max_appearance_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of appearance components
-        is defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        appearance components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    boundary : `int` >= ``0``, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    Returns
-    -------
-    aam : :map:`AAMBuilder`
-        The AAM Builder object
-
-    Raises
-    -------
-    ValueError
-        ``n_levels`` must be `int` > ``0``
-    ValueError
-        ``downscale`` must be >= ``1``
-    ValueError
-        ``normalization_diagonal`` must be >= ``20``
-    ValueError
-        ``max_shape_components`` must be ``None`` or an `int` > 0 or
-        a ``0`` <= `float` <= ``1`` or a list of those containing 1 or
-        ``n_levels`` elements
-    ValueError
-        ``max_appearance_components`` must be ``None`` or an `int` > ``0`` or a
-        ``0`` <= `float` <= ``1`` or a list of those containing 1 or
-        ``n_levels`` elements
-    ValueError
-        ``features`` must be a `function` or a list of those
-        containing ``1`` or ``n_levels`` elements
-    """
-    def __init__(self, features=igo, transform=DifferentiablePiecewiseAffine,
-                 trilist=None, normalization_diagonal=None, n_levels=3,
-                 downscale=2, scaled_shape_models=True,
-                 max_shape_components=None, max_appearance_components=None,
-                 boundary=3):
-        # check parameters
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-        checks.check_normalization_diagonal(normalization_diagonal)
-        checks.check_boundary(boundary)
-        max_shape_components = checks.check_max_components(
-            max_shape_components, n_levels, 'max_shape_components')
-        max_appearance_components = checks.check_max_components(
-            max_appearance_components, n_levels, 'max_appearance_components')
-        features = checks.check_features(features, n_levels)
-        # store parameters
-        self.features = features
-        self.transform = transform
-        self.trilist = trilist
-        self.normalization_diagonal = normalization_diagonal
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
-        self.max_shape_components = max_shape_components
-        self.max_appearance_components = max_appearance_components
-        self.boundary = boundary
-
-    def build(self, images, group=None, label=None, verbose=False):
-        r"""
-        Builds a Multilevel Active Appearance Model from a list of
-        landmarked images.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images from which to build the AAM.
-
-        group : `string`, optional
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`, optional
-            The label of of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        verbose : `boolean`, optional
-            Flag that controls information and progress printing.
-
-        Returns
-        -------
-        aam : :map:`AAM`
-            The AAM object. Shape and appearance models are stored from lowest
-            to highest level
-        """
-        # compute reference_shape and normalize images size
-        self.reference_shape, normalized_images = \
-            normalization_wrt_reference_shape(images, group, label,
-                                              self.normalization_diagonal,
-                                              verbose=verbose)
-
-        # create pyramid
-        generators = create_pyramid(normalized_images, self.n_levels,
-                                    self.downscale, self.features,
-                                    verbose=verbose)
-
-        # build the model at each pyramid level
-        if verbose:
-            if self.n_levels > 1:
-                print('- Building model for each of the {} '
-                      'pyramid levels'.format(self.n_levels))
-            else:
-                print('- Building model')
-
-        shape_models = []
-        appearance_models = []
-        # for each pyramid level (high --> low)
-        for j in range(self.n_levels):
-            # since models are built from highest to lowest level, the
-            # parameters in form of list need to use a reversed index
-            rj = self.n_levels - j - 1
-
-            if verbose:
-                level_str = '  - '
-                if self.n_levels > 1:
-                    level_str = '  - Level {}: '.format(j + 1)
-
-            # get feature images of current level
-            feature_images = []
-
-            if verbose:
-                generators_with_print = print_progress(
-                    generators, show_bar=False, show_eta=False,
-                    end_with_newline=False,
-                    prefix='{}Computing feature space/rescaling'.format(level_str))
-            else:
-                generators_with_print = generators
-
-            for g in generators_with_print:
-                feature_images.append(next(g))
-
-            # extract potentially rescaled shapes
-            shapes = [i.landmarks[group][label] for i in feature_images]
-
-            # define shapes that will be used for training
-            if j == 0:
-                original_shapes = shapes
-                train_shapes = shapes
-            else:
-                if self.scaled_shape_models:
-                    train_shapes = shapes
-                else:
-                    train_shapes = original_shapes
-
-            # train shape model and find reference frame
-            if verbose:
-                print_dynamic('{}Building shape model'.format(level_str))
-            shape_model = build_shape_model(
-                train_shapes, self.max_shape_components[rj])
-            reference_frame = self._build_reference_frame(shape_model.mean())
-
-            # add shape model to the list
-            shape_models.append(shape_model)
-
-            # compute transforms
-            if verbose:
-                print_dynamic('{}Computing transforms'.format(level_str))
-
-
-            # Create a dummy initial transform
-            s_to_t_transform = self.transform(
-                reference_frame.landmarks['source'].lms,
-                reference_frame.landmarks['source'].lms)
-
-            if verbose:
-                feature_images_with_print = print_progress(
-                    feature_images, show_bar=False, show_eta=False,
-                    end_with_newline=False,
-                    prefix='{}Warping images'.format(level_str))
-            else:
-                feature_images_with_print = feature_images
-
-            # warp images to reference frame
-            warped_images = []
-            for i in feature_images_with_print:
-                # Setting the target can be significantly faster for transforms
-                # such as CachedPiecewiseAffine
-                s_to_t_transform.set_target(i.landmarks[group][label])
-                warped_images.append(i.warp_to_mask(reference_frame.mask,
-                                                    s_to_t_transform))
-
-            # attach reference_frame to images' source shape
-            for i in warped_images:
-                i.landmarks['source'] = reference_frame.landmarks['source']
-
-            # build appearance model
-            if verbose:
-                print_dynamic('{}Building appearance model'.format(level_str))
-            appearance_model = PCAModel(warped_images)
-            # trim appearance model if required
-            if self.max_appearance_components[rj] is not None:
-                appearance_model.trim_components(
-                    self.max_appearance_components[rj])
-
-            # add appearance model to the list
-            appearance_models.append(appearance_model)
-
-            if verbose:
-                print('{}Done'.format(level_str).ljust(80))
-
-        # reverse the list of shape and appearance models so that they are
-        # ordered from lower to higher resolution
-        shape_models.reverse()
-        appearance_models.reverse()
-        n_training_images = len(images)
-
-        return self._build_aam(shape_models, appearance_models,
-                               n_training_images)
-
-    def _build_reference_frame(self, mean_shape):
-        r"""
-        Generates the reference frame given a mean shape.
-
-        Parameters
-        ----------
-        mean_shape : :map:`PointCloud`
-            The mean shape to use.
-
-        Returns
-        -------
-        reference_frame : :map:`MaskedImage`
-            The reference frame.
-        """
-        return build_reference_frame(mean_shape, boundary=self.boundary,
-                                     trilist=self.trilist)
-
-    def _build_aam(self, shape_models, appearance_models, n_training_images):
-        r"""
-        Returns an AAM object.
-
-        Parameters
-        ----------
-        shape_models : :map:`PCAModel`
-            The trained multilevel shape models.
-            
-        appearance_models : :map:`PCAModel`
-            The trained multilevel appearance models.
-            
-        n_training_images : `int`
-            The number of training images.
-
-        Returns
-        -------
-        aam : :map:`AAM`
-            The trained AAM object.
-        """
-        from .base import AAM
-        return AAM(shape_models, appearance_models, n_training_images,
-                   self.transform, self.features, self.reference_shape,
-                   self.downscale, self.scaled_shape_models)
-
-
-class PatchBasedAAMBuilder(AAMBuilder):
-    r"""
-    Class that builds Multilevel Patch-Based Active Appearance Models.
-
-    Parameters
-    ----------
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
-
-    patch_shape : tuple of `int`, optional
-        The appearance model of the Patch-Based AAM will be obtained by
-        sampling appearance patches with the specified shape around each
-        landmark.
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During building an AAM, all images are rescaled to ensure that the
-        scale of their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the ``normalization_diagonal``
-        value.
-
-        If ``None``, the mean shape is not rescaled.
-
-        .. note::
-
-            Because the reference frame is computed from the mean
-            landmarks, this kwarg also specifies the diagonal length of the
-            reference frame (provided that features computation does not change
-            the image size).
-
-    n_levels : `int` > ``0``, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= 1, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(``n_levels``)
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames will be the mean shapes of each
-        pyramid level, so the shape models will be scaled.
-        If ``False``, the reference frames of all levels will be the mean shape
-        of the highest level, so the shape models will not be scaled; they will
-        have the same size.
-        Note that from our experience, if scaled_shape_models is ``False``, AAMs
-        tend to have slightly better performance.
-
-    max_shape_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of shape components is
-        defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        shape components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    max_appearance_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of appearance components
-        is defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        appearance components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-            If `float`, it specifies the percentage of variance to be retained.
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    boundary : `int` >= ``0``, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    Returns
-    -------
-    aam : ::map:`PatchBasedAAMBuilder`
-        The Patch-Based AAM Builder object
-
-    Raises
-    -------
-    ValueError
-        ``n_levels`` must be `int` > ``0``
-    ValueError
-        ``downscale`` must be >= ``1``
-    ValueError
-        ``normalization_diagonal`` must be >= ``20``
-    ValueError
-        ``max_shape_components must be ``None`` or an `int` > ``0`` or
-        a ``0`` <= `float` <= ``1`` or a list of those containing ``1``
-        or ``n_levels`` elements
-    ValueError
-        ``max_appearance_components`` must be ``None`` or an `int` > 0 or a
-        ``0`` <= `float` <= ``1`` or a list of those containing ``1``
-        or ``n_levels`` elements
-    ValueError
-        ``features`` must be a `string` or a `function` or a list of those
-        containing 1 or ``n_levels`` elements
-    ValueError
-        ``pyramid_on_features`` is enabled so ``features`` must be a
-        `string` or a `function` or a list containing one of those
-    """
-    def __init__(self, features=igo, patch_shape=(16, 16),
-                 normalization_diagonal=None, n_levels=3, downscale=2,
-                 scaled_shape_models=True, max_shape_components=None,
-                 max_appearance_components=None, boundary=3):
-        # check parameters
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-        checks.check_normalization_diagonal(normalization_diagonal)
-        checks.check_boundary(boundary)
-        max_shape_components = checks.check_max_components(
-            max_shape_components, n_levels, 'max_shape_components')
-        max_appearance_components = checks.check_max_components(
-            max_appearance_components, n_levels, 'max_appearance_components')
-        features = checks.check_features(features, n_levels)
-
-        # store parameters
-        self.features = features
-        self.patch_shape = patch_shape
-        self.normalization_diagonal = normalization_diagonal
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
-        self.max_shape_components = max_shape_components
-        self.max_appearance_components = max_appearance_components
-        self.boundary = boundary
-
-        # patch-based AAMs can only work with TPS transform
-        self.transform = DifferentiableThinPlateSplines
-
-    def _build_reference_frame(self, mean_shape):
-        r"""
-        Generates the reference frame given a mean shape.
-
-        Parameters
-        ----------
-        mean_shape : :map:`PointCloud`
-            The mean shape to use.
-
-        Returns
-        -------
-        reference_frame : :map:`MaskedImage`
-            The patch-based reference frame.
-        """
-        return build_patch_reference_frame(mean_shape, boundary=self.boundary,
-                                           patch_shape=self.patch_shape)
-
-    def _mask_image(self, image):
-        r"""
-        Creates the patch-based mask of the given image.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The image to be masked.
-        """
-        image.build_mask_around_landmarks(self.patch_shape, group='source')
-
-    def _build_aam(self, shape_models, appearance_models, n_training_images):
-        r"""
-        Returns a Patch-Based AAM object.
-
-        Parameters
-        ----------
-        shape_models : :map:`PCAModel`
-            The trained multilevel shape models.
-
-        appearance_models : :map:`PCAModel`
-            The trained multilevel appearance models.
-
-        n_training_images : `int`
-            The number of training images.
-
-        Returns
-        -------
-        aam : :map:`PatchBasedAAM`
-            The trained Patched-Based AAM object.
-        """
-        from .base import PatchBasedAAM
-        return PatchBasedAAM(shape_models, appearance_models,
-                             n_training_images, self.patch_shape,
-                             self.transform, self.features,
-                             self.reference_shape, self.downscale,
-                             self.scaled_shape_models)
-
-
-def build_reference_frame(landmarks, boundary=3, group='source',
-                          trilist=None):
-    r"""
-    Builds a reference frame from a particular set of landmarks.
-
-    Parameters
-    ----------
-    landmarks : :map:`PointCloud`
-        The landmarks that will be used to build the reference frame.
-
-    boundary : `int`, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    group : `string`, optional
-        Group that will be assigned to the provided set of landmarks on the
-        reference frame.
-
-    trilist : ``(t, 3)`` `ndarray`, optional
-        Triangle list that will be used to build the reference frame.
-
-        If ``None``, defaults to performing Delaunay triangulation on the
-        points.
-
-    Returns
-    -------
-    reference_frame : :map:`Image`
-        The reference frame.
-    """
-    reference_frame = _build_reference_frame(landmarks, boundary=boundary,
-                                             group=group)
-    if trilist is not None:
-        reference_frame.landmarks[group] = TriMesh(
-            reference_frame.landmarks['source'].lms.points, trilist=trilist)
-
-    reference_frame.constrain_mask_to_landmarks(group=group)
-
-    return reference_frame
-
-
-def build_patch_reference_frame(landmarks, boundary=3, group='source',
-                                patch_shape=(16, 16)):
-    r"""
-    Builds a reference frame from a particular set of landmarks.
-
-    Parameters
-    ----------
-    landmarks : :map:`PointCloud`
-        The landmarks that will be used to build the reference frame.
-
-    boundary : `int`, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    group : `string`, optional
-        Group that will be assigned to the provided set of landmarks on the
-        reference frame.
-
-    patch_shape : tuple of ints, optional
-        Tuple specifying the shape of the patches.
-
-    Returns
-    -------
-    patch_based_reference_frame : :map:`Image`
-        The patch based reference frame.
-    """
-    boundary = np.max(patch_shape) + boundary
-    reference_frame = _build_reference_frame(landmarks, boundary=boundary,
-                                             group=group)
-
-    # mask reference frame
-    reference_frame.build_mask_around_landmarks(patch_shape, group=group)
-
-    return reference_frame
-
-
-def _build_reference_frame(landmarks, boundary=3, group='source'):
-    # translate landmarks to the origin
-    minimum = landmarks.bounds(boundary=boundary)[0]
-    landmarks = Translation(-minimum).apply(landmarks)
-
-    resolution = landmarks.range(boundary=boundary)
-    reference_frame = MaskedImage.init_blank(resolution)
-    reference_frame.landmarks[group] = landmarks
-
-    return reference_frame
diff --git a/menpofit/aam/fitter.py b/menpofit/aam/fitter.py
index 045f388..e7b7d98 100644
--- a/menpofit/aam/fitter.py
+++ b/menpofit/aam/fitter.py
@@ -1,425 +1,196 @@
 from __future__ import division
-from itertools import chain
-
-from menpofit.base import name_of_callable
-from menpofit.fitter import MultilevelFitter
-from menpofit.fittingresult import AMMultilevelFittingResult
-from menpofit.transform import (ModelDrivenTransform, OrthoMDTransform,
-                                DifferentiableAlignmentSimilarity)
-from menpofit.lucaskanade.appearance import SIC
-
-
-class AAMFitter(MultilevelFitter):
+import numpy as np
+from copy import deepcopy
+from menpo.transform import AlignmentUniformScale
+from menpo.image import BooleanImage
+from menpofit.fitter import ModelFitter, noisy_shape_from_bounding_box
+from menpofit.modelinstance import OrthoPDM
+from menpofit.sdm import SupervisedDescentFitter
+from menpofit.transform import OrthoMDTransform, LinearOrthoMDTransform
+import menpofit.checks as checks
+from .base import AAM, MaskedAAM, LinearAAM, LinearMaskedAAM, PatchAAM
+from .algorithm.lk import (
+    LucasKanadeStandardInterface, LucasKanadeLinearInterface,
+    LucasKanadePatchInterface, WibergInverseCompositional)
+from .algorithm.sd import (
+    SupervisedDescentStandardInterface, SupervisedDescentLinearInterface,
+    SupervisedDescentPatchInterface, ProjectOutNewton)
+from .result import AAMFitterResult
+
+
+# TODO: document me!
+class AAMFitter(ModelFitter):
     r"""
-    Abstract Interface for defining Active Appearance Models Fitters.
-
-    Parameters
-    -----------
-    aam : :map:`AAM`
-        The Active Appearance Model to be used.
     """
-    def __init__(self, aam):
-        self.aam = aam
-
-    @property
-    def reference_shape(self):
-        r"""
-        The reference shape of the AAM.
-
-        :type: :map:`PointCloud`
-        """
-        return self.aam.reference_shape
-
-    @property
-    def features(self):
-        r"""
-        The feature extracted at each pyramidal level during AAM building.
-        Stored in ascending pyramidal order.
-
-        :type: `list`
-        """
-        return self.aam.features
-
-    @property
-    def n_levels(self):
-        r"""
-        The number of pyramidal levels used during AAM building.
-
-        :type: `int`
-        """
-        return self.aam.n_levels
-
     @property
-    def downscale(self):
-        r"""
-        The downscale used to generate the final scale factor applied at
-        each pyramidal level during AAM building.
-        The scale factor is computed as:
+    def aam(self):
+        return self._model
 
-            ``(downscale ** k) for k in range(n_levels)``
+    def _check_n_appearance(self, n_appearance):
+        checks.set_models_components(self.aam.appearance_models, n_appearance)
 
-        :type: `float`
-        """
-        return self.aam.downscale
-
-    def _create_fitting_result(self, image, fitting_results, affine_correction,
-                               gt_shape=None):
-        r"""
-        Creates a :map:`AAMMultilevelFittingResult` associated to a
-        particular fitting of the AAM fitter.
-
-        Parameters
-        -----------
-        image : :map:`Image` or subclass
-            The image to be fitted.
-
-        fitting_results : `list` of :map:`FittingResult`
-            A list of fitting result objects containing the state of the
-            the fitting for each pyramidal level.
-
-        affine_correction : :map:`Affine`
-            An affine transform that maps the result of the top resolution
-            level to the scale space of the original image.
-
-        gt_shape : :map:`PointCloud`, optional
-            The ground truth shape associated to the image.
-
-        error_type : 'me_norm', 'me' or 'rmse', optional
-            Specifies how the error between the fitted and ground truth
-            shapes must be computed.
-
-        Returns
-        -------
-        fitting : :map:`AAMMultilevelFittingResult`
-            A fitting result object that will hold the state of the AAM
-            fitter for a particular fitting.
-        """
-        return AAMMultilevelFittingResult(
-            image, self, fitting_results, affine_correction, gt_shape=gt_shape)
+    def _fitter_result(self, image, algorithm_results, affine_correction,
+                       gt_shape=None):
+        return AAMFitterResult(image, self, algorithm_results,
+                               affine_correction, gt_shape=gt_shape)
 
 
+# TODO: document me!
 class LucasKanadeAAMFitter(AAMFitter):
     r"""
-    Lucas-Kanade based :map:`Fitter` for Active Appearance Models.
-
-    Parameters
-    -----------
-    aam : :map:`AAM`
-        The Active Appearance Model to be used.
-    algorithm : subclass of :map:`AppearanceLucasKanade`, optional
-        The Appearance Lucas-Kanade class to be used.
-    md_transform : :map:`ModelDrivenTransform` or subclass, optional
-        The model driven transform class to be used.
-    n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-        previous or ``None``, optional
-        The number of shape components or amount of shape variance to be
-        used per pyramidal level.
-
-        If `None`, all available shape components ``(n_active_components)``
-        will be used.
-        If `int` ``> 1``, the specified number of shape components will be
-        used.
-        If ``0. <=`` `float` ``<= 1.``, the number of components capturing the
-        specified variance ratio will be computed and used.
-
-        If `list` of length ``n_levels``, then the number of components is
-        defined per level. The first element of the list corresponds to the
-        lowest pyramidal level and so on.
-        If not a `list` or a `list` of length 1, then the specified number of
-        components will be used for all levels.
-    n_appearance : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-        previous or ``None``, optional
-        The number of appearance components or amount of appearance variance
-        to be used per pyramidal level.
-
-        If `None`, all available appearance components
-        ``(n_active_components)`` will be used.
-        If `int` ``> 1``, the specified number of appearance components will
-        be used.
-        If ``0. <=`` `float` ``<= 1.``, the number of appearance components
-        capturing the specified variance ratio will be computed and used.
-
-        If `list` of length ``n_levels``, then the number of components is
-        defined per level. The first element of the list corresponds to the
-        lowest pyramidal level and so on.
-        If not a `list` or a `list` of length 1, then the specified number of
-        components will be used for all levels.
     """
-    def __init__(self, aam, algorithm=SIC,
-                 md_transform=OrthoMDTransform, n_shape=None,
-                 n_appearance=None, **kwargs):
-        super(LucasKanadeAAMFitter, self).__init__(aam)
-        self._set_up(algorithm=algorithm, md_transform=md_transform,
-                     n_shape=n_shape, n_appearance=n_appearance, **kwargs)
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the name of fitting algorithm.
-
-        :type: `str`
-        """
-        return 'LK-AAM-' + self._fitters[0].algorithm
-
-    def _set_up(self, algorithm=SIC,
-                md_transform=OrthoMDTransform,
-                global_transform=DifferentiableAlignmentSimilarity,
-                n_shape=None, n_appearance=None, **kwargs):
-        r"""
-        Sets up the Lucas-Kanade fitter object.
-
-        Parameters
-        -----------
-        algorithm : subclass of :map:`AppearanceLucasKanade`, optional
-            The Appearance Lucas-Kanade class to be used.
-
-        md_transform : :map:`ModelDrivenTransform` or subclass, optional
-            The model driven transform class to be used.
-
-        n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-            previous or ``None``, optional
-            The number of shape components or amount of shape variance to be
-            used per pyramidal level.
-
-            If `None`, all available shape components ``(n_active_components)``
-            will be used.
-            If `int` ``> 1``, the specified number of shape components will be
-            used.
-            If ``0. <=`` `float` ``<= 1.``, the number of components capturing the
-            specified variance ratio will be computed and used.
-
-            If `list` of length ``n_levels``, then the number of components is
-            defined per level. The first element of the list corresponds to the
-            lowest pyramidal level and so on.
-            If not a `list` or a `list` of length 1, then the specified number of
-            components will be used for all levels.
-
-        n_appearance : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-            previous or ``None``, optional
-            The number of appearance components or amount of appearance variance
-            to be used per pyramidal level.
+    def __init__(self, aam, lk_algorithm_cls=WibergInverseCompositional,
+                 n_shape=None, n_appearance=None, sampling=None):
+        self._model = aam
+        self._check_n_shape(n_shape)
+        self._check_n_appearance(n_appearance)
+        self._sampling = checks.check_sampling(sampling, aam.n_scales)
+        self._set_up(lk_algorithm_cls)
+
+    def _set_up(self, lk_algorithm_cls):
+        self.algorithms = []
+        for j, (am, sm, s) in enumerate(zip(self.aam.appearance_models,
+                                            self.aam.shape_models,
+                                            self._sampling)):
+
+            template = am.mean()
+            if type(self.aam) is AAM or type(self.aam) is MaskedAAM:
+                # build orthonormal model driven transform
+                md_transform = OrthoMDTransform(
+                    sm, self.aam.transform,
+                    source=am.mean().landmarks['source'].lms)
+                interface = LucasKanadeStandardInterface(am, md_transform,
+                                                         template, sampling=s)
+                algorithm = lk_algorithm_cls(interface)
+            elif (type(self.aam) is LinearAAM or
+                  type(self.aam) is LinearMaskedAAM):
+                # build linear version of orthogonal model driven transform
+                md_transform = LinearOrthoMDTransform(
+                    sm, self.aam.reference_shape)
+                interface = LucasKanadeLinearInterface(am, md_transform,
+                                                       template, sampling=s)
+                algorithm = lk_algorithm_cls(interface)
+            elif type(self.aam) is PatchAAM:
+                # build orthogonal point distribution model
+                pdm = OrthoPDM(sm)
+                interface = LucasKanadePatchInterface(
+                    am, pdm, template, sampling=s,
+                    patch_size=self.aam.patch_size[j],
+                    patch_normalisation=self.aam.patch_normalisation)
+                algorithm = lk_algorithm_cls(interface)
+            else:
+                raise ValueError("AAM object must be of one of the "
+                                 "following classes: {}, {}, {}, {}, "
+                                 "{}".format(AAM, MaskedAAM, LinearAAM,
+                                             LinearMaskedAAM, PatchAAM))
 
-            If `None`, all available appearance components
-            ``(n_active_components)`` will be used.
-            If `int` ``> 1``, the specified number of appearance components will
-            be used.
-            If ``0. <=`` `float` ``<= 1.``, the number of appearance components
-            capturing the specified variance ratio will be computed and used.
+            self.algorithms.append(algorithm)
 
-            If `list` of length ``n_levels``, then the number of components is
-            defined per level. The first element of the list corresponds to the
-            lowest pyramidal level and so on.
-            If not a `list` or a `list` of length 1, then the specified number of
-            components will be used for all levels.
 
-        Raises
-        -------
-        ValueError
-            ``n_shape`` can be an `int`, `float`, ``None`` or a `list`
-            containing ``1`` or ``n_levels`` of those.
-        ValueError
-            ``n_appearance`` can be an `int`, `float`, `None` or a `list`
-            containing ``1`` or ``n_levels`` of those.
-        """
-        # check n_shape parameter
-        if n_shape is not None:
-            if type(n_shape) is int or type(n_shape) is float:
-                for sm in self.aam.shape_models:
-                    sm.n_active_components = n_shape
-            elif len(n_shape) == 1 and self.aam.n_levels > 1:
-                for sm in self.aam.shape_models:
-                    sm.n_active_components = n_shape[0]
-            elif len(n_shape) == self.aam.n_levels:
-                for sm, n in zip(self.aam.shape_models, n_shape):
-                    sm.n_active_components = n
+# TODO: document me!
+class SupervisedDescentAAMFitter(SupervisedDescentFitter):
+    r"""
+    """
+    def __init__(self, images, aam, group=None, bounding_box_group=None,
+                 n_shape=None, n_appearance=None, sampling=None,
+                 sd_algorithm_cls=ProjectOutNewton,
+                 n_iterations=6, n_perturbations=30,
+                 perturb_from_bounding_box=noisy_shape_from_bounding_box,
+                 batch_size=None, verbose=False):
+        self.aam = aam
+        checks.set_models_components(aam.appearance_models, n_appearance)
+        checks.set_models_components(aam.shape_models, n_shape)
+        self._sampling = checks.check_sampling(sampling, aam.n_scales)
+
+        # patch_feature and patch_size are not actually
+        # used because they are fully defined by the AAM already. Therefore,
+        # we just leave them as their 'defaults' because they won't be used.
+        super(SupervisedDescentAAMFitter, self).__init__(
+            images, group=group, bounding_box_group=bounding_box_group,
+            reference_shape=self.aam.reference_shape,
+            sd_algorithm_cls=sd_algorithm_cls,
+            holistic_feature=self.aam.holistic_features,
+            diagonal=self.aam.diagonal,
+            scales=self.aam.scales, n_iterations=n_iterations,
+            n_perturbations=n_perturbations,
+            perturb_from_bounding_box=perturb_from_bounding_box,
+            batch_size=batch_size, verbose=verbose)
+
+    def _setup_algorithms(self):
+        self.algorithms = []
+        for j, (am, sm, s) in enumerate(zip(self.aam.appearance_models,
+                                            self.aam.shape_models,
+                                            self._sampling)):
+            template = am.mean()
+            if type(self.aam) is AAM or type(self.aam) is MaskedAAM:
+                # build orthonormal model driven transform
+                md_transform = OrthoMDTransform(
+                    sm, self.aam.transform,
+                    source=template.landmarks['source'].lms)
+                interface = SupervisedDescentStandardInterface(
+                    am, md_transform, template, sampling=s)
+                algorithm = self._sd_algorithm_cls(
+                    interface, n_iterations=self.n_iterations[j])
+            elif (type(self.aam) is LinearAAM or
+                  type(self.aam) is LinearMaskedAAM):
+                # Build linear version of orthogonal model driven transform
+                md_transform = LinearOrthoMDTransform(
+                    sm, self.aam.reference_shape)
+                interface = SupervisedDescentLinearInterface(
+                    am, md_transform, template, sampling=s)
+                algorithm = self._sd_algorithm_cls(
+                    interface, n_iterations=self.n_iterations[j])
+            elif type(self.aam) is PatchAAM:
+                # Build orthogonal point distribution model
+                pdm = OrthoPDM(sm)
+                interface = SupervisedDescentPatchInterface(
+                    am, pdm, template, sampling=s,
+                    patch_size=self.aam.patch_size[j],
+                    patch_normalisation=self.aam.patch_normalisation)
+                algorithm = self._sd_algorithm_cls(
+                    interface, n_iterations=self.n_iterations[j])
             else:
-                raise ValueError('n_shape can be an integer or a float or None '
-                                 'or a list containing 1 or {} of '
-                                 'those'.format(self.aam.n_levels))
+                raise ValueError("AAM object must be of one of the "
+                                 "following classes: {}, {}, {}, {}, "
+                                 "{}".format(AAM, MaskedAAM, LinearAAM,
+                                             LinearMaskedAAM, PatchAAM))
 
-        # check n_appearance parameter
-        if n_appearance is not None:
-            if type(n_appearance) is int or type(n_appearance) is float:
-                for am in self.aam.appearance_models:
-                    am.n_active_components = n_appearance
-            elif len(n_appearance) == 1 and self.aam.n_levels > 1:
-                for am in self.aam.appearance_models:
-                    am.n_active_components = n_appearance[0]
-            elif len(n_appearance) == self.aam.n_levels:
-                for am, n in zip(self.aam.appearance_models, n_appearance):
-                    am.n_active_components = n
-            else:
-                raise ValueError('n_appearance can be an integer or a float '
-                                 'or None or a list containing 1 or {} of '
-                                 'those'.format(self.aam.n_levels))
+            # append algorithms to list
+            self.algorithms.append(algorithm)
 
-        self._fitters = []
-        for j, (am, sm) in enumerate(zip(self.aam.appearance_models,
-                                         self.aam.shape_models)):
 
-            if md_transform is not ModelDrivenTransform:
-                md_trans = md_transform(
-                    sm, self.aam.transform, global_transform,
-                    source=am.mean().landmarks['source'].lms)
-            else:
-                md_trans = md_transform(
-                    sm, self.aam.transform,
-                    source=am.mean().landmarks['source'].lms)
-            self._fitters.append(
-                algorithm(am, md_trans, **kwargs))
+# TODO: Document me!
+def holistic_sampling_from_scale(aam, scale=0.35):
+    reference = aam.appearance_models[0].mean()
+    scaled_reference = reference.rescale(scale)
 
-    def __str__(self):
-        out = "{0} Fitter\n" \
-              " - Lucas-Kanade {1}\n" \
-              " - Transform is {2} and residual is {3}.\n" \
-              " - {4} training images.\n".format(
-            self.aam._str_title, self._fitters[0].algorithm,
-            self._fitters[0].transform.__class__.__name__,
-            self._fitters[0].residual.type, self.aam.n_training_images)
-        # small strings about number of channels, channels string and downscale
-        n_channels = []
-        down_str = []
-        for j in range(self.n_levels):
-            n_channels.append(
-                self._fitters[j].appearance_model.template_instance.n_channels)
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(name_of_callable(
-                self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                if isinstance(self.features[j], str):
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j]))
-                elif self.features[j] is None:
-                    feat_str.append("- No features extracted. ")
-                else:
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j].__name__))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        if self.n_levels > 1:
-            if self.aam.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n".format(out, self.n_levels,
-                                                          self.downscale)
+    t = AlignmentUniformScale(scaled_reference.landmarks['source'].lms,
+                              reference.landmarks['source'].lms)
+    new_indices = np.require(np.round(t.apply(
+        scaled_reference.mask.true_indices())), dtype=np.int)
 
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n".format(out, self.n_levels,
-                                                 self.downscale)
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-                if not self.aam.scaled_shape_models:
-                    out = "{}   - Reference frames of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                        out, self._fitters[0].appearance_model.n_features,
-                        self._fitters[0].template.n_true_pixels(),
-                        n_channels[0], self._fitters[0].template._str_shape,
-                        n_channels[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                if (self.aam.scaled_shape_models or
-                        (not self.pyramid_on_features)):
-                    out = "{}     - Reference frame of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                        out, self._fitters[i].appearance_model.n_features,
-                        self._fitters[i].template.n_true_pixels(),
-                        n_channels[i], self._fitters[i].template._str_shape,
-                        n_channels[i])
-                out = "{0}     - {1} motion components\n     - {2} active " \
-                      "appearance components ({3:.2f}% of original " \
-                      "variance)\n".format(
-                    out, self._fitters[i].transform.n_parameters,
-                    self._fitters[i].appearance_model.n_active_components,
-                    self._fitters[i].appearance_model.variance_ratio() * 100)
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - Reference frame of length {4} ({5} x {6}C, " \
-                  "{7} x {8}C)\n   - {9} motion parameters\n" \
-                  "   - {10} appearance components ({11:.2f}% of original " \
-                  "variance)\n".format(
-                out, feat_str[0], n_channels[0], ch_str[0],
-                self._fitters[0].appearance_model.n_features,
-                self._fitters[0].template.n_true_pixels(),
-                n_channels[0], self._fitters[0].template._str_shape,
-                n_channels[0], self._fitters[0].transform.n_parameters,
-                self._fitters[0].appearance_model.n_active_components,
-                self._fitters[0].appearance_model.variance_ratio() * 100)
-        return out
+    modified_mask = deepcopy(reference.mask.pixels)
+    modified_mask[:] = False
+    modified_mask[:, new_indices[:, 0], new_indices[:, 1]] = True
 
+    true_positions = np.nonzero(
+        modified_mask[:, reference.mask.mask].ravel())[0]
 
-class AAMMultilevelFittingResult(AMMultilevelFittingResult):
-    r"""
-    Class that holds the state of a :map:`AAMFitter` object before,
-    during and after it has fitted a particular image.
-    """
-    @property
-    def appearance_reconstructions(self):
-        r"""
-        The list containing the appearance reconstruction obtained at
-        each fitting iteration.
+    return true_positions, BooleanImage(modified_mask[0])
 
-        :type: `list` of :map:`Image` or subclass
-        """
-        return list(chain(
-            *[f.appearance_reconstructions for f in self.fitting_results]))
 
-    @property
-    def aam_reconstructions(self):
-        r"""
-        The list containing the aam reconstruction (i.e. the appearance
-        reconstruction warped on the shape instance reconstruction) obtained at
-        each fitting iteration.
+# Keeps every step-th true pixel of the first appearance model's mean mask.
+def holistic_sampling_from_step(aam, step=8):
+    reference = aam.appearance_models[0].mean()
 
-        Note that this reconstruction is only tested to work for the
-        :map:`OrthoMDTransform`
+    n_true_pixels = reference.n_true_pixels()
+    true_positions = np.zeros(n_true_pixels, dtype=bool)  # np.bool is gone
+    sampling = np.arange(0, n_true_pixels, step)  # xrange is Python 2 only
+    true_positions[sampling] = True
 
-        :type: list` of :map:`Image` or subclass
-        """
-        aam_reconstructions = []
-        for level, f in enumerate(self.fitting_results):
-            if f.weights:
-                for shape_w, aw in zip(f.parameters, f.weights):
-                    shape_w = shape_w[4:]
-                    sm_level = self.fitter.aam.shape_models[level]
-                    am_level = self.fitter.aam.appearance_models[level]
-                    swt = shape_w / sm_level.eigenvalues[:len(shape_w)] ** 0.5
-                    awt = aw / am_level.eigenvalues[:len(aw)] ** 0.5
-                    aam_reconstructions.append(self.fitter.aam.instance(
-                        shape_weights=swt, appearance_weights=awt, level=level))
-            else:
-                for shape_w in f.parameters:
-                    shape_w = shape_w[4:]
-                    sm_level = self.fitter.aam.shape_models[level]
-                    swt = shape_w / sm_level.eigenvalues[:len(shape_w)] ** 0.5
-                    aam_reconstructions.append(self.fitter.aam.instance(
-                        shape_weights=swt, appearance_weights=None,
-                        level=level))
-        return aam_reconstructions
+    modified_mask = reference.mask.copy()  # leave the model's own mask intact
+    new_indices = modified_mask.true_indices()[sampling, :]
+    modified_mask.mask[:] = False
+    modified_mask.mask[new_indices[:, 0], new_indices[:, 1]] = True
+
+    return true_positions, modified_mask  # (flags per true pixel, new mask)
diff --git a/menpofit/aam/result.py b/menpofit/aam/result.py
new file mode 100644
index 0000000..ae478f2
--- /dev/null
+++ b/menpofit/aam/result.py
@@ -0,0 +1,109 @@
+from __future__ import division
+from menpofit.result import ParametricAlgorithmResult, MultiFitterResult
+
+
+# TODO: document me!
+class AAMAlgorithmResult(ParametricAlgorithmResult):
+    r"""Result of one AAM algorithm run; derived data is computed lazily.
+    """
+    def __init__(self, image, algorithm, shape_parameters, cost_functions=None,
+                 appearance_parameters=None, gt_shape=None):
+        super(AAMAlgorithmResult, self).__init__(
+            image, algorithm, shape_parameters, gt_shape=gt_shape)
+        self._cost_functions = cost_functions  # zero-argument callables
+        self.appearance_parameters = appearance_parameters
+        self._warped_images = None  # lazy caches, filled on first access
+        self._appearance_reconstructions = None
+        self._costs = None
+
+    @property
+    def warped_images(self):
+        if self._warped_images is None:  # build once, then reuse
+            self._warped_images = []
+            for p in self.shape_parameters:
+                self.algorithm.transform.from_vector_inplace(p)
+                self._warped_images.append(
+                    self.algorithm.interface.warp(self.image))
+        return self._warped_images
+
+    @property
+    def appearance_reconstructions(self):
+        if self.appearance_parameters is not None:
+            if self._appearance_reconstructions is None:  # build once
+                self._appearance_reconstructions = []
+                for c in self.appearance_parameters:
+                    instance = self.algorithm.appearance_model.instance(c)
+                    self._appearance_reconstructions.append(instance)
+            return self._appearance_reconstructions
+        else:  # no appearance parameters were supplied
+            raise ValueError('appearance_reconstructions is not well '
+                             'defined for the chosen AAM algorithm: '
+                             '{}'.format(self.algorithm.__class__))
+
+    @property
+    def costs(self):
+        if self._cost_functions is not None:
+            if self._costs is None:  # evaluate the closures only once
+                self._costs = [f() for f in self._cost_functions]
+            return self._costs
+        else:  # no cost functions were supplied
+            raise ValueError('costs is not well '
+                             'defined for the chosen AAM algorithm: '
+                             '{}'.format(self.algorithm.__class__))
+
+
+# TODO: document me!
+class LinearAAMAlgorithmResult(AAMAlgorithmResult):
+    r"""AAM algorithm result whose shapes are the transforms' sparse targets.
+    """
+    @property
+    def shapes(self, as_points=False):  # NOTE(review): as_points is unused
+        return [self.algorithm.transform.from_vector(p).sparse_target
+                for p in self.shape_parameters]
+
+    @property
+    def final_shape(self):
+        return self.final_transform.sparse_target
+
+    @property
+    def initial_shape(self):
+        return self.initial_transform.sparse_target
+
+
+# TODO: document me!
+class AAMFitterResult(MultiFitterResult):
+    r"""AAM fitter result that aggregates a list of algorithm results.
+    """
+    def __init__(self, image, fitter, algorithm_results, affine_correction,
+                 gt_shape=None):
+        super(AAMFitterResult, self).__init__(
+            image, fitter, algorithm_results, affine_correction,
+            gt_shape=gt_shape)
+        self._warped_images = None  # lazy cache, filled on first access
+
+    @property
+    def warped_images(self):
+        if self._warped_images is None:
+            algorithm = self.algorithm_results[-1].algorithm  # last result
+            self._warped_images = []
+            for s in self.shapes:
+                algorithm.transform.set_target(s)
+                self._warped_images.append(
+                    algorithm.interface.warp(self.image))
+        return self._warped_images
+
+    @property
+    def appearance_reconstructions(self):
+        reconstructions = self.algorithm_results[0].appearance_reconstructions
+        if reconstructions is not None:
+            for a in self.algorithm_results[1:]:  # concatenate the rest
+                reconstructions = (reconstructions +
+                                   a.appearance_reconstructions)
+        return reconstructions
+
+    @property
+    def costs(self):
+        costs = []
+        for a in self.algorithm_results:  # concatenate per-result costs
+            costs += a.costs
+        return costs
diff --git a/menpofit/atm/__init__.py b/menpofit/atm/__init__.py
index 304b4d3..6705775 100644
--- a/menpofit/atm/__init__.py
+++ b/menpofit/atm/__init__.py
@@ -1,3 +1,3 @@
-from .base import ATM, PatchBasedATM
-from .builder import ATMBuilder, PatchBasedATMBuilder
+from .base import HolisticATM, PatchATM, MaskedATM, LinearATM, LinearMaskedATM
 from .fitter import LucasKanadeATMFitter
+from .algorithm import ForwardCompositional, InverseCompositional
diff --git a/menpofit/atm/algorithm.py b/menpofit/atm/algorithm.py
new file mode 100644
index 0000000..0e5d5db
--- /dev/null
+++ b/menpofit/atm/algorithm.py
@@ -0,0 +1,201 @@
+from __future__ import division
+import numpy as np
+from menpofit.aam.algorithm.lk import (LucasKanadeBaseInterface,
+                                       LucasKanadePatchBaseInterface)
+from .result import ATMAlgorithmResult, LinearATMAlgorithmResult
+
+
+# TODO document me!
+class ATMLKStandardInterface(LucasKanadeBaseInterface):
+    r"""Lucas-Kanade interface that wraps its outputs in ATMAlgorithmResult.
+    """
+    def __init__(self, transform, template, sampling=None):
+        super(ATMLKStandardInterface, self).__init__(transform, template,
+                                                     sampling=sampling)
+
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         gt_shape=None):
+        return ATMAlgorithmResult(  # this interface is passed as `algorithm`
+            image, self, shape_parameters,
+            cost_functions=cost_functions, gt_shape=gt_shape)
+
+
+# TODO document me!
+class ATMLKLinearInterface(ATMLKStandardInterface):
+    r"""Interface exposing transform.model and returning linear ATM results.
+    """
+    @property
+    def shape_model(self):
+        return self.transform.model
+
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         gt_shape=None):
+        return LinearATMAlgorithmResult(  # linear counterpart of the parent's
+            image, self, shape_parameters,
+            cost_functions=cost_functions, gt_shape=gt_shape)
+
+
+# TODO document me!
+class ATMLKPatchInterface(LucasKanadePatchBaseInterface):
+    r"""Patch-based Lucas-Kanade interface returning ATMAlgorithmResult.
+    """
+    def algorithm_result(self, image, shape_parameters, cost_functions=None,
+                         gt_shape=None):
+        return ATMAlgorithmResult(  # this interface is passed as `algorithm`
+            image, self, shape_parameters,
+            cost_functions=cost_functions, gt_shape=gt_shape)
+
+
+# TODO document me!
+class LucasKanade(object):
+    r"""Base class: stores the interface and precomputes shared terms."""
+    def __init__(self, atm_interface, eps=10**-5):
+        self.eps = eps
+        self.interface = atm_interface
+        self._precompute()  # subclasses may extend _precompute
+
+    @property
+    def transform(self):
+        return self.interface.transform
+
+    @property
+    def template(self):
+        return self.interface.template
+
+    def _precompute(self):
+        # grab number of shape parameters
+        self.n = self.transform.n_parameters
+
+        # vectorize template and mask it
+        self.t_m = self.template.as_vector()[self.interface.i_mask]
+
+        # compute warp jacobian
+        self.dW_dp = self.interface.warp_jacobian()
+
+        # compute shape model prior
+        # TODO: Is this correct? It's like modelling no noise at all
+        noise_variance = self.interface.shape_model.noise_variance() or 1
+        s2 = 1.0 / noise_variance
+        L = self.interface.shape_model.eigenvalues
+        self.s2_inv_L = np.hstack((np.ones((4,)), s2 / L))
+
+
+# TODO document me!
+class Compositional(LucasKanade):
+    r"""Abstract base for Compositional ATM algorithms.
+    Subclasses must provide ``_solve`` and ``_update_warp`` (used by ``run``).
+    """
+    def run(self, image, initial_shape, gt_shape=None, max_iters=20,
+            map_inference=False):
+        # define cost closure (binds x now, evaluates x.T.dot(x) on demand)
+        def cost_closure(x):
+            return lambda: x.T.dot(x)
+
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf  # np.Inf alias was removed in NumPy 2.0
+
+        # Compositional Gauss-Newton loop -------------------------------------
+
+        # warp image
+        self.i = self.interface.warp(image)
+        # vectorize it and mask it
+        i_m = self.i.as_vector()[self.interface.i_mask]
+
+        # compute masked error
+        self.e_m = i_m - self.t_m
+
+        # update cost
+        cost_functions = [cost_closure(self.e_m)]
+
+        while k < max_iters and eps > self.eps:
+            # solve for increments on the shape parameters
+            self.dp = self._solve(map_inference)
+
+            # update warp (keep the previous points for the convergence test)
+            s_k = self.transform.target.points
+            self._update_warp()
+            p_list.append(self.transform.as_vector())
+
+            # warp image
+            self.i = self.interface.warp(image)
+            # vectorize it and mask it
+            i_m = self.i.as_vector()[self.interface.i_mask]
+
+            # compute masked error
+            self.e_m = i_m - self.t_m
+
+            # update cost
+            cost_functions.append(cost_closure(self.e_m))
+
+            # test convergence: norm of the change in target points
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # increase iteration counter
+            k += 1
+
+        # return algorithm result
+        return self.interface.algorithm_result(
+            image, p_list, cost_functions=cost_functions, gt_shape=gt_shape)
+
+
+# TODO document me!
+class ForwardCompositional(Compositional):
+    r"""
+    Forward Compositional (FC) Gauss-Newton algorithm
+    """
+    def _solve(self, map_inference):
+        # compute warped image gradient
+        nabla_i = self.interface.gradient(self.i)
+        # compute masked forward Jacobian
+        J_m = self.interface.steepest_descent_images(nabla_i, self.dW_dp)
+        # compute masked forward Hessian
+        JJ_m = J_m.T.dot(J_m)
+        # solve for increments on the shape parameters
+        if map_inference:
+            return self.interface.solve_shape_map(
+                JJ_m, J_m, self.e_m,  self.s2_inv_L,
+                self.transform.as_vector())
+        else:
+            return self.interface.solve_shape_ml(JJ_m, J_m, self.e_m)
+
+    def _update_warp(self):
+        # update warp: dp is added to the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() + self.dp)
+
+
+# TODO document me!
+class InverseCompositional(Compositional):
+    r"""
+    Inverse Compositional (IC) Gauss-Newton algorithm
+    """
+    def _precompute(self):
+        # call super method
+        super(InverseCompositional, self)._precompute()
+        # compute template gradient
+        nabla_t = self.interface.gradient(self.template)
+        # compute masked inverse Jacobian
+        self.J_m = self.interface.steepest_descent_images(-nabla_t, self.dW_dp)
+        # compute masked inverse Hessian
+        self.JJ_m = self.J_m.T.dot(self.J_m)
+        # compute masked Jacobian pseudo-inverse, i.e. solve(J^T J, J^T)
+        self.pinv_J_m = np.linalg.solve(self.JJ_m, self.J_m.T)
+
+    def _solve(self, map_inference):
+        # solve for increments on the shape parameters
+        if map_inference:
+            return self.interface.solve_shape_map(
+                self.JJ_m, self.J_m, self.e_m, self.s2_inv_L,
+                self.transform.as_vector())
+        else:
+            return -self.pinv_J_m.dot(self.e_m)
+
+    def _update_warp(self):
+        # update warp: dp is subtracted from the current parameter vector
+        self.transform.from_vector_inplace(
+            self.transform.as_vector() - self.dp)
diff --git a/menpofit/atm/base.py b/menpofit/atm/base.py
index 1ebee48..3b53565 100644
--- a/menpofit/atm/base.py
+++ b/menpofit/atm/base.py
@@ -1,108 +1,234 @@
 from __future__ import division
-
+from copy import deepcopy
+import warnings
 import numpy as np
-from menpo.shape import TriMesh
-
-from menpofit.base import DeformableModel, name_of_callable
-from menpofit.aam.builder import (build_patch_reference_frame,
-                                  build_reference_frame)
-
-
-class ATM(DeformableModel):
+from menpo.feature import no_op
+from menpo.visualize import print_dynamic
+from menpo.transform import Scale
+from menpo.shape import mean_pointcloud
+from menpofit import checks
+from menpofit.transform import (DifferentiableThinPlateSplines,
+                                DifferentiablePiecewiseAffine)
+from menpofit.base import name_of_callable, batch
+from menpofit.builder import (
+    build_reference_frame, build_patch_reference_frame,
+    compute_features, scale_images, build_shape_model, warp_images,
+    align_shapes, densify_shapes,
+    extract_patches, MenpoFitBuilderWarning, compute_reference_shape)
+
+
+# TODO: document me!
+class ATM(object):
     r"""
     Active Template Model class.
+    """
+    def __init__(self, template, shapes, group=None, verbose=False,
+                 reference_shape=None, holistic_features=no_op,
+                 transform=DifferentiablePiecewiseAffine, diagonal=None,
+                 scales=(0.5, 1.0), max_shape_components=None,
+                 batch_size=None):
+
+        checks.check_diagonal(diagonal)
+        n_scales = len(scales)
+        scales = checks.check_scales(scales)
+        holistic_features = checks.check_features(holistic_features, n_scales)
+        max_shape_components = checks.check_max_components(
+            max_shape_components, n_scales, 'max_shape_components')
+
+        self.holistic_features = holistic_features
+        self.transform = transform
+        self.diagonal = diagonal
+        self.scales = scales
+        self.max_shape_components = max_shape_components
+        self.reference_shape = reference_shape
+        self.shape_models = []
+        self.warped_templates = []
 
-    Parameters
-    -----------
-    shape_models : :map:`PCAModel` list
-        A list containing the shape models of the ATM.
-
-    warped_templates : :map:`MaskedImage` list
-        A list containing the warped templates models of the ATM.
-
-    n_training_shapes: `int`
-        The number of training shapes used to build the ATM.
-
-    transform : :map:`PureAlignmentTransform`
-        The transform used to warp the images from which the AAM was
-        constructed.
-
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
-
-    reference_shape : :map:`PointCloud`
-        The reference shape that was used to resize all training images to a
-        consistent object size.
-
-    downscale : `float`
-        The downscale factor that was used to create the different pyramidal
-        levels.
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames are the mean shapes of each pyramid
-        level, so the shape models are scaled.
+        # Train ATM
+        self._train(template, shapes, increment=False, group=group,
+                    verbose=verbose, batch_size=batch_size)
 
-        If ``False``, the reference frames of all levels are the mean shape of
-        the highest level, so the shape models are not scaled; they have the
-        same size.
+    def _train(self, template, shapes, increment=False, group=None,
+               shape_forgetting_factor=1.0, verbose=False, batch_size=None):
+        r"""
+        """
+        # If batch_size is not None, then we may have a generator, else we
+        # assume we have a list. Either way the shapes are grouped into
+        # batches: the first batch fixes the reference shape and template
+        # scale, and every later batch increments the models.
+        if batch_size is not None:
+            # Create a generator of fixed sized batches. Will still work even
+            # on an infinite list.
+            shape_batches = batch(shapes, batch_size)
+        else:
+            shape_batches = [list(shapes)]
+
+        for k, shape_batch in enumerate(shape_batches):
+            if k == 0:
+                # Make sure a reference shape exists before rescaling
+                if self.reference_shape is None:
+                    # If no reference shape was given, use the mean of the first
+                    # batch
+                    if batch_size is not None:
+                        warnings.warn('No reference shape was provided. The '
+                                      'mean of the first batch will be the '
+                                      'reference shape. If the batch mean is '
+                                      'not representative of the true mean, '
+                                      'this may cause issues.',
+                                      MenpoFitBuilderWarning)
+                    checks.check_trilist(shape_batch[0], self.transform)
+                    self.reference_shape = compute_reference_shape(
+                        shape_batch, self.diagonal, verbose=verbose)
+
+                # Rescale the template to the reference shape
+                template = template.rescale_to_pointcloud(
+                    self.reference_shape, group=group)
+
+            # After the first batch, we are incrementing the model
+            if k > 0:
+                increment = True
+
+            if verbose:
+                print('Computing batch {}'.format(k))
+
+            # Train each batch
+            self._train_batch(template, shape_batch, increment=increment,
+                              group=group,
+                              shape_forgetting_factor=shape_forgetting_factor,
+                              verbose=verbose)
+
+    def _train_batch(self, template, shape_batch, increment=False, group=None,
+                     shape_forgetting_factor=1.0, verbose=False):
+        r"""
+        Builds or increments the ATM from a template and a batch of shapes.
+        """
+        # build models at each scale
+        if verbose:
+            print_dynamic('- Building models\n')
+
+        feature_images = []
+        # for each scale (low --> high)
+        for j in range(self.n_scales):
+            if verbose:
+                if len(self.scales) > 1:
+                    scale_prefix = '  - Scale {}: '.format(j)
+                else:
+                    scale_prefix = '  - '
+            else:
+                scale_prefix = None
+
+            # Handle features
+            if j == 0 or self.holistic_features[j] is not self.holistic_features[j - 1]:
+                # Compute features only if this is the first pass through
+                # the loop or the features at this scale are different from
+                # the features at the previous scale
+                feature_images = compute_features([template],
+                                                  self.holistic_features[j],
+                                                  prefix=scale_prefix,
+                                                  verbose=verbose)
+            # handle scales
+            if self.scales[j] != 1:
+                # Scale feature images only if scale is different than 1
+                scaled_images = scale_images(feature_images, self.scales[j],
+                                             prefix=scale_prefix,
+                                             verbose=verbose)
+                # Extract potentially rescaled shapes
+                scale_transform = Scale(scale_factor=self.scales[j],
+                                        n_dims=2)
+                scale_shapes = [scale_transform.apply(s)
+                                for s in shape_batch]
+            else:
+                scaled_images = feature_images
+                scale_shapes = shape_batch
 
-        Note that from our experience, if scaled_shape_models is ``False``, AAMs
-        tend to have slightly better performance.
+            # Build the shape model
+            if verbose:
+                print_dynamic('{}Building shape model'.format(scale_prefix))
 
-    """
-    def __init__(self, shape_models, warped_templates, n_training_shapes,
-                 transform, features, reference_shape, downscale,
-                 scaled_shape_models):
-        DeformableModel.__init__(self, features)
-        self.n_training_shapes = n_training_shapes
-        self.shape_models = shape_models
-        self.warped_templates = warped_templates
-        self.transform = transform
-        self.reference_shape = reference_shape
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
+            if not increment:
+                if j == 0:
+                    shape_model = self._build_shape_model(scale_shapes, j)
+                    self.shape_models.append(shape_model)
+                else:
+                    self.shape_models.append(deepcopy(shape_model))
+            else:
+                self._increment_shape_model(
+                    scale_shapes,  self.shape_models[j],
+                    forgetting_factor=shape_forgetting_factor)
+
+            # Obtain warped images - we use a scaled version of the
+            # reference shape, computed here. This is because the mean
+            # moves when we are incrementing, and we need a consistent
+            # reference frame.
+            scaled_reference_shape = Scale(self.scales[j], n_dims=2).apply(
+                self.reference_shape)
+            warped_template = self._warp_template(scaled_images[0], group,
+                                                  scaled_reference_shape,
+                                                  j, scale_prefix, verbose)
+            self.warped_templates.append(warped_template[0])
+
+            if verbose:
+                print_dynamic('{}Done\n'.format(scale_prefix))
+
+        # Because we just copy the shape model, we need to wait to trim
+        # it after building each model. This ensures we can have a different
+        # number of components per level
+        for j, sm in enumerate(self.shape_models):
+            max_sc = self.max_shape_components[j]
+            if max_sc is not None:
+                sm.trim_components(max_sc)
+
+    def increment(self, template, shapes, group=None, verbose=False,
+                  shape_forgetting_factor=1.0, batch_size=None):
+        return self._train(template, shapes, group=group,
+                           verbose=verbose,
+                           shape_forgetting_factor=shape_forgetting_factor,
+                           increment=True, batch_size=batch_size)
+
+    def _build_shape_model(self, shapes, scale_index):
+        return build_shape_model(shapes)
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        # Compute aligned shapes
+        aligned_shapes = align_shapes(shapes)
+        # Increment shape model
+        shape_model.increment(aligned_shapes,
+                              forgetting_factor=forgetting_factor)
+
+    def _warp_template(self, template, group, reference_shape, scale_index,
+                       prefix, verbose):
+        reference_frame = build_reference_frame(reference_shape)
+        shape = template.landmarks[group].lms
+        return warp_images([template], [shape], reference_frame, self.transform,
+                           prefix=prefix, verbose=verbose)
 
     @property
-    def n_levels(self):
+    def n_scales(self):
         """
-        The number of multi-resolution pyramidal levels of the ATM.
+        The number of scales of the ATM.
 
         :type: `int`
         """
-        return len(self.warped_templates)
+        return len(self.scales)
 
-    def instance(self, shape_weights=None, level=-1):
+    @property
+    def _str_title(self):
         r"""
-        Generates a novel ATM instance given a set of shape weights. If no
-        weights are provided, the mean shape instance is returned.
-
-        Parameters
-        -----------
-        shape_weights : ``(n_weights,)`` `ndarray` or `float` list
-            Weights of the shape model that will be used to create
-            a novel shape instance. If ``None``, the mean shape
-            ``(shape_weights = [0, 0, ..., 0])`` is used.
-
-        level : `int`, optional
-            The pyramidal level to be used.
+        Returns a string containing name of the model.
+        :type: `string`
+        """
+        return 'Holistic Active Template Model'
 
+    def instance(self, shape_weights=None, scale_index=-1):
+        r"""
         Returns
         -------
         image : :map:`Image`
-            The novel ATM instance.
+            The novel ATM instance.
         """
-        sm = self.shape_models[level]
+        sm = self.shape_models[scale_index]
+        template = self.warped_templates[scale_index]
 
         # TODO: this bit of logic should to be transferred down to PCAModel
         if shape_weights is None:
@@ -111,37 +237,36 @@ def instance(self, shape_weights=None, level=-1):
         shape_weights *= sm.eigenvalues[:n_shape_weights] ** 0.5
         shape_instance = sm.instance(shape_weights)
 
-        return self._instance(level, shape_instance)
+        return self._instance(shape_instance, template)
 
-    def random_instance(self, level=-1):
+    def random_instance(self, scale_index=-1):
         r"""
         Generates a novel random instance of the ATM.
 
         Parameters
         -----------
-        level : `int`, optional
-            The pyramidal level to be used.
+        scale_index : `int`, optional
+            The scale to be used.
 
         Returns
         -------
         image : :map:`Image`
-            The novel ATM instance.
+            The novel ATM instance.
         """
-        sm = self.shape_models[level]
+        sm = self.shape_models[scale_index]
+        template = self.warped_templates[scale_index]
 
         # TODO: this bit of logic should to be transferred down to PCAModel
         shape_weights = (np.random.randn(sm.n_active_components) *
                          sm.eigenvalues[:sm.n_active_components]**0.5)
         shape_instance = sm.instance(shape_weights)
 
-        return self._instance(level, shape_instance)
+        return self._instance(shape_instance, template)
 
-    def _instance(self, level, shape_instance):
-        template = self.warped_templates[level]
+    def _instance(self, shape_instance, template):
         landmarks = template.landmarks['source'].lms
 
-        reference_frame = self._build_reference_frame(
-            shape_instance, landmarks)
+        reference_frame = build_reference_frame(shape_instance)
 
         transform = self.transform(
             reference_frame.landmarks['source'].lms, landmarks)
@@ -149,271 +274,346 @@ def _instance(self, level, shape_instance):
         return template.as_unmasked(copy=False).warp_to_mask(
             reference_frame.mask, transform, warp_landmarks=True)
 
-    def _build_reference_frame(self, reference_shape, landmarks):
-        if type(landmarks) == TriMesh:
-            trilist = landmarks.trilist
-        else:
-            trilist = None
-        return build_reference_frame(
-            reference_shape, trilist=trilist)
-
-    @property
-    def _str_title(self):
-        r"""
-        Returns a string containing name of the model.
-
-        :type: `string`
-        """
-        return 'Active Template Model'
-
-    def view_shape_models_widget(self, n_parameters=5, mode='multiple',
+    def view_shape_models_widget(self, n_parameters=5,
                                  parameters_bounds=(-3.0, 3.0),
-                                 figure_size=(10, 8), style='coloured'):
+                                 mode='multiple', figure_size=(10, 8)):
         r"""
-        Visualizes the shape models of the ATM object using the
+        Visualizes the shape models of the ATM object using the
         `menpo.visualize.widgets.visualize_shape_model` widget.
 
         Parameters
         -----------
         n_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the parameters
-            sliders. If `int`, then the number of sliders per level is the
-            minimum between `n_parameters` and the number of active components
-            per level. If `list` of `int`, then a number of sliders is defined
-            per level. If ``None``, all the active components per level will
-            have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+            The number of shape principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per level is the minimum
+            between `n_parameters` and the number of active components per
+            level.
+            If `list` of `int`, then a number of sliders is defined per level.
+            If ``None``, all the active components per level will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``'single'``, ``'multiple'``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_shape_model
-        visualize_shape_model(
-            self.shape_models, n_parameters=n_parameters,
-            parameters_bounds=parameters_bounds, figure_size=figure_size,
-            mode=mode, style=style)
-
-    def view_atm_widget(self, n_shape_parameters=5, mode='multiple',
-                        parameters_bounds=(-3.0, 3.0), figure_size=(10, 8),
-                        style='coloured'):
+        visualize_shape_model(self.shape_models, n_parameters=n_parameters,
+                              parameters_bounds=parameters_bounds,
+                              figure_size=figure_size, mode=mode)
+
+    # TODO: fix me!
+    def view_atm_widget(self, n_shape_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
         r"""
-        Visualizes the ATM object using the
-        menpo.visualize.widgets.visualize_atm widget.
-
+        Visualizes the ATM object using the
+        `menpofit.visualize.visualize_atm` widget.
         Parameters
         -----------
-        n_shape_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the shape
-            parameters sliders. If `int`, then the number of sliders per level
-            is the minimum between `n_parameters` and the number of active
-            components per level. If `list` of `int`, then a number of sliders
-            is defined per level. If ``None``, all the active components per
-            level will have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+        n_shape_parameters : `int` or `list` of `int` or None, optional
+            The number of shape principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
+        n_appearance_parameters : `int` or `list` of `int` or None, optional
+            The number of appearance principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per scale is the minimum
+            between `n_parameters` and the number of active components per
+            scale.
+            If `list` of `int`, then a number of sliders is defined per scale.
+            If ``None``, all the active components per scale will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``'single'``, ``'multiple'``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_atm
         visualize_atm(self, n_shape_parameters=n_shape_parameters,
                       parameters_bounds=parameters_bounds,
-                      figure_size=figure_size, mode=mode, style=style)
+                      figure_size=figure_size, mode=mode)
 
     def __str__(self):
-        out = "{}\n - {} training shapes.\n".format(self._str_title,
-                                                    self.n_training_shapes)
-        # small strings about number of channels, channels string and downscale
-        n_channels = []
-        down_str = []
-        for j in range(self.n_levels):
-            n_channels.append(
-                self.warped_templates[j].n_channels)
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(
-                name_of_callable(self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                feat_str.append("- Feature is {} with ".format(
-                    name_of_callable(self.features[j])))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        out = "{} - {} Warp.\n".format(out, name_of_callable(self.transform))
-        if self.n_levels > 1:
-            if self.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n".format(out, self.n_levels,
-                                                          self.downscale)
+        return _atm_str(self)
 
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n".format(out, self.n_levels,
-                                                 self.downscale)
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-                if not self.scaled_shape_models:
-                    out = "{}   - Reference frames of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                          out,
-                          self.warped_templates[0].n_true_pixels() *
-                          n_channels[0],
-                          self.warped_templates[0].n_true_pixels(),
-                          n_channels[0],
-                          self.warped_templates[0]._str_shape,
-                          n_channels[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                if (self.scaled_shape_models or
-                        (not self.pyramid_on_features)):
-                    out = "{}     - Reference frame of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                          out,
-                          self.warped_templates[i].n_true_pixels() *
-                                                                  n_channels[i],
-                          self.warped_templates[i].n_true_pixels(),
-                          n_channels[i],
-                          self.warped_templates[i]._str_shape,
-                          n_channels[i])
-                out = "{0}     - {1} shape components ({2:.2f}% of " \
-                      "variance)\n".format(
-                      out, self.shape_models[i].n_components,
-                      self.shape_models[i].variance_ratio() * 100)
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - Reference frame of length {4} ({5} x {6}C, " \
-                  "{7} x {8}C)\n   - {9} shape components ({10:.2f}% of " \
-                  "variance)\n".format(
-                  out, feat_str[0], n_channels[0], ch_str[0],
-                  self.warped_templates[0].n_true_pixels() * n_channels[0],
-                  self.warped_templates[0].n_true_pixels(),
-                  n_channels[0],
-                  self.warped_templates[0]._str_shape,
-                  n_channels[0], self.shape_models[0].n_components,
-                  self.shape_models[0].variance_ratio() * 100)
-        return out
-
-
-class PatchBasedATM(ATM):
+
+# TODO: document me!
+class MaskedATM(ATM):
     r"""
-    Patch Based Active Template Model class.
+    Masked Active Template Model class.
+    """
 
-    Parameters
-    -----------
-    shape_models : :map:`PCAModel` list
-        A list containing the shape models of the ATM.
+    def __init__(self, template, shapes, group=None, verbose=False,
+                 holistic_features=no_op, diagonal=None, scales=(0.5, 1.0),
+                 patch_size=(17, 17), max_shape_components=None,
+                 batch_size=None):
+        self.patch_size = checks.check_patch_size(patch_size, len(scales))
+
+        super(MaskedATM, self).__init__(
+            template, shapes, group=group, verbose=verbose,
+            holistic_features=holistic_features,
+            transform=DifferentiableThinPlateSplines, diagonal=diagonal,
+            scales=scales,  max_shape_components=max_shape_components,
+            batch_size=batch_size)
+
+    def _warp_template(self, template, group, reference_shape, scale_index,
+                       prefix, verbose):
+        reference_frame = build_patch_reference_frame(
+            reference_shape, patch_size=self.patch_size[scale_index])
+        shape = template.landmarks[group].lms
+        return warp_images([template], [shape], reference_frame, self.transform,
+                           prefix=prefix, verbose=verbose)
 
-    warped_templates : :map:`MaskedImage` list
-        A list containing the warped templates models of the ATM.
+    @property
+    def _str_title(self):
+        return 'Masked Active Template Model'
 
-    n_training_shapes: `int`
-        The number of training shapes used to build the ATM.
+    def _instance(self, shape_instance, template):
+        landmarks = template.landmarks['source'].lms
 
-    patch_shape : tuple of `int`
-        The shape of the patches used to build the Patch Based ATM.
+        reference_frame = build_patch_reference_frame(
+            shape_instance, patch_size=self.patch_size)
 
-    transform : :map:`PureAlignmentTransform`
-        The transform used to warp the images from which the ATM was
-        constructed.
+        transform = self.transform(
+            reference_frame.landmarks['source'].lms, landmarks)
 
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
+        return template.as_unmasked().warp_to_mask(
+            reference_frame.mask, transform, warp_landmarks=True)
 
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
+    def __str__(self):
+        return _atm_str(self)
 
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
 
-    reference_shape : :map:`PointCloud`
-        The reference shape that was used to resize all training images to a
-        consistent object size.
+# TODO: document me!
+class LinearATM(ATM):
+    r"""
+    Linear Active Template Model class.
+    """
 
-    downscale : `float`
-        The downscale factor that was used to create the different pyramidal
-        levels.
+    def __init__(self, template, shapes, group=None, verbose=False,
+                 holistic_features=no_op,
+                 transform=DifferentiableThinPlateSplines, diagonal=None,
+                 scales=(0.5, 1.0), max_shape_components=None, batch_size=None):
 
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames are the mean shapes of each pyramid
-        level, so the shape models are scaled.
+        super(LinearATM, self).__init__(
+            template, shapes, group=group, verbose=verbose,
+            holistic_features=holistic_features, transform=transform,
+            diagonal=diagonal, scales=scales,
+            max_shape_components=max_shape_components, batch_size=batch_size)
+
+    @property
+    def _str_title(self):
+        r"""
+        Returns a string containing name of the model.
+        :type: `string`
+        """
+        return 'Linear Active Template Model'
+
+    def _build_shape_model(self, shapes, scale_index):
+        mean_aligned_shape = mean_pointcloud(align_shapes(shapes))
+        self.n_landmarks = mean_aligned_shape.n_points
+        self.reference_frame = build_reference_frame(mean_aligned_shape)
+        dense_shapes = densify_shapes(shapes, self.reference_frame,
+                                      self.transform)
+        # build dense shape model
+        shape_model = build_shape_model(dense_shapes)
+        return shape_model
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        aligned_shapes = align_shapes(shapes)
+        dense_shapes = densify_shapes(aligned_shapes, self.reference_frame,
+                                      self.transform)
+        # Increment shape model
+        shape_model.increment(dense_shapes,
+                              forgetting_factor=forgetting_factor)
+
+    def _warp_template(self, template, group, reference_shape, scale_index,
+                       prefix, verbose):
+        shape = template.landmarks[group].lms
+        return warp_images([template], [shape], self.reference_frame,
+                           self.transform, prefix=prefix,
+                           verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, shape_instance, template):
+        raise NotImplementedError  # placeholder until implemented
+
+    # TODO: implement me!
+    def view_atm_widget(self, n_shape_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # placeholder until implemented
 
-        If ``False``, the reference frames of all levels are the mean shape of
-        the highest level, so the shape models are not scaled; they have the
-        same size.
+    def __str__(self):
+        return _atm_str(self)
 
-        Note that from our experience, if ``scaled_shape_models`` is ``False``,
-        AAMs tend to have slightly better performance.
 
+# TODO: document me!
+class LinearMaskedATM(ATM):
+    r"""
+    Linear Masked Active Template Model class.
     """
-    def __init__(self, shape_models, warped_templates, n_training_shapes,
-                 patch_shape, transform, features, reference_shape,
-                 downscale, scaled_shape_models):
-        super(PatchBasedATM, self).__init__(
-            shape_models, warped_templates, n_training_shapes, transform,
-            features, reference_shape, downscale, scaled_shape_models)
-        self.patch_shape = patch_shape
-
-    def _build_reference_frame(self, reference_shape, landmarks):
-        return build_patch_reference_frame(
-            reference_shape, patch_shape=self.patch_shape)
+
+    def __init__(self, template, shapes, group=None, verbose=False,
+                 holistic_features=no_op, diagonal=None, scales=(0.5, 1.0),
+                 patch_size=(17, 17), max_shape_components=None,
+                 batch_size=None):
+        self.patch_size = checks.check_patch_size(patch_size, len(scales))
+
+        super(LinearMaskedATM, self).__init__(
+            template, shapes, group=group, verbose=verbose,
+            holistic_features=holistic_features,
+            transform=DifferentiableThinPlateSplines, diagonal=diagonal,
+            scales=scales,  max_shape_components=max_shape_components,
+            batch_size=batch_size)
 
     @property
     def _str_title(self):
         r"""
         Returns a string containing name of the model.
+        :type: `string`
+        """
+        return 'Linear Masked Active Template Model'
+
+    def _build_shape_model(self, shapes, scale_index):
+        mean_aligned_shape = mean_pointcloud(align_shapes(shapes))
+        self.n_landmarks = mean_aligned_shape.n_points
+        self.reference_frame = build_patch_reference_frame(
+            mean_aligned_shape, patch_size=self.patch_size[scale_index])
+        dense_shapes = densify_shapes(shapes, self.reference_frame,
+                                      self.transform)
+        # build dense shape model
+        shape_model = build_shape_model(dense_shapes)
+        return shape_model
+
+    def _increment_shape_model(self, shapes, shape_model,
+                               forgetting_factor=1.0):
+        aligned_shapes = align_shapes(shapes)
+        dense_shapes = densify_shapes(aligned_shapes, self.reference_frame,
+                                      self.transform)
+        # Increment shape model
+        shape_model.increment(dense_shapes,
+                              forgetting_factor=forgetting_factor)
+
+    def _warp_template(self, template, group, reference_shape, scale_index,
+                       prefix, verbose):
+        shape = template.landmarks[group].lms
+        return warp_images([template], [shape], self.reference_frame,
+                           self.transform, prefix=prefix,
+                           verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, shape_instance, template):
+        raise NotImplementedError  # placeholder until implemented
+
+    # TODO: implement me!
+    def view_atm_widget(self, n_shape_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # placeholder until implemented
 
+    def __str__(self):
+        return _atm_str(self)
+
+
+# TODO: document me!
+# TODO: implement offsets support?
+class PatchATM(ATM):
+    r"""
+    Patch-based Active Template Model class.
+    """
+
+    def __init__(self, template, shapes, group=None, verbose=False,
+                 holistic_features=no_op, patch_normalisation=no_op,
+                 diagonal=None, scales=(0.5, 1.0), patch_size=(17, 17),
+                 max_shape_components=None, batch_size=None):
+        self.patch_size = checks.check_patch_size(patch_size, len(scales))
+        self.patch_normalisation = patch_normalisation
+
+        super(PatchATM, self).__init__(
+            template, shapes, group=group, verbose=verbose,
+            holistic_features=holistic_features,
+            transform=DifferentiableThinPlateSplines, diagonal=diagonal,
+            scales=scales,  max_shape_components=max_shape_components,
+            batch_size=batch_size)
+
+    @property
+    def _str_title(self):
+        r"""
+        Returns a string containing name of the model.
         :type: `string`
         """
-        return 'Patch-Based Active Template Model'
+        return 'Patch-based Active Template Model'
+
+    def _warp_template(self, template, group, reference_shape, scale_index,
+                       prefix, verbose):
+        shape = template.landmarks[group].lms
+        return extract_patches([template], [shape],
+                               self.patch_size[scale_index],
+                               normalise_function=self.patch_normalisation,
+                               prefix=prefix, verbose=verbose)
+
+    # TODO: implement me!
+    def _instance(self, shape_instance, template):
+        raise NotImplementedError  # placeholder until implemented
+
+    # TODO: implement me!
+    def view_atm_widget(self, n_shape_parameters=5,
+                        parameters_bounds=(-3.0, 3.0), mode='multiple',
+                        figure_size=(10, 8)):
+        raise NotImplementedError  # placeholder until implemented
 
     def __str__(self):
-        out = super(PatchBasedATM, self).__str__()
-        out_splitted = out.splitlines()
-        out_splitted[0] = self._str_title
-        out_splitted.insert(5, "   - Patch size is {}W x {}H.".format(
-            self.patch_shape[1], self.patch_shape[0]))
-        return '\n'.join(out_splitted)
+        return _atm_str(self)
+
+
+def _atm_str(atm):  # shared implementation behind `__str__` of the ATM classes
+    if atm.diagonal is not None:
+        diagonal = atm.diagonal
+    else:  # no explicit diagonal: derive it from the reference shape's range
+        y, x = atm.reference_shape.range()
+        diagonal = np.sqrt(x ** 2 + y ** 2)
+
+    # One entry per scale: holistic feature, template shape, shape components
+    scales_info = []
+    lvl_str_tmplt = r"""  - Scale {}
+   - Holistic feature: {}
+   - Template shape: {}
+   - {} shape components"""
+    for k, s in enumerate(atm.scales):
+        scales_info.append(lvl_str_tmplt.format(
+            s, name_of_callable(atm.holistic_features[k]),
+            atm.warped_templates[k].shape,
+            atm.shape_models[k].n_components))
+    # Models exposing `patch_size` also report the patch size of each scale
+    if hasattr(atm, 'patch_size'):
+        for k in range(len(scales_info)):
+            scales_info[k] += '\n   - Patch size: {}'.format(
+                atm.patch_size[k])
+    scales_info = '\n'.join(scales_info)
+
+    cls_str = r"""{class_title}
+ - Images warped with {transform} transform
+ - Images scaled to diagonal: {diagonal:.2f}
+ - Scales: {scales}
+{scales_info}
+""".format(class_title=atm._str_title,
+           transform=name_of_callable(atm.transform),
+           diagonal=diagonal,
+           scales=atm.scales,
+           scales_info=scales_info)
+    return cls_str
+
+HolisticATM = ATM
diff --git a/menpofit/atm/builder.py b/menpofit/atm/builder.py
deleted file mode 100644
index 415bb95..0000000
--- a/menpofit/atm/builder.py
+++ /dev/null
@@ -1,486 +0,0 @@
-from __future__ import division
-
-from menpo.transform import Scale, ThinPlateSplines
-from menpofit.transform import DifferentiablePiecewiseAffine
-from menpo.feature import igo
-from menpo.visualize import print_dynamic
-
-from menpofit import checks
-from menpofit.aam.builder import (build_reference_frame,
-                                  build_patch_reference_frame)
-from menpofit.base import create_pyramid
-from menpofit.builder import (DeformableModelBuilder, build_shape_model,
-                              compute_reference_shape)
-
-
-class ATMBuilder(DeformableModelBuilder):
-    r"""
-    Class that builds Multilevel Active Template Models.
-
-    Parameters
-    ----------
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
-
-    transform : :map:`PureAlignmentTransform`, optional
-        The :map:`PureAlignmentTransform` that will be
-        used to warp the images.
-
-    trilist : ``(t, 3)`` `ndarray`, optional
-        Triangle list that will be used to build the reference frame. If
-        ``None``, defaults to performing Delaunay triangulation on the points.
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During building an AAM, all images are rescaled to ensure that the
-        scale of their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the normalization_diagonal
-        value.
-
-        If ``None``, the mean shape is not rescaled.
-
-        Note that, because the reference frame is computed from the mean
-        landmarks, this kwarg also specifies the diagonal length of the
-        reference frame (provided that features computation does not change
-        the image size).
-
-    n_levels : `int` > 0, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= ``1``, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(``n_levels``)
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames will be the mean shapes of
-        each pyramid level, so the shape models will be scaled.
-
-        If ``False``, the reference frames of all levels will be the mean shape
-        of the highest level, so the shape models will not be scaled; they will
-        have the same size.
-
-        Note that from our experience, if ``scaled_shape_models`` is ``False``,
-        AAMs tend to have slightly better performance.
-
-    max_shape_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of shape components is
-        defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        shape components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    boundary : `int` >= ``0``, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    Returns
-    -------
-    atm : :map:`ATMBuilder`
-        The ATM Builder object
-
-    Raises
-    -------
-    ValueError
-        ``n_levels`` must be `int` > ``0``
-    ValueError
-        ``downscale`` must be >= ``1``
-    ValueError
-        ``normalization_diagonal`` must be >= ``20``
-    ValueError
-        ``max_shape_components`` must be ``None`` or an `int` > 0 or
-        a ``0`` <= `float` <= ``1`` or a list of those containing 1 or
-        ``n_levels`` elements
-    ValueError
-        ``features`` must be a `function` or a list of those
-        containing ``1`` or ``n_levels`` elements
-    """
-    def __init__(self, features=igo, transform=DifferentiablePiecewiseAffine,
-                 trilist=None, normalization_diagonal=None, n_levels=3,
-                 downscale=2, scaled_shape_models=True,
-                 max_shape_components=None, boundary=3):
-        # check parameters
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-        checks.check_normalization_diagonal(normalization_diagonal)
-        checks.check_boundary(boundary)
-        max_shape_components = checks.check_max_components(
-            max_shape_components, n_levels, 'max_shape_components')
-        features = checks.check_features(features, n_levels)
-        # store parameters
-        self.features = features
-        self.transform = transform
-        self.trilist = trilist
-        self.normalization_diagonal = normalization_diagonal
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
-        self.max_shape_components = max_shape_components
-        self.boundary = boundary
-
-    def build(self, shapes, template, group=None, label=None, verbose=False):
-        r"""
-        Builds a Multilevel Active Template Model given a list of shapes and a
-        template image.
-
-        Parameters
-        ----------
-        shapes : list of :map:`PointCloud`
-            The set of shapes from which to build the shape model of the ATM.
-
-        template : :map:`Image` or subclass
-            The image to be used as template.
-
-        group : `string`, optional
-            The key of the landmark set of the template that should be used. If
-            ``None``, and if there is only one set of landmarks, this set will
-            be used.
-
-        label : `string`, optional
-            The label of the landmark manager of the template that you wish to
-            use. If ``None`` is passed, the convex hull of all landmarks is
-            used.
-
-        verbose : `boolean`, optional
-            Flag that controls information and progress printing.
-
-        Returns
-        -------
-        atm : :map:`ATM`
-            The ATM object. Shape and appearance models are stored from lowest
-            to highest level.
-        """
-        # compute reference_shape
-        self.reference_shape = compute_reference_shape(
-            shapes, self.normalization_diagonal, verbose=verbose)
-
-        # normalize the template size using the reference_shape scaling
-        if verbose:
-            print_dynamic('- Normalizing template size')
-        normalized_template = template.rescale_to_reference_shape(
-            self.reference_shape, group=group, label=label)
-
-        # create pyramid for template image
-        if verbose:
-            print_dynamic('- Creating template pyramid')
-        generator = create_pyramid([normalized_template], self.n_levels,
-                                   self.downscale, self.features)
-
-        # build the model at each pyramid level
-        if verbose:
-            if self.n_levels > 1:
-                print_dynamic('- Building model for each of the {} pyramid '
-                              'levels\n'.format(self.n_levels))
-            else:
-                print_dynamic('- Building model\n')
-
-        shape_models = []
-        warped_templates = []
-        # for each pyramid level (high --> low)
-        for j in range(self.n_levels):
-            # since models are built from highest to lowest level, the
-            # parameters in form of list need to use a reversed index
-            rj = self.n_levels - j - 1
-
-            if verbose:
-                level_str = '  - '
-                if self.n_levels > 1:
-                    level_str = '  - Level {}: '.format(j + 1)
-
-            # rescale shapes if required
-            if j > 0 and self.scaled_shape_models:
-                scale_transform = Scale(scale_factor=1.0 / self.downscale,
-                                        n_dims=2)
-                shapes = [scale_transform.apply(s) for s in shapes]
-
-            # train shape model and find reference frame
-            if verbose:
-                print_dynamic('{}Building shape model'.format(level_str))
-            shape_model = build_shape_model(shapes,
-                                            self.max_shape_components[rj])
-            reference_frame = self._build_reference_frame(shape_model.mean())
-
-            # add shape model to the list
-            shape_models.append(shape_model)
-
-            # get template's feature image of current level
-            if verbose:
-                print_dynamic('{}Warping template'.format(level_str))
-            feature_template = next(generator[0])
-
-            # compute transform
-            transform = self.transform(reference_frame.landmarks['source'].lms,
-                                       feature_template.landmarks[group][label])
-
-            # warp template to reference frame
-            warped_templates.append(
-                feature_template.warp_to_mask(reference_frame.mask, transform))
-
-            # attach reference_frame to template's source shape
-            warped_templates[j].landmarks['source'] = \
-                reference_frame.landmarks['source']
-
-            if verbose:
-                print_dynamic('{}Done\n'.format(level_str))
-
-        # reverse the list of shape and appearance models so that they are
-        # ordered from lower to higher resolution
-        shape_models.reverse()
-        warped_templates.reverse()
-        n_training_shapes = len(shapes)
-
-        return self._build_atm(shape_models, warped_templates,
-                               n_training_shapes)
-
-    def _build_reference_frame(self, mean_shape):
-        r"""
-        Generates the reference frame given a mean shape.
-
-        Parameters
-        ----------
-        mean_shape : :map:`PointCloud`
-            The mean shape to use.
-
-        Returns
-        -------
-        reference_frame : :map:`MaskedImage`
-            The reference frame.
-        """
-        return build_reference_frame(mean_shape, boundary=self.boundary,
-                                     trilist=self.trilist)
-
-    def _build_atm(self, shape_models, warped_templates, n_training_shapes):
-        r"""
-        Returns an ATM object.
-
-        Parameters
-        ----------
-        shape_models : `list` of :map:`PCAModel`
-            The trained multilevel shape models.
-
-        warped_templates : `list` of :map:`MaskedImage`
-            The warped multilevel templates.
-
-        n_training_shapes : `int`
-            The number of training shapes.
-
-        Returns
-        -------
-        atm : :map:`ATM`
-            The trained ATM object.
-        """
-        from .base import ATM
-        return ATM(shape_models, warped_templates, n_training_shapes,
-                   self.transform, self.features, self.reference_shape,
-                   self.downscale, self.scaled_shape_models)
-
-
-class PatchBasedATMBuilder(ATMBuilder):
-    r"""
-    Class that builds Multilevel Patch-Based Active Template Models.
-
-    Parameters
-    ----------
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-        Note that from our experience, this approach of extracting features
-        once and then creating a pyramid on top tends to lead to better
-        performing AAMs.
-
-    patch_shape : tuple of `int`, optional
-        The appearance model of the Patch-Based AAM will be obtained by
-        sampling appearance patches with the specified shape around each
-        landmark.
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During building an AAM, all images are rescaled to ensure that the
-        scale of their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the ``normalization_diagonal``
-        value.
-
-        If ``None``, the mean shape is not rescaled.
-
-        .. note::
-
-            Because the reference frame is computed from the mean
-            landmarks, this kwarg also specifies the diagonal length of the
-            reference frame (provided that features computation does not change
-            the image size).
-
-    n_levels : `int` > ``0``, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= 1, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(``n_levels``)
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames will be the mean shapes of each
-        pyramid level, so the shape models will be scaled.
-        If ``False``, the reference frames of all levels will be the mean shape
-        of the highest level, so the shape models will not be scaled; they will
-        have the same size.
-        Note that from our experience, if scaled_shape_models is ``False``, AAMs
-        tend to have slightly better performance.
-
-    max_shape_components : ``None`` or `int` > 0 or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of shape components is
-        defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        shape components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    boundary : `int` >= ``0``, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    Returns
-    -------
-    atm : ::map:`PatchBasedATMBuilder`
-        The Patch-Based ATM Builder object
-
-    Raises
-    -------
-    ValueError
-        ``n_levels`` must be `int` > ``0``
-    ValueError
-        ``downscale`` must be >= ``1``
-    ValueError
-        ``normalization_diagonal`` must be >= ``20``
-    ValueError
-        ``max_shape_components must be ``None`` or an `int` > ``0`` or
-        a ``0`` <= `float` <= ``1`` or a list of those containing ``1``
-        or ``n_levels`` elements
-    ValueError
-        ``features`` must be a `string` or a `function` or a list of those
-        containing 1 or ``n_levels`` elements
-    ValueError
-        ``pyramid_on_features`` is enabled so ``features`` must be a
-        `string` or a `function` or a list containing one of those
-    """
-    def __init__(self, features=igo, patch_shape=(16, 16),
-                 normalization_diagonal=None, n_levels=3, downscale=2,
-                 scaled_shape_models=True, max_shape_components=None,
-                 boundary=3):
-        # check parameters
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-        checks.check_normalization_diagonal(normalization_diagonal)
-        checks.check_boundary(boundary)
-        max_shape_components = checks.check_max_components(
-            max_shape_components, n_levels, 'max_shape_components')
-        features = checks.check_features(features, n_levels)
-
-        # store parameters
-        self.features = features
-        self.patch_shape = patch_shape
-        self.normalization_diagonal = normalization_diagonal
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
-        self.max_shape_components = max_shape_components
-        self.boundary = boundary
-
-        # patch-based AAMs can only work with TPS transform
-        self.transform = ThinPlateSplines
-
-    def _build_reference_frame(self, mean_shape):
-        r"""
-        Generates the reference frame given a mean shape.
-
-        Parameters
-        ----------
-        mean_shape : :map:`PointCloud`
-            The mean shape to use.
-
-        Returns
-        -------
-        reference_frame : :map:`MaskedImage`
-            The patch-based reference frame.
-        """
-        return build_patch_reference_frame(mean_shape, boundary=self.boundary,
-                                           patch_shape=self.patch_shape)
-
-    def _mask_image(self, image):
-        r"""
-        Creates the patch-based mask of the given image.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The image to be masked.
-        """
-        image.build_mask_around_landmarks(self.patch_shape, group='source')
-
-    def _build_atm(self, shape_models, warped_templates, n_training_shapes):
-        r"""
-        Returns a Patch-Based ATM object.
-
-        Parameters
-        ----------
-        shape_models : :map:`PCAModel`
-            The trained multilevel shape models.
-
-        warped_templates : `list` of :map:`MaskedImage`
-            The warped multilevel templates.
-
-        n_training_shapes : `int`
-            The number of training shapes.
-
-        Returns
-        -------
-        atm : :map:`PatchBasedATM`
-            The trained Patched-Based ATM object.
-        """
-        from .base import PatchBasedATM
-        return PatchBasedATM(shape_models, warped_templates, n_training_shapes,
-                             self.patch_shape, self.transform, self.features,
-                             self.reference_shape, self.downscale,
-                             self.scaled_shape_models)
diff --git a/menpofit/atm/fitter.py b/menpofit/atm/fitter.py
index a5bfbe4..0450571 100644
--- a/menpofit/atm/fitter.py
+++ b/menpofit/atm/fitter.py
@@ -1,352 +1,62 @@
 from __future__ import division
-
-from menpofit.fitter import MultilevelFitter
-from menpofit.fittingresult import AMMultilevelFittingResult
-from menpofit.transform import (ModelDrivenTransform, OrthoMDTransform,
-                                DifferentiableAlignmentSimilarity)
-from menpofit.lucaskanade.residual import SSD, GaborFourier
-from menpofit.lucaskanade.image import IC
-from menpofit.base import name_of_callable
-
-
-class ATMFitter(MultilevelFitter):
-    r"""
-    Abstract Interface for defining Active Template Models Fitters.
-
-    Parameters
-    -----------
-    atm : :map:`ATM`
-        The Active Template Model to be used.
-    """
-    def __init__(self, atm):
-        self.atm = atm
-
-    @property
-    def reference_shape(self):
-        r"""
-        The reference shape of the ATM.
-
-        :type: :map:`PointCloud`
-        """
-        return self.atm.reference_shape
-
-    @property
-    def features(self):
-        r"""
-        The feature extracted at each pyramidal level during ATM building.
-        Stored in ascending pyramidal order.
-
-        :type: `list`
-        """
-        return self.atm.features
-
-    @property
-    def n_levels(self):
-        r"""
-        The number of pyramidal levels used during ATM building.
-
-        :type: `int`
-        """
-        return self.atm.n_levels
-
-    @property
-    def downscale(self):
-        r"""
-        The downscale used to generate the final scale factor applied at
-        each pyramidal level during ATM building.
-        The scale factor is computed as:
-
-            ``(downscale ** k) for k in range(n_levels)``
-
-        :type: `float`
-        """
-        return self.atm.downscale
-
-    def _create_fitting_result(self, image, fitting_results, affine_correction,
-                               gt_shape=None):
-        r"""
-        Creates a :map:`ATMMultilevelFittingResult` associated to a
-        particular fitting of the ATM fitter.
-
-        Parameters
-        -----------
-        image : :map:`Image` or subclass
-            The image to be fitted.
-
-        fitting_results : `list` of :map:`FittingResult`
-            A list of fitting result objects containing the state of the
-            the fitting for each pyramidal level.
-
-        affine_correction : :map:`Affine`
-            An affine transform that maps the result of the top resolution
-            level to the scale space of the original image.
-
-        gt_shape : :map:`PointCloud`, optional
-            The ground truth shape associated to the image.
-
-        error_type : 'me_norm', 'me' or 'rmse', optional
-            Specifies how the error between the fitted and ground truth
-            shapes must be computed.
-
-        Returns
-        -------
-        fitting : :map:`ATMMultilevelFittingResult`
-            A fitting result object that will hold the state of the ATM
-            fitter for a particular fitting.
-        """
-        return ATMMultilevelFittingResult(
-            image, self, fitting_results, affine_correction, gt_shape=gt_shape)
-
-
-class LucasKanadeATMFitter(ATMFitter):
+from menpofit import checks
+from menpofit.fitter import ModelFitter
+from menpofit.modelinstance import OrthoPDM
+from menpofit.transform import OrthoMDTransform, LinearOrthoMDTransform
+from .base import ATM, MaskedATM, LinearATM, LinearMaskedATM, PatchATM
+from .algorithm import (
+    ATMLKStandardInterface, ATMLKPatchInterface, ATMLKLinearInterface,
+    InverseCompositional)
+from .result import ATMFitterResult
+
+
+# TODO: document me!
+class LucasKanadeATMFitter(ModelFitter):
     r"""
-    Lucas-Kanade based :map:`Fitter` for Active Template Models.
-
-    Parameters
-    -----------
-    atm : :map:`ATM`
-        The Active Template Model to be used.
-
-    algorithm : subclass of :map:`ImageLucasKanade`, optional
-        The Image Lucas-Kanade class to be used.
-
-    md_transform : :map:`ModelDrivenTransform` or subclass, optional
-        The model driven transform class to be used.
-
-    n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-        previous or ``None``, optional
-        The number of shape components or amount of shape variance to be
-        used per pyramidal level.
-
-        If `None`, all available shape components ``(n_active_components)``
-        will be used.
-        If `int` ``> 1``, the specified number of shape components will be
-        used.
-        If ``0. <=`` `float` ``<= 1.``, the number of components capturing the
-        specified variance ratio will be computed and used.
-
-        If `list` of length ``n_levels``, then the number of components is
-        defined per level. The first element of the list corresponds to the
-        lowest pyramidal level and so on.
-        If not a `list` or a `list` of length 1, then the specified number of
-        components will be used for all levels.
     """
-    def __init__(self, atm, algorithm=IC, residual=SSD,
-                 md_transform=OrthoMDTransform, n_shape=None, **kwargs):
-        super(LucasKanadeATMFitter, self).__init__(atm)
-        self._set_up(algorithm=algorithm, residual=residual,
-                     md_transform=md_transform, n_shape=n_shape, **kwargs)
+    def __init__(self, atm, algorithm_cls=InverseCompositional,
+                 n_shape=None, sampling=None):
+        self._model = atm  # exposed read-only through the `atm` property
+        checks.set_models_components(atm.shape_models, n_shape)
+        self._sampling = checks.check_sampling(sampling, atm.n_scales)
+        self._set_up(algorithm_cls)  # needs _model and _sampling set above
 
     @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the name of fitting algorithm.
-
-        :type: `str`
-        """
-        return 'LK-ATM-' + self._fitters[0].algorithm
-
-    def _set_up(self, algorithm=IC,
-                residual=SSD, md_transform=OrthoMDTransform,
-                global_transform=DifferentiableAlignmentSimilarity,
-                n_shape=None, **kwargs):
-        r"""
-        Sets up the Lucas-Kanade fitter object.
-
-        Parameters
-        -----------
-        algorithm : subclass of :map:`ImageLucasKanade`, optional
-            The Image Lucas-Kanade class to be used.
-
-        md_transform : :map:`ModelDrivenTransform` or subclass, optional
-            The model driven transform class to be used.
-
-        n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-            previous or ``None``, optional
-            The number of shape components or amount of shape variance to be
-            used per pyramidal level.
-
-            If `None`, all available shape components ``(n_active_components)``
-            will be used.
-            If `int` ``> 1``, the specified number of shape components will be
-            used.
-            If ``0. <=`` `float` ``<= 1.``, the number of components capturing
-            the specified variance ratio will be computed and used.
-
-            If `list` of length ``n_levels``, then the number of components is
-            defined per level. The first element of the list corresponds to the
-            lowest pyramidal level and so on.
-            If not a `list` or a `list` of length 1, then the specified number
-            of components will be used for all levels.
-
-        Raises
-        -------
-        ValueError
-            ``n_shape`` can be an `int`, `float`, ``None`` or a `list`
-            containing ``1`` or ``n_levels`` of those.
-        """
-        # check n_shape parameter
-        if n_shape is not None:
-            if type(n_shape) is int or type(n_shape) is float:
-                for sm in self.atm.shape_models:
-                    sm.n_active_components = n_shape
-            elif len(n_shape) == 1 and self.atm.n_levels > 1:
-                for sm in self.atm.shape_models:
-                    sm.n_active_components = n_shape[0]
-            elif len(n_shape) == self.atm.n_levels:
-                for sm, n in zip(self.atm.shape_models, n_shape):
-                    sm.n_active_components = n
-            else:
-                raise ValueError('n_shape can be an integer or a float or None '
-                                 'or a list containing 1 or {} of '
-                                 'those'.format(self.atm.n_levels))
-
-        self._fitters = []
-        for j, (t, sm) in enumerate(zip(self.atm.warped_templates,
-                                        self.atm.shape_models)):
-
-            if md_transform is not ModelDrivenTransform:
-                md_trans = md_transform(
-                    sm, self.atm.transform, global_transform,
-                    source=t.landmarks['source'].lms)
+    def atm(self):
+        return self._model  # the ATM stored by __init__
+
+    def _set_up(self, algorithm_cls):
+        self.algorithms = []
+        for j, (wt, sm, s) in enumerate(zip(self.atm.warped_templates,
+                                            self.atm.shape_models,
+                                            self._sampling)):
+
+            if type(self.atm) is ATM or type(self.atm) is MaskedATM:
+                source_lmarks = wt.landmarks['source'].lms
+                md_transform = OrthoMDTransform(sm, self.atm.transform,
+                                                source=source_lmarks)
+                interface = ATMLKStandardInterface(md_transform, wt, sampling=s)
+                algorithm = algorithm_cls(interface)
+            elif (type(self.atm) is LinearATM or
+                  type(self.atm) is LinearMaskedATM):
+                md_transform = LinearOrthoMDTransform(sm,
+                                                      self.atm.reference_shape)
+                interface = ATMLKLinearInterface(md_transform, wt, sampling=s)
+                algorithm = algorithm_cls(interface)
+            elif type(self.atm) is PatchATM:
+                pdm = OrthoPDM(sm)
+                interface = ATMLKPatchInterface(
+                    pdm, wt, sampling=s, patch_size=self.atm.patch_size[j],
+                    patch_normalisation=self.atm.patch_normalisation)
+                algorithm = algorithm_cls(interface)
             else:
-                md_trans = md_transform(
-                    sm, self.atm.transform,
-                    source=t.landmarks['source'].lms)
-
-            if residual is not GaborFourier:
-                self._fitters.append(
-                    algorithm(t, residual(), md_trans, **kwargs))
-            else:
-                self._fitters.append(
-                    algorithm(t, residual(t.shape), md_trans,
-                              **kwargs))
-
-    def __str__(self):
-        out = "{0} Fitter\n" \
-              " - Lucas-Kanade {1}\n" \
-              " - Transform is {2} and residual is {3}.\n" \
-              " - {4} training images.\n".format(
-              self.atm._str_title, self._fitters[0].algorithm,
-              self._fitters[0].transform.__class__.__name__,
-              self._fitters[0].residual.type, self.atm.n_training_shapes)
-        # small strings about number of channels, channels string and downscale
-        n_channels = []
-        down_str = []
-        for j in range(self.n_levels):
-            n_channels.append(
-                self._fitters[j].template.n_channels)
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(name_of_callable(
-                self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                if isinstance(self.features[j], str):
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j]))
-                elif self.features[j] is None:
-                    feat_str.append("- No features extracted. ")
-                else:
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j].__name__))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        if self.n_levels > 1:
-            if self.atm.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n".format(out, self.n_levels,
-                                                          self.downscale)
-
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n".format(out, self.n_levels,
-                                                 self.downscale)
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-                if not self.atm.scaled_shape_models:
-                    out = "{}   - Reference frames of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                          out,
-                          self._fitters[0].template.n_true_pixels() *
-                                                                   n_channels[0],
-                          self._fitters[0].template.n_true_pixels(),
-                          n_channels[0], self._fitters[0].template._str_shape,
-                          n_channels[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                if (self.atm.scaled_shape_models or
-                        (not self.pyramid_on_features)):
-                    out = "{}     - Reference frame of length {} " \
-                          "({} x {}C, {} x {}C)\n".format(
-                          out,
-                          self._fitters[i].template.n_true_pixels() *
-                                                                   n_channels[i],
-                          self._fitters[i].template.n_true_pixels(),
-                          n_channels[i], self._fitters[i].template._str_shape,
-                          n_channels[i])
-                out = "{0}     - {1} motion components\n\n".format(
-                      out, self._fitters[i].transform.n_parameters)
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - Reference frame of length {4} ({5} x {6}C, " \
-                  "{7} x {8}C)\n   - {9} motion parameters\n".format(
-                  out, feat_str[0], n_channels[0], ch_str[0],
-                  self._fitters[0].template.n_true_pixels() * n_channels[0],
-                  self._fitters[0].template.n_true_pixels(),
-                  n_channels[0], self._fitters[0].template._str_shape,
-                  n_channels[0], self._fitters[0].transform.n_parameters)
-        return out
-
-
-class ATMMultilevelFittingResult(AMMultilevelFittingResult):
-    r"""
-    Class that holds the state of a :map:`ATMFitter` object before,
-    during and after it has fitted a particular image.
-    """
-    @property
-    def atm_reconstructions(self):
-        r"""
-        The list containing the atm reconstruction (i.e. the template warped on
-        the shape instance reconstruction) obtained at each fitting iteration.
-
-        Note that this reconstruction is only tested to work for the
-        :map:`OrthoMDTransform`
-
-        :type: list` of :map:`Image` or subclass
-        """
-        atm_reconstructions = []
-        for level, f in enumerate(self.fitting_results):
-            for shape_w in f.parameters:
-                shape_w = shape_w[4:]
-                sm_level = self.fitter.aam.shape_models[level]
-                swt = shape_w / sm_level.eigenvalues[:len(shape_w)] ** 0.5
-                atm_reconstructions.append(self.fitter.aam.instance(
-                    shape_weights=swt, level=level))
-        return atm_reconstructions
+                raise ValueError("AAM object must be of one of the "
+                                 "following classes: {}, {}, {}, {}, "
+                                 "{}".format(ATM, MaskedATM, LinearATM,
+                                             LinearMaskedATM, PatchATM))
+            self.algorithms.append(algorithm)
+
+    def _fitter_result(self, image, algorithm_results, affine_correction,
+                       gt_shape=None):
+        return ATMFitterResult(image, self, algorithm_results,
+                               affine_correction, gt_shape=gt_shape)
diff --git a/menpofit/atm/result.py b/menpofit/atm/result.py
new file mode 100644
index 0000000..b7aec3e
--- /dev/null
+++ b/menpofit/atm/result.py
@@ -0,0 +1,158 @@
+from __future__ import division
+from menpofit.result import ParametricAlgorithmResult, MultiFitterResult
+
+
+class ATMAlgorithmResult(ParametricAlgorithmResult):
+    r"""
+    Result of running a single ATM fitting algorithm on an image.
+
+    Warped images and costs are computed lazily, on first access, and cached.
+
+    Parameters
+    ----------
+    image
+        The fitted image; it is what :attr:`warped_images` warps.
+    algorithm
+        The algorithm that produced this result. Its ``transform`` and
+        ``interface`` attributes are used by :attr:`warped_images`.
+    shape_parameters
+        The shape parameter vectors, forwarded to
+        :map:`ParametricAlgorithmResult` (presumably one per iteration).
+    cost_functions : iterable of callables, optional
+        Zero-argument callables, each returning one cost value. They are
+        only evaluated when :attr:`costs` is first accessed.
+    gt_shape : optional
+        The ground truth shape, forwarded to
+        :map:`ParametricAlgorithmResult`.
+    """
+    def __init__(self, image, algorithm, shape_parameters, cost_functions=None,
+                 gt_shape=None):
+        super(ATMAlgorithmResult, self).__init__(
+            image, algorithm, shape_parameters, gt_shape=gt_shape)
+        self._cost_functions = cost_functions
+        # Lazily populated caches, filled in by the properties below.
+        self._warped_images = None
+        self._costs = None
+
+    @property
+    def warped_images(self):
+        r"""
+        The image warped with each of the shape parameter vectors, in order.
+
+        Note that computing this updates ``algorithm.transform`` in place.
+
+        :type: `list`
+        """
+        if self._warped_images is None:
+            self._warped_images = []
+            for p in self.shape_parameters:
+                self.algorithm.transform.from_vector_inplace(p)
+                self._warped_images.append(
+                    self.algorithm.interface.warp(self.image))
+        return self._warped_images
+
+    @property
+    def costs(self):
+        r"""
+        The values returned by the cost functions, in order. Empty if no
+        cost functions were provided.
+
+        :type: `list`
+        """
+        if self._costs is None:
+            # ``cost_functions`` defaults to ``None``: report no costs rather
+            # than failing with a ``TypeError`` when iterating over ``None``.
+            cost_functions = self._cost_functions
+            if cost_functions is None:
+                cost_functions = ()
+            self._costs = [f() for f in cost_functions]
+        return self._costs
+
+
+class LinearATMAlgorithmResult(ATMAlgorithmResult):
+    r"""
+    :map:`ATMAlgorithmResult` whose shapes are read from the
+    ``sparse_target`` of the corresponding transforms.
+    """
+    @property
+    def shapes(self):
+        r"""
+        The ``sparse_target`` of the transform built from each shape
+        parameter vector.
+
+        :type: `list`
+        """
+        return [self.algorithm.transform.from_vector(p).sparse_target
+                for p in self.shape_parameters]
+
+    @property
+    def final_shape(self):
+        r"""
+        The ``sparse_target`` of the final transform.
+        """
+        return self.final_transform.sparse_target
+
+    @property
+    def initial_shape(self):
+        r"""
+        The ``sparse_target`` of the initial transform.
+        """
+        return self.initial_transform.sparse_target
+
+
+class ATMFitterResult(MultiFitterResult):
+    r"""
+    Result of an ATM fitter, aggregating the results of its algorithms.
+
+    Parameters
+    ----------
+    image
+        The fitted image; it is what :attr:`warped_images` warps.
+    fitter
+        The fitter that produced this result.
+    algorithm_results : list
+        The per-algorithm results, e.g. :map:`ATMAlgorithmResult` objects.
+        The algorithm of the last one is used by :attr:`warped_images`.
+    affine_correction
+        Forwarded unchanged to :map:`MultiFitterResult`.
+    gt_shape : optional
+        The ground truth shape, forwarded to :map:`MultiFitterResult`.
+    """
+    def __init__(self, image, fitter, algorithm_results, affine_correction,
+                 gt_shape=None):
+        super(ATMFitterResult, self).__init__(
+            image, fitter, algorithm_results, affine_correction,
+            gt_shape=gt_shape)
+        # Lazily populated cache, filled in by ``warped_images``.
+        self._warped_images = None
+
+    @property
+    def warped_images(self):
+        r"""
+        The image warped with each of the fitted shapes, in order.
+
+        Note that computing this re-targets the ``transform`` of the last
+        algorithm.
+
+        :type: `list`
+        """
+        if self._warped_images is None:
+            algorithm = self.algorithm_results[-1].algorithm
+            self._warped_images = []
+            for s in self.shapes:
+                algorithm.transform.set_target(s)
+                self._warped_images.append(
+                    algorithm.interface.warp(self.image))
+        return self._warped_images
+
+    @property
+    def costs(self):
+        r"""
+        The costs of all the algorithm results, concatenated in order.
+
+        :type: `list`
+        """
+        costs = []
+        for a in self.algorithm_results:
+            costs += a.costs
+        return costs
diff --git a/menpofit/base.py b/menpofit/base.py
index 36b8a08..aa32cb3 100644
--- a/menpofit/base.py
+++ b/menpofit/base.py
@@ -1,142 +1,37 @@
 from __future__ import division
-from menpo.transform import AlignmentSimilarity, Similarity
+from functools import partial
+import itertools
 import numpy as np
-from menpo.visualize import progress_bar_str, print_dynamic, print_progress
 
 
 def name_of_callable(c):
+    r"""Best-effort display name of a function, partial or callable object."""
     try:
-        return c.__name__  # function
+        if isinstance(c, partial):
+            # Unwrap first: a partial may wrap a function, a callable
+            # object or even another partial, so resolve it recursively.
+            return name_of_callable(c.func)
+        return c.__name__  # plain function
     except AttributeError:
         return c.__class__.__name__  # callable class
 
 
-def is_pyramid_on_features(features):
-    r"""
-    True if feature extraction happens once and then a gaussian pyramid
-    is taken. False if a gaussian pyramid is taken and then features are
-    extracted at each level.
-    """
-    return callable(features)
-
-
-def create_pyramid(images, n_levels, downscale, features, verbose=False):
-    r"""
-    Function that creates a generator function for Gaussian pyramid. The
-    pyramid can be created either on the feature space or the original
-    (intensities) space.
-
-    Parameters
-    ----------
-    images: list of :map:`Image`
-        The set of landmarked images from which to build the AAM.
-
-    n_levels: int
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale: float
-        The downscale factor that will be used to create the different
-        pyramidal levels.
-
-    features: ``callable`` ``[callable]``
-        If a single callable, then the feature calculation will happen once
-        followed by a gaussian pyramid. If a list of callables then a
-        gaussian pyramid is generated with features extracted at each level
-        (after downsizing and blurring).
-
-    Returns
-    -------
-    list of generators :
-        The generator function of the Gaussian pyramid.
-
-    """
-    will_take_a_while = is_pyramid_on_features(features)
-    if will_take_a_while and verbose:
-        images = print_progress(images, show_bar=False, show_count=False,
-                                prefix='- Computing top-level feature space')
-    pyramids = []
-    for img in images:
-        pyramids.append(pyramid_of_feature_images(n_levels, downscale,
-                                                  features, img))
-    return pyramids
-
-
-def pyramid_of_feature_images(n_levels, downscale, features, image):
-    r"""
-    Generates a gaussian pyramid of feature images for a single image.
-    """
-    if is_pyramid_on_features(features):
-        # compute feature image at the top
-        feature_image = features(image)
-        # create pyramid on the feature image
-        return feature_image.gaussian_pyramid(n_levels=n_levels,
-                                              downscale=downscale)
-    else:
-        # create pyramid on intensities image
-        # feature will be computed per level
-        pyramid = image.gaussian_pyramid(n_levels=n_levels,
-                                         downscale=downscale)
-        # add the feature generation here
-        return feature_images(pyramid, features)
-
-
-# adds feature extraction to a generator of images
-def feature_images(images, features):
-    for feature, level in zip(reversed(features), images):
-        yield feature(level)
-
-
-class DeformableModel(object):
-
-    def __init__(self, features):
-        self.features = features
-
-    @property
-    def pyramid_on_features(self):
-        return is_pyramid_on_features(self.features)
-
-
-# TODO: Should this be a method on Similarity? AlignableTransforms?
-def noisy_align(source, target, noise_std=0.04, rotation=False):
-    r"""
-    Constructs and perturbs the optimal similarity transform between source
-    to the target by adding white noise to its weights.
-
-    Parameters
-    ----------
-    source: :class:`menpo.shape.PointCloud`
-        The source pointcloud instance used in the alignment
-    target: :class:`menpo.shape.PointCloud`
-        The target pointcloud instance used in the alignment
-    noise_std: float
-        The standard deviation of the white noise
-
-        Default: 0.04
-    rotation: boolean
-        If False the second parameter of the Similarity,
-        which captures captures inplane rotations, is set to 0.
-
-        Default:False
-
-    Returns
-    -------
-    noisy_transform : :class: `menpo.transform.Similarity`
-        The noisy Similarity Transform
-    """
-    transform = AlignmentSimilarity(source, target, rotation=rotation)
-    parameters = transform.as_vector()
-    parameter_range = np.hstack((parameters[:2], target.range()))
-    noise = (parameter_range * noise_std *
-             np.random.randn(transform.n_parameters))
-    return Similarity.init_identity(source.n_dims).from_vector(parameters + noise)
+def batch(iterable, n):
+    r"""Yield successive tuples of up to ``n`` items from ``iterable``."""
+    it = iter(iterable)
+    chunk = tuple(itertools.islice(it, n))
+    while chunk:
+        yield chunk
+        chunk = tuple(itertools.islice(it, n))
 
 
-def build_sampling_grid(patch_shape):
-    r"""
-    """
-    patch_shape = np.array(patch_shape)
-    patch_half_shape = np.require(np.floor(patch_shape / 2), dtype=int)
-    start = -patch_half_shape
-    end = patch_half_shape + 1
+def build_grid(shape):
+    r"""Integer sampling grid around the origin with ``shape`` cells, i.e.
+    an array of shape ``(shape[0], shape[1], 2)`` for an integer ``shape``.
+    """
+    shape = np.asarray(shape)
+    half_shape = np.require(np.floor(shape / 2), dtype=int)
+    start = -half_shape
+    end = half_shape + shape % 2
     sampling_grid = np.mgrid[start[0]:end[0], start[1]:end[1]]
-    return sampling_grid.swapaxes(0, 2).swapaxes(0, 1)
\ No newline at end of file
+    return np.rollaxis(sampling_grid, 0, 3)
diff --git a/menpofit/benchmark/__init__.py b/menpofit/benchmark/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/menpofit/benchmark/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/menpofit/benchmark/base.py b/menpofit/benchmark/base.py
deleted file mode 100644
index c87ef2d..0000000
--- a/menpofit/benchmark/base.py
+++ /dev/null
@@ -1,699 +0,0 @@
-import os
-
-import numpy as np
-
-import menpo.io as mio
-from menpo.visualize.text_utils import print_dynamic, progress_bar_str
-from menpo.landmark import labeller
-from menpo.visualize.base import GraphPlotter
-from menpofit.aam import AAMBuilder, LucasKanadeAAMFitter
-from menpofit.clm import CLMBuilder, GradientDescentCLMFitter
-from menpofit.sdm import SDMTrainer
-
-
-def aam_fit_benchmark(fitting_images, aam, fitting_options=None,
-                      perturb_options=None, verbose=False):
-    r"""
-    Fits a trained AAM model to a database.
-
-    Parameters
-    ----------
-    fitting_images: list of :class:MaskedImage objects
-        A list of the fitting images.
-    aam: :class:menpo.fitmultilevel.aam.AAM object
-        The trained AAM object. It can be generated from the
-        aam_build_benchmark() method.
-    fitting_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the
-        LucasKanadeAAMFitter (:class:menpo.fitmultilevel.aam.base).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            fitting_options = {'algorithm': AlternatingInverseCompositional,
-                               'md_transform': OrthoMDTransform,
-                               'global_transform': AlignmentSimilarity,
-                               'n_shape': None,
-                               'n_appearance': None,
-                               'max_iters': 50,
-                               'error_type': 'me_norm'
-                               }
-        For an explanation of the options, please refer to the
-        LucasKanadeAAMFitter documentation.
-
-        Default: None
-    bounding_boxes: list of (2, 2) ndarray, optional
-        If provided, fits will be initialized from a bounding box. If
-        None, perturbation of ground truth will be used instead.
-        can be provided). Interpreted as [[min_x, min_y], [max_x, max_y]].
-    perturb_options: dictionary, optional
-        A dictionary with parameters that control the perturbation on the
-        ground truth shape with noise of specified std. Note that if
-        bounding_box is provided perturb_options is ignored and not used.
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            initialization_options = {'noise_std': 0.04,
-                                      'rotation': False
-                                      }
-        For an explanation of the options, please refer to the perturb_shape()
-        method documentation of :map:`MultilevelFitter`.
-    verbose: bool, optional
-        If True, it prints information regarding the AAM fitting including
-        progress bar, current image error and percentage of images with errors
-        less or equal than a value.
-
-        Default: False
-
-    Returns
-    -------
-    fitting_results: :map:`FittingResultList`
-        A list with the :map:`FittingResult` object per image.
-    """
-    if verbose:
-        print('AAM Fitting:')
-        perc1 = 0.
-        perc2 = 0.
-
-    # parse options
-    if fitting_options is None:
-        fitting_options = {}
-    if perturb_options is None:
-        perturb_options = {}
-
-    # extract some options
-    group = fitting_options.pop('gt_group', 'PTS')
-    max_iters = fitting_options.pop('max_iters', 50)
-    error_type = fitting_options.pop('error_type', 'me_norm')
-
-    # create fitter
-    fitter = LucasKanadeAAMFitter(aam, **fitting_options)
-
-    # fit images
-    n_images = len(fitting_images)
-    fitting_results = []
-    for j, i in enumerate(fitting_images):
-        # perturb shape
-        gt_s = i.landmarks[group].lms
-        if 'bbox' in i.landmarks:
-            # shape from bounding box
-            s = fitter.obtain_shape_from_bb(i.landmarks['bbox'].lms.points)
-        else:
-            # shape from perturbation
-            s = fitter.perturb_shape(gt_s, **perturb_options)
-        # fit
-        fr = fitter.fit(i, s, gt_shape=gt_s, max_iters=max_iters)
-        fitting_results.append(fr)
-
-        # print
-        final_error = fr.final_error(error_type=error_type)
-        initial_error = fr.initial_error(error_type=error_type)
-        if verbose:
-            if error_type == 'me_norm':
-                if final_error <= 0.03:
-                    perc1 += 1.
-                if final_error <= 0.04:
-                    perc2 += 1.
-            elif error_type == 'rmse':
-                if final_error <= 0.05:
-                    perc1 += 1.
-                if final_error <= 0.06:
-                    perc2 += 1.
-            print_dynamic('- {0} - [<=0.03: {1:.1f}%, <=0.04: {2:.1f}%] - '
-                          'Image {3}/{4} (error: {5:.3f} --> {6:.3f})'.format(
-                progress_bar_str(float(j + 1) / n_images, show_bar=False),
-                perc1 * 100. / n_images, perc2 * 100. / n_images, j + 1,
-                n_images, initial_error, final_error))
-    if verbose:
-        print_dynamic('- Fitting completed: [<=0.03: {0:.1f}%, <=0.04: '
-                      '{1:.1f}%]\n'.format(perc1 * 100. / n_images,
-                                           perc2 * 100. / n_images))
-
-    return fitting_results
-
-
-def aam_build_benchmark(training_images, training_options=None, verbose=False):
-    r"""
-    Builds an AAM model.
-
-    Parameters
-    ----------
-    training_images: list of :class:MaskedImage objects
-        A list of the training images.
-    training_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the AAMBuilder
-        (:class:menpo.fitmultilevel.aam.AAMBuilder).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            training_options = {'group': 'PTS',
-                                'features': 'igo',
-                                'transform': PiecewiseAffine,
-                                'trilist': None,
-                                'normalization_diagonal': None,
-                                'n_levels': 3,
-                                'downscale': 2,
-                                'scaled_shape_models': True,
-                                'max_shape_components': None,
-                                'max_appearance_components': None,
-                                'boundary': 3
-                                }
-        For an explanation of the options, please refer to the AAMBuilder
-        documentation.
-
-        Default: None
-    verbose: boolean, optional
-        If True, it prints information regarding the AAM training.
-
-        Default: False
-
-    Returns
-    -------
-    aam: :class:menpo.fitmultilevel.aam.AAM object
-        The trained AAM model.
-    """
-    if verbose:
-        print('AAM Training:')
-
-    # parse options
-    if training_options is None:
-        training_options = {}
-
-    # group option
-    group = training_options.pop('group', None)
-
-    # trilist option
-    trilist = training_options.pop('trilist', None)
-    if trilist is not None:
-        labeller(training_images[0], 'PTS', trilist)
-        training_options['trilist'] = \
-            training_images[0].landmarks[trilist.__name__].lms.trilist
-
-    # build aam
-    aam = AAMBuilder(**training_options).build(training_images, group=group,
-                                               verbose=verbose)
-
-    return aam
-
-
-def clm_fit_benchmark(fitting_images, clm, fitting_options=None,
-                      perturb_options=None, verbose=False):
-    r"""
-    Fits a trained CLM model to a database.
-
-    Parameters
-    ----------
-    fitting_images: list of :class:MaskedImage objects
-        A list of the fitting images.
-    clm: :class:menpo.fitmultilevel.clm.CLM object
-        The trained CLM object. It can be generated from the
-        clm_build_benchmark() method.
-    fitting_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the
-        GradientDescentCLMFitter (:class:menpo.fitmultilevel.clm.base).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            fitting_options = {'algorithm': RegularizedLandmarkMeanShift,
-                               'pdm_transform': OrthoPDM,
-                               'global_transform': AlignmentSimilarity,
-                               'n_shape': None,
-                               'max_iters': 50,
-                               'error_type': 'me_norm'
-                               }
-        For an explanation of the options, please refer to the
-        GradientDescentCLMFitter documentation.
-
-        Default: None
-    bounding_boxes: list of (2, 2) ndarray, optional
-        If provided, fits will be initialized from a bounding box. If
-        None, perturbation of ground truth will be used instead.
-        can be provided). Interpreted as [[min_x, min_y], [max_x, max_y]].
-    perturb_options: dictionary, optional
-        A dictionary with parameters that control the perturbation on the
-        ground truth shape with noise of specified std. Note that if
-        bounding_box is provided perturb_options is ignored and not used.
-    verbose: boolean, optional
-        If True, it prints information regarding the AAM fitting including
-        progress bar, current image error and percentage of images with errors
-        less or equal than a value.
-
-        Default: False
-
-    Returns
-    -------
-    fitting_results: :class:menpo.fit.fittingresult.FittingResultList object
-        A list with the FittingResult object per image.
-    """
-    if verbose:
-        print('CLM Fitting:')
-        perc1 = 0.
-        perc2 = 0.
-
-    # parse options
-    if fitting_options is None:
-        fitting_options = {}
-
-    # extract some options
-    group = fitting_options.pop('gt_group', 'PTS')
-    max_iters = fitting_options.pop('max_iters', 50)
-    error_type = fitting_options.pop('error_type', 'me_norm')
-
-    # create fitter
-    fitter = GradientDescentCLMFitter(clm, **fitting_options)
-
-    # fit images
-    n_images = len(fitting_images)
-    fitting_results = []
-    for j, i in enumerate(fitting_images):
-        # perturb shape
-        gt_s = i.landmarks[group].lms
-        if 'bbox' in i.landmarks:
-            # shape from bounding box
-            s = fitter.obtain_shape_from_bb(i.landmarks['bbox'].lms.points)
-        else:
-            # shape from perturbation
-            s = fitter.perturb_shape(gt_s, **perturb_options)
-        # fit
-        fr = fitter.fit(i, s, gt_shape=gt_s, max_iters=max_iters)
-        fitting_results.append(fr)
-
-        # print
-        final_error = fr.final_error(error_type=error_type)
-        initial_error = fr.initial_error(error_type=error_type)
-        if verbose:
-            if error_type == 'me_norm':
-                if final_error <= 0.03:
-                    perc1 += 1.
-                if final_error <= 0.04:
-                    perc2 += 1.
-            elif error_type == 'rmse':
-                if final_error <= 0.05:
-                    perc1 += 1.
-                if final_error <= 0.06:
-                    perc2 += 1.
-            print_dynamic('- {0} - [<=0.03: {1:.1f}%, <=0.04: {2:.1f}%] - '
-                          'Image {3}/{4} (error: {5:.3f} --> {6:.3f})'.format(
-                          progress_bar_str(float(j + 1) / n_images,
-                                           show_bar=False),
-                          perc1 * 100. / n_images, perc2 * 100. / n_images,
-                          j + 1, n_images, initial_error, final_error))
-    if verbose:
-        print_dynamic('- Fitting completed: [<=0.03: {0:.1f}%, <=0.04: '
-                      '{1:.1f}%]\n'.format(perc1 * 100. / n_images,
-                                           perc2 * 100. / n_images))
-
-    return fitting_results
-
-
-def clm_build_benchmark(training_images, training_options=None, verbose=False):
-    r"""
-    Builds an CLM model.
-
-    Parameters
-    ----------
-    training_images: list of :class:MaskedImage objects
-        A list of the training images.
-    training_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the CLMBuilder
-        (:class:menpo.fitmultilevel.clm.CLMBuilder).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            training_options = {'group': 'PTS',
-                                'classifier_trainers': linear_svm_lr,
-                                'patch_shape': (5, 5),
-                                'features': sparse_hog,
-                                'normalization_diagonal': None,
-                                'n_levels': 3,
-                                'downscale': 1.1,
-                                'scaled_shape_models': True,
-                                'max_shape_components': None,
-                                'boundary': 3
-                                }
-        For an explanation of the options, please refer to the CLMBuilder
-        documentation.
-
-        Default: None
-    verbose: boolean, optional
-        If True, it prints information regarding the CLM training.
-
-        Default: False
-
-    Returns
-    -------
-    clm: :class:menpo.fitmultilevel.clm.CLM object
-        The trained CLM model.
-    """
-    if verbose:
-        print('CLM Training:')
-
-    # parse options
-    if training_options is None:
-        training_options = {}
-
-    # group option
-    group = training_options.pop('group', None)
-
-    # build aam
-    aam = CLMBuilder(**training_options).build(training_images, group=group,
-                                               verbose=verbose)
-
-    return aam
-
-
-def sdm_fit_benchmark(fitting_images, fitter, perturb_options=None,
-                      fitting_options=None, verbose=False):
-    r"""
-    Fits a trained SDM to a database.
-
-    Parameters
-    ----------
-    fitting_images: list of :class:MaskedImage objects
-        A list of the fitting images.
-    fitter: :map:`SDMFitter`
-        The trained AAM object. It can be generated from the
-        aam_build_benchmark() method.
-    fitting_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the
-        LucasKanadeAAMFitter (:class:menpo.fitmultilevel.sdm.base).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            fitting_options = {'algorithm': AlternatingInverseCompositional,
-                               'md_transform': OrthoMDTransform,
-                               'global_transform': AlignmentSimilarity,
-                               'n_shape': None,
-                               'n_appearance': None,
-                               'max_iters': 50,
-                               'error_type': 'me_norm'
-                               }
-        For an explanation of the options, please refer to the
-        LucasKanadeAAMFitter documentation.
-
-        Default: None
-    bounding_boxes: list of (2, 2) ndarray, optional
-        If provided, fits will be initialized from a bounding box. If
-        None, perturbation of ground truth will be used instead.
-        can be provided). Interpreted as [[min_x, min_y], [max_x, max_y]].
-    perturb_options: dictionary, optional
-        A dictionary with parameters that control the perturbation on the
-        ground truth shape with noise of specified std. Note that if
-        bounding_box is provided perturb_options is ignored and not used.
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            initialization_options = {'noise_std': 0.04,
-                                      'rotation': False
-                                      }
-        For an explanation of the options, please refer to the perturb_shape()
-        method documentation of :map:`MultilevelFitter`.
-    verbose: bool, optional
-        If True, it prints information regarding the AAM fitting including
-        progress bar, current image error and percentage of images with errors
-        less or equal than a value.
-
-        Default: False
-
-    Returns
-    -------
-    fitting_results: :map:`FittingResultList`
-        A list with the :map:`FittingResult` object per image.
-    """
-    if verbose:
-        print('SDM Fitting:')
-        perc1 = 0.
-        perc2 = 0.
-
-    # parse options
-    if fitting_options is None:
-        fitting_options = {}
-    if perturb_options is None:
-        perturb_options = {}
-
-    # extract some options
-    group = fitting_options.pop('gt_group', 'PTS')
-    error_type = fitting_options.pop('error_type', 'me_norm')
-
-    # fit images
-    n_images = len(fitting_images)
-    fitting_results = []
-    for j, i in enumerate(fitting_images):
-        # perturb shape
-        gt_s = i.landmarks[group].lms
-        if 'bbox' in i.landmarks:
-            # shape from bounding box
-            s = fitter.obtain_shape_from_bb(i.landmarks['bbox'].lms.points)
-        else:
-            # shape from perturbation
-            s = fitter.perturb_shape(gt_s, **perturb_options)
-        # fit
-        fr = fitter.fit(i, s, gt_shape=gt_s)
-        fitting_results.append(fr)
-
-        # print
-        final_error = fr.final_error(error_type=error_type)
-        initial_error = fr.initial_error(error_type=error_type)
-        if verbose:
-            if error_type == 'me_norm':
-                if final_error <= 0.03:
-                    perc1 += 1.
-                if final_error <= 0.04:
-                    perc2 += 1.
-            elif error_type == 'rmse':
-                if final_error <= 0.05:
-                    perc1 += 1.
-                if final_error <= 0.06:
-                    perc2 += 1.
-            print_dynamic('- {0} - [<=0.03: {1:.1f}%, <=0.04: {2:.1f}%] - '
-                          'Image {3}/{4} (error: {5:.3f} --> {6:.3f})'.format(
-                progress_bar_str(float(j + 1) / n_images, show_bar=False),
-                perc1 * 100. / n_images, perc2 * 100. / n_images, j + 1,
-                n_images, initial_error, final_error))
-    if verbose:
-        print_dynamic('- Fitting completed: [<=0.03: {0:.1f}%, <=0.04: '
-                      '{1:.1f}%]\n'.format(perc1 * 100. / n_images,
-                                           perc2 * 100. / n_images))
-
-    return fitting_results
-
-
-def sdm_build_benchmark(training_images, training_options=None, verbose=False):
-    r"""
-    Builds an SDM model.
-
-    Parameters
-    ----------
-    training_images: list of :class:MaskedImage objects
-        A list of the training images.
-    training_options: dictionary, optional
-        A dictionary with the parameters that will be passed in the AAMBuilder
-        (:class:menpo.fitmultilevel.aam.AAMBuilder).
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            training_options = {'group': 'PTS',
-                                'features': 'igo',
-                                'transform': PiecewiseAffine,
-                                'trilist': None,
-                                'normalization_diagonal': None,
-                                'n_levels': 3,
-                                'downscale': 2,
-                                'scaled_shape_models': True,
-                                'max_shape_components': None,
-                                'max_appearance_components': None,
-                                'boundary': 3
-                                }
-        For an explanation of the options, please refer to the AAMBuilder
-        documentation.
-
-        Default: None
-    verbose: boolean, optional
-        If True, it prints information regarding the AAM training.
-
-        Default: False
-
-    Returns
-    -------
-    aam: :class:menpo.fitmultilevel.aam.AAM object
-        The trained AAM model.
-    """
-    if verbose:
-        print('SDM Training:')
-
-    # parse options
-    if training_options is None:
-        training_options = {}
-
-    # group option
-    group = training_options.pop('group', None)
-
-    # build sdm
-    sdm = SDMTrainer(**training_options).train(training_images, group=group,
-                                               verbose=verbose)
-    return sdm
-
-
-def load_database(database_path, bounding_boxes=None,
-                  db_loading_options=None, verbose=False):
-    r"""
-    Loads the database images, crops them and converts them.
-
-    Parameters
-    ----------
-    database_path: str
-        The path of the database images.
-    db_loading_options: dictionary, optional
-        A dictionary with options related to image loading.
-        If None, the default options will be used.
-        This is an example of the dictionary with the default options:
-            training_options = {'crop_proportion': 0.1,
-                                'convert_to_grey': True,
-                                }
-
-        crop_proportion (float) defines the additional padding to be added all
-        around the landmarks bounds when the images are cropped. It is defined
-        as a proportion of the landmarks' range.
-
-        convert_to_grey (boolean)defines whether the images will be converted
-        to greyscale.
-
-        Default: None
-    verbose: boolean, optional
-        If True, it prints a progress percentage bar.
-
-        Default: False
-
-    Returns
-    -------
-    images: list of :class:MaskedImage objects
-        A list of the loaded images.
-
-    Raises
-    ------
-    ValueError
-        Invalid path given
-    ValueError
-        No {files_extension} files in given path
-    """
-    # check input options
-    if db_loading_options is None:
-        db_loading_options = {}
-
-    # check given path
-    database_path = os.path.abspath(os.path.expanduser(database_path))
-    if os.path.isdir(database_path) is not True:
-        raise ValueError('Invalid path given')
-
-    # create final path
-    final_path = os.path.join(database_path, '*')
-
-    # get options
-    crop_proportion = db_loading_options.pop('crop_proportion', 0.5)
-    convert_to_grey = db_loading_options.pop('convert_to_grey', True)
-
-    # load images
-    images = []
-    for i in mio.import_images(final_path, verbose=verbose):
-        # If we have bounding boxes then we need to make sure we crop to them!
-        # If we don't crop to the bounding box then we might crop out part of
-        # the image the bounding box belongs to.
-        landmark_group_label = None
-        if bounding_boxes is not None:
-            fname = i.path.name
-            landmark_group_label = 'bbox'
-            i.landmarks[landmark_group_label] = bounding_boxes[fname].detector
-
-        # crop image
-        i.crop_to_landmarks_proportion_inplace(crop_proportion,
-                                               group=landmark_group_label)
-
-        # convert it to greyscale if needed
-        if convert_to_grey and i.n_channels == 3:
-            i = i.as_greyscale(mode='luminosity')
-
-        # append it to the list
-        images.append(i)
-    if verbose:
-        print("\nAssets loaded.")
-    return images
-
-
-def convert_fitting_results_to_ced(fitting_results, max_error_bin=0.05,
-                                   bins_error_step=0.005, error_type='me_norm'):
-    r"""
-    Method that given a fitting_result object, it converts it to the
-    cumulative error distribution values that can be used for plotting.
-
-    Parameters
-    ----------
-    fitting_results: :class:menpo.fit.fittingresult.FittingResultList object
-        A list with the FittingResult object per image.
-    max_error_bin: float, Optional
-        The maximum error of the distribution.
-
-        Default: 0.05
-    bins_error_step: float, Optional
-        The sampling step of the distribution values.
-
-        Default: 0.005
-
-    Returns
-    -------
-    final_error_dist: list
-        Cumulative distribution values of the final errors.
-    initial_error_dist: list
-        Cumulative distribution values of the initial errors.
-    """
-    error_bins = np.arange(0., max_error_bin + bins_error_step,
-                           bins_error_step)
-    final_error = [f.final_error(error_type=error_type)
-                   for f in fitting_results]
-    initial_error = [f.initial_error(error_type=error_type)
-                     for f in fitting_results]
-
-    final_error_dist = np.array(
-        [float(np.sum(final_error <= k)) /
-         len(final_error) for k in error_bins])
-    initial_error_dist = np.array(
-        [float(np.sum(initial_error <= k)) /
-         len(final_error) for k in error_bins])
-    return final_error_dist, initial_error_dist, error_bins
-
-
-def plot_fitting_curves(x_axis, ceds, title, figure_id=None, new_figure=False,
-                        x_label='Point-to-Point Normalized RMS Error',
-                        y_limit=1, x_limit=0.05, legend_entries=None, **kwargs):
-    r"""
-    Method that plots Cumulative Error Distributions in a single figure.
-
-    Parameters
-    ----------
-    x_axis : ndarray
-        The horizontal axis values (errors).
-    ceds : list of ndarrays
-        The vertical axis values (percentages).
-    title : string
-        The plot title.
-    figure_id : Optional
-        A figure handle.
-    new_figure : boolean, Optional
-        If True, a new figure window will be created.
-    y_limit : float, Optional
-        The maximum value of the vertical axis.
-    x_limit : float, Optional
-        The maximum value of the vertical axis.
-    x_label : string
-        The label of the horizontal axis.
-    legend_entries : list of strings or None
-        The legend of the plot. If None, the legend will include an incremental
-        number per curve.
-
-    Returns
-    -------
-    final_error_dist : list
-        Cumulative distribution values of the final errors.
-    initial_error_dist : list
-        Cumulative distribution values of the initial errors.
-    """
-    if legend_entries is None:
-        legend_entries = [str(i + 1) for i in range(len(ceds))]
-    y_label = 'Proportion of images'
-    x_axis_limits = [0, x_limit]
-    y_axis_limits = [0, y_limit]
-    return GraphPlotter(figure_id, new_figure, x_axis, ceds, title=title,
-                        legend_entries=legend_entries, x_label=x_label,
-                        y_label=y_label,
-                        x_axis_limits=x_axis_limits,
-                        y_axis_limits=y_axis_limits).render(**kwargs)
diff --git a/menpofit/benchmark/io.py b/menpofit/benchmark/io.py
deleted file mode 100644
index c9b9ff4..0000000
--- a/menpofit/benchmark/io.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import urllib2
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-import os
-import scipy.io as sio
-import glob
-import tempfile
-import shutil
-import zipfile
-from collections import namedtuple
-
-# Container for bounding box
-from menpo.shape import PointCloud
-
-BoundingBox = namedtuple('BoundingBox', ['detector', 'groundtruth'])
-# Where the bounding boxes should be fetched from
-bboxes_url = 'http://ibug.doc.ic.ac.uk/media/uploads/competitions/bounding_boxes.zip'
-
-
-def download_ibug_bounding_boxes(path=None, verbose=False):
-    r"""Downloads the bounding box information provided on the iBUG website
-    and unzips it to the path.
-    
-    Parameters
-    ----------
-    path : `str`, optional
-        The path that the bounding box files should be extracted to.
-        If None, the current directory will be used.
-    """
-    if path is None:
-        path = os.getcwd()
-    else:
-        path = os.path.abspath(os.path.expanduser(path))
-    if verbose:
-        print('Acquiring bounding box information from iBUG website...')
-    try:
-        remotezip = urllib2.urlopen(bboxes_url)
-        zipinmemory = StringIO(remotezip.read())
-        ziplocal = zipfile.ZipFile(zipinmemory)
-    except Exception as e:
-        print('Unable to grab bounding boxes (are you online?)')
-        raise e
-    if verbose:
-        print('Extracting to {}'.format(os.path.join(path, 'Bounding Boxes')))
-    try:
-        ziplocal.extractall(path=path)
-        if verbose:
-            print('Done.')
-    except Exception as e:
-        if verbose:
-            print('Unable to save.'.format(e))
-        raise e
-
-
-def import_bounding_boxes(boxes_path):
-    r"""
-    Imports the bounding boxes at boxes_path, returning a dict
-    where the key is a filename and the value is a BoundingBox.
-    
-    Parameters
-    ----------
-    boxes_path : str
-        A path to a bounding box .mat file downloaded from the
-        iBUG website.
-        
-    Returns
-    -------
-    dict:
-        Mapping of filenames to bounding boxes
-
-    """
-    bboxes_mat = sio.loadmat(boxes_path)
-    bboxes = {}
-    for bb in bboxes_mat['bounding_boxes'][0, :]:
-        fname, detector_bb, gt_bb = bb[0, 0]
-        bboxes[str(fname[0])] = BoundingBox(
-            PointCloud(detector_bb.reshape([2, 2])[:, ::-1]),
-            PointCloud(gt_bb.reshape([2, 2])[:, ::-1]))
-    return bboxes
-
-
-def import_all_bounding_boxes(boxes_dir_path=None, verbose=True):
-    r"""
-    Imports all the bounding boxes contained in boxes_dir_path.
-    If the path is False, the bounding boxes are downloaded from the
-    iBUG website directly.
-    
-    
-    """
-    temp_path = None
-    if boxes_dir_path is None:
-        print('No path provided - acuqiring zip to tmp dir...')
-        temp_path = tempfile.mkdtemp()
-        download_ibug_bounding_boxes(path=temp_path, verbose=verbose)
-        boxes_dir_path = os.path.join(temp_path, 'Bounding Boxes')
-    prefix = 'bounding_boxes_'
-    bbox_paths = glob.glob(os.path.join(boxes_dir_path, prefix + '*.mat'))
-    bboxes = {}
-    for bbox_path in bbox_paths:
-        db = os.path.splitext(os.path.split(bbox_path)[-1])[0][len(prefix):]
-        if verbose:
-            print('Importing {}'.format(db))
-        bboxes[db] = import_bounding_boxes(bbox_path)
-    if verbose:
-        print('Cleaning up...')
-    if temp_path:
-        # If we downloaded, clean it up!
-        shutil.rmtree(temp_path)
-    if verbose:
-        print('Done.')
-    return bboxes
-
diff --git a/menpofit/benchmark/predefined.py b/menpofit/benchmark/predefined.py
deleted file mode 100644
index 3780f41..0000000
--- a/menpofit/benchmark/predefined.py
+++ /dev/null
@@ -1,779 +0,0 @@
-from menpo.landmark import ibug_face_68_trimesh
-from menpo.feature import sparse_hog, igo
-
-from menpofit.lucaskanade import AIC
-from menpofit.transform import OrthoMDTransform, DifferentiablePiecewiseAffine
-from menpofit.modelinstance import OrthoPDM
-from menpofit.gradientdescent import RLMS
-from menpofit.clm.classifier import linear_svm_lr
-
-from .io import import_bounding_boxes
-from .base import (aam_build_benchmark, aam_fit_benchmark,
-                   clm_build_benchmark, clm_fit_benchmark,
-                   sdm_build_benchmark, sdm_fit_benchmark,
-                   load_database, convert_fitting_results_to_ced,
-                   plot_fitting_curves)
-
-
-def aam_fastest_alternating_noise(training_db_path, fitting_db_path,
-                                  features=igo, noise_std=0.04,
-                                  verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.2,
-                          'convert_to_grey': True
-                          }
-    training_options = {'group': 'PTS',
-                        'features': igo,
-                        'transform': DifferentiablePiecewiseAffine,
-                        'trilist': ibug_face_68_trimesh,
-                        'normalization_diagonal': None,
-                        'n_levels': 3,
-                        'downscale': 2,
-                        'scaled_shape_models': True,
-                        'max_shape_components': 25,
-                        'max_appearance_components': 250,
-                        'boundary': 3
-                        }
-    fitting_options = {'algorithm': AIC,
-                       'md_transform': OrthoMDTransform,
-                       'n_shape': [3, 6, 12],
-                       'n_appearance': 50,
-                       'max_iters': 50,
-                       'error_type': 'me_norm'
-                       }
-    perturb_options = {'noise_std': 0.04,
-                       'rotation': False}
-
-    # set passed parameters
-    training_options['features'] = features
-    perturb_options['noise_std'] = noise_std
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    aam = aam_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   verbose=verbose)
-    fitting_results = aam_fit_benchmark(fitting_images, aam,
-                                        perturb_options=perturb_options,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "AAMs using {} and Alternating IC".format(
-            training_options['features'].__name__)
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def aam_fastest_alternating_bbox(training_db_path, fitting_db_path,
-                                 fitting_bboxes_path, features=igo,
-                                 verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.1,
-                          'convert_to_grey': True
-    }
-    training_options = {'group': 'PTS',
-                        'features': [igo] * 3,
-                        'transform': DifferentiablePiecewiseAffine,
-                        'trilist': ibug_face_68_trimesh,
-                        'normalization_diagonal': None,
-                        'n_levels': 3,
-                        'downscale': 2,
-                        'scaled_shape_models': True,
-                        'max_shape_components': 25,
-                        'max_appearance_components': 250,
-                        'boundary': 3
-    }
-    fitting_options = {'algorithm': AIC,
-                       'md_transform': OrthoMDTransform,
-                       'n_shape': [3, 6, 12],
-                       'n_appearance': 50,
-                       'max_iters': 50,
-                       'error_type': 'me_norm'
-    }
-
-    # set passed parameters
-    training_options['features'] = features
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    aam = aam_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-
-    # import bounding boxes
-    bboxes_list = import_bounding_boxes(fitting_bboxes_path)
-
-    # for all fittings, we crop to 0.5
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   bounding_boxes=bboxes_list,
-                                   verbose=verbose)
-
-    fitting_results = aam_fit_benchmark(fitting_images, aam,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "AAMs using {} and Alternating IC".format(
-            training_options['features'].__name__)
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def aam_best_performance_alternating_noise(training_db_path, fitting_db_path,
-                                           features=igo, noise_std=0.04,
-                                           verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.2,
-                          'convert_to_grey': True
-                          }
-    training_options = {'group': 'PTS',
-                        'features': igo,
-                        'transform': DifferentiablePiecewiseAffine,
-                        'trilist': ibug_face_68_trimesh,
-                        'normalization_diagonal': None,
-                        'n_levels': 3,
-                        'downscale': 1.2,
-                        'scaled_shape_models': False,
-                        'max_shape_components': 25,
-                        'max_appearance_components': 250,
-                        'boundary': 3
-                        }
-    fitting_options = {'algorithm': AIC,
-                       'md_transform': OrthoMDTransform,
-                       'n_shape': [3, 6, 12],
-                       'n_appearance': 50,
-                       'max_iters': 50,
-                       'error_type': error_type
-                       }
-    perturb_options = {'noise_std': 0.04,
-                       'rotation': False}
-
-    # set passed parameters
-    training_options['features'] = features
-    perturb_options['noise_std'] = noise_std
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    aam = aam_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   verbose=verbose)
-    fitting_results = aam_fit_benchmark(fitting_images, aam,
-                                        perturb_options=perturb_options,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "AAMs using {} and Alternating IC".format(
-            training_options['features'].__name__)
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def aam_best_performance_alternating_bbox(training_db_path, fitting_db_path,
-                                          fitting_bboxes_path,
-                                          features=igo, verbose=False,
-                                          plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.5,
-                          'convert_to_grey': True
-    }
-    training_options = {'group': 'PTS',
-                        'features': igo,
-                        'transform': DifferentiablePiecewiseAffine,
-                        'trilist': ibug_face_68_trimesh,
-                        'normalization_diagonal': 200,
-                        'n_levels': 3,
-                        'downscale': 2,
-                        'scaled_shape_models': True,
-                        'max_shape_components': 25,
-                        'max_appearance_components': 100,
-                        'boundary': 3
-    }
-    fitting_options = {'algorithm': AIC,
-                       'md_transform': OrthoMDTransform,
-                       'n_shape': [3, 6, 12],
-                       'n_appearance': 50,
-                       'max_iters': 50,
-                       'error_type': error_type
-    }
-
-    # set passed parameters
-    training_options['features'] = features
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    aam = aam_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-
-    # import bounding boxes
-    bboxes_list = import_bounding_boxes(fitting_bboxes_path)
-
-    # for all fittings, we crop to 0.5
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   bounding_boxes=bboxes_list,
-                                   verbose=verbose)
-
-    fitting_results = aam_fit_benchmark(fitting_images, aam,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "AAMs using {} and Alternating IC".format(
-            training_options['features'].__name__)
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def clm_basic_noise(training_db_path,  fitting_db_path,
-                    features=sparse_hog, classifier_trainers=linear_svm_lr,
-                    noise_std=0.04, verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.4,
-                          'convert_to_grey': True
-                          }
-    training_options = {'group': 'PTS',
-                        'classifier_trainers': linear_svm_lr,
-                        'patch_shape': (5, 5),
-                        'features': [sparse_hog] * 3,
-                        'normalization_diagonal': None,
-                        'n_levels': 3,
-                        'downscale': 1.1,
-                        'scaled_shape_models': True,
-                        'max_shape_components': None,
-                        'boundary': 3
-                        }
-    fitting_options = {'algorithm': RLMS,
-                       'pdm_transform': OrthoPDM,
-                       'n_shape': [3, 6, 12],
-                       'max_iters': 50,
-                       'error_type': error_type
-                       }
-    perturb_options = {'noise_std': 0.01,
-                       'rotation': False}
-
-    # set passed parameters
-    training_options['features'] = features
-    training_options['classifier_trainers'] = classifier_trainers
-    perturb_options['noise_std'] = noise_std
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    clm = clm_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   verbose=verbose)
-    fitting_results = clm_fit_benchmark(fitting_images, clm,
-                                        perturb_options=perturb_options,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "CLMs with {} and {} classifier using RLMS".format(
-            training_options['features'].__name__,
-            training_options['classifier_trainers'])
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def clm_basic_bbox(training_db_path,  fitting_db_path, fitting_bboxes_path,
-                   features=sparse_hog, classifier_trainers=linear_svm_lr,
-                   verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.5,
-                          'convert_to_grey': True
-    }
-    training_options = {'group': 'PTS',
-                        'classifier_trainers': linear_svm_lr,
-                        'patch_shape': (5, 5),
-                        'features': [sparse_hog] * 3,
-                        'normalization_diagonal': None,
-                        'n_levels': 3,
-                        'downscale': 1.1,
-                        'scaled_shape_models': True,
-                        'max_shape_components': None,
-                        'boundary': 3
-    }
-    fitting_options = {'algorithm': RLMS,
-                       'pdm_transform': OrthoPDM,
-                       'n_shape': [3, 6, 12],
-                       'max_iters': 50,
-                       'error_type': error_type
-    }
-
-    # set passed parameters
-    training_options['features'] = features
-    training_options['classifier_trainers'] = classifier_trainers
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-
-    clm = clm_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-
-    # import bounding boxes
-    bboxes_list = import_bounding_boxes(fitting_bboxes_path)
-
-    # for all fittings, we crop to 0.5
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   bounding_boxes=bboxes_list,
-                                   verbose=verbose)
-
-    fitting_results = clm_fit_benchmark(fitting_images, clm,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "CLMs with {} and {} classifier using RLMS".format(
-            training_options['features'].__name__,
-            training_options['classifier_trainers'])
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def sdm_fastest_bbox(training_db_path, fitting_db_path,
-                                 fitting_bboxes_path, features=None,
-                                 verbose=False, plot=False):
-
-    # predefined options
-    error_type = 'me_norm'
-    db_loading_options = {'crop_proportion': 0.8,
-                          'convert_to_grey': True
-    }
-    training_options = {'group': 'PTS',
-                        'normalization_diagonal': 200,
-                        'n_levels': 4,
-                        'downscale': 1.01,
-                        'noise_std': 0.08,
-                        'patch_shape': (16, 16),
-                        'n_perturbations': 15,
-    }
-    fitting_options = {
-                       'error_type': error_type
-    }
-
-    # run experiment
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    sdm = sdm_build_benchmark(training_images,
-                              training_options=training_options,
-                              verbose=verbose)
-
-    # import bounding boxes
-    bboxes_list = import_bounding_boxes(fitting_bboxes_path)
-
-    # for all fittings, we crop to 0.5
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   bounding_boxes=bboxes_list,
-                                   verbose=verbose)
-
-    fitting_results = sdm_fit_benchmark(fitting_images, sdm,
-                                        fitting_options=fitting_options,
-                                        verbose=verbose)
-
-    # convert results
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    final_error_curve, initial_error_curve, error_bins = \
-        convert_fitting_results_to_ced(fitting_results,
-                                       max_error_bin=max_error_bin,
-                                       bins_error_step=bins_error_step,
-                                       error_type=error_type)
-
-    # plot results
-    if plot:
-        title = "SDMs using default (sparse hogs)".format(
-            training_options['features'].__name__)
-        y_axis = [final_error_curve, initial_error_curve]
-        legend = ['Fitting', 'Initialization']
-        plot_fitting_curves(error_bins, y_axis, title, new_figure=True,
-                            x_limit=max_error_bin, legend_entries=legend,
-                            line_colour=['r', 'b'],
-                            marker_face_colour=['r', 'b'],
-                            marker_style=['o', 'x'])
-    return fitting_results, final_error_curve, initial_error_curve, error_bins
-
-
-def aam_params_combinations_noise(training_db_path, fitting_db_path,
-                                  n_experiments=1, features=None,
-                                  scaled_shape_models=None,
-                                  n_shape=None,
-                                  n_appearance=None, noise_std=None,
-                                  rotation=None, verbose=False, plot=False):
-
-    # parse input
-    if features is None:
-        features = [igo] * n_experiments
-    elif len(features) is not n_experiments:
-        raise ValueError("features has wrong length")
-    if scaled_shape_models is None:
-        scaled_shape_models = [True] * n_experiments
-    elif len(scaled_shape_models) is not n_experiments:
-        raise ValueError("scaled_shape_models has wrong length")
-    if n_shape is None:
-        n_shape = [[3, 6, 12]] * n_experiments
-    elif len(n_shape) is not n_experiments:
-        raise ValueError("n_shape has wrong length")
-    if n_appearance is None:
-        n_appearance = [50] * n_experiments
-    elif len(n_appearance) is not n_experiments:
-        raise ValueError("n_appearance has wrong length")
-    if noise_std is None:
-        noise_std = [0.04] * n_experiments
-    elif len(noise_std) is not n_experiments:
-        raise ValueError("noise_std has wrong length")
-    if rotation is None:
-        rotation = [False] * n_experiments
-    elif len(rotation) is not n_experiments:
-        raise ValueError("rotation has wrong length")
-
-    # load images
-    db_loading_options = {'crop_proportion': 0.1,
-                          'convert_to_grey': True
-                          }
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   verbose=verbose)
-
-    # run experiments
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    curves_to_plot = []
-    all_fitting_results = []
-    for i in range(n_experiments):
-        if verbose:
-            print("\nEXPERIMENT {}/{}:".format(i + 1, n_experiments))
-            print("- features: {}\n- scaled_shape_models: {}\n"
-                  "- n_shape: {}\n"
-                  "- n_appearance: {}\n- noise_std: {}\n"
-                  "- rotation: {}".format(
-                  features[i], scaled_shape_models[i],
-                  n_shape[i], n_appearance[i], noise_std[i], rotation[i]))
-
-        # predefined option dictionaries
-        error_type = 'me_norm'
-        training_options = {'group': 'PTS',
-                            'features': igo,
-                            'transform': DifferentiablePiecewiseAffine,
-                            'trilist': ibug_face_68_trimesh,
-                            'normalization_diagonal': None,
-                            'n_levels': 3,
-                            'downscale': 1.1,
-                            'scaled_shape_models': True,
-                            'max_shape_components': 25,
-                            'max_appearance_components': 250,
-                            'boundary': 3
-                            }
-        fitting_options = {'algorithm': AIC,
-                           'md_transform': OrthoMDTransform,
-                           'n_shape': [3, 6, 12],
-                           'n_appearance': 50,
-                           'max_iters': 50,
-                           'error_type': error_type
-                           }
-        pertrub_options = {'noise_std': 0.04,
-                           'rotation': False}
-
-        # training
-        training_options['features'] = features[i]
-        training_options['scaled_shape_models'] = scaled_shape_models[i]
-        aam = aam_build_benchmark(training_images,
-                                  training_options=training_options,
-                                  verbose=verbose)
-
-        # fitting
-        fitting_options['n_shape'] = n_shape[i]
-        fitting_options['n_appearance'] = n_appearance[i]
-        pertrub_options['noise_std'] = noise_std[i]
-        pertrub_options['rotation'] = rotation[i]
-        fitting_results = aam_fit_benchmark(fitting_images, aam,
-                                            perturb_options=pertrub_options,
-                                            fitting_options=fitting_options,
-                                            verbose=verbose)
-        all_fitting_results.append(fitting_results)
-
-        # convert results
-        final_error_curve, initial_error_curve, error_bins = \
-            convert_fitting_results_to_ced(
-                fitting_results, max_error_bin=max_error_bin,
-                bins_error_step=bins_error_step,
-                error_type=error_type)
-        curves_to_plot.append(final_error_curve)
-        if i == n_experiments - 1:
-            curves_to_plot.append(initial_error_curve)
-
-    # plot results
-    if plot:
-        title = "AAMs using Alternating IC"
-        colour_list = ['r', 'b', 'g', 'y', 'c'] * n_experiments
-        marker_list = ['o', 'x', 'v', 'd'] * n_experiments
-        plot_fitting_curves(error_bins, curves_to_plot, title, new_figure=True,
-                            x_limit=max_error_bin,  line_colour=colour_list,
-                            marker_face_colour=colour_list,
-                            marker_style=marker_list)
-    return all_fitting_results
-
-
-def clm_params_combinations_noise(training_db_path, fitting_db_path,
-                                  n_experiments=1, classifier_trainers=None,
-                                  patch_shape=None, features=None,
-                                  scaled_shape_models=None, n_shape=None,
-                                  noise_std=None, rotation=None, verbose=False,
-                                  plot=False):
-
-    # parse input
-    if classifier_trainers is None:
-        classifier_trainers = [linear_svm_lr] * n_experiments
-    elif len(classifier_trainers) is not n_experiments:
-        raise ValueError("classifier_trainers has wrong length")
-    if patch_shape is None:
-        patch_shape = [(5, 5)] * n_experiments
-    elif len(patch_shape) is not n_experiments:
-        raise ValueError("patch_shape has wrong length")
-    if features is None:
-        features = [igo] * n_experiments
-    elif len(features) is not n_experiments:
-        raise ValueError("features has wrong length")
-    if scaled_shape_models is None:
-        scaled_shape_models = [True] * n_experiments
-    elif len(scaled_shape_models) is not n_experiments:
-        raise ValueError("scaled_shape_models has wrong length")
-    if n_shape is None:
-        n_shape = [[3, 6, 12]] * n_experiments
-    elif len(n_shape) is not n_experiments:
-        raise ValueError("n_shape has wrong length")
-    if noise_std is None:
-        noise_std = [0.04] * n_experiments
-    elif len(noise_std) is not n_experiments:
-        raise ValueError("noise_std has wrong length")
-    if rotation is None:
-        rotation = [False] * n_experiments
-    elif len(rotation) is not n_experiments:
-        raise ValueError("rotation has wrong length")
-
-    # load images
-    db_loading_options = {'crop_proportion': 0.4,
-                          'convert_to_grey': True
-                          }
-    training_images = load_database(training_db_path,
-                                    db_loading_options=db_loading_options,
-                                    verbose=verbose)
-    fitting_images = load_database(fitting_db_path,
-                                   db_loading_options=db_loading_options,
-                                   verbose=verbose)
-
-    # run experiments
-    max_error_bin = 0.05
-    bins_error_step = 0.005
-    curves_to_plot = []
-    all_fitting_results = []
-    for i in range(n_experiments):
-        if verbose:
-            print("\nEXPERIMENT {}/{}:".format(i + 1, n_experiments))
-            print("- classifiers: {}\n- patch_shape: {}\n"
-                  "- features: {}\n- scaled_shape_models: {}\n"
-                  "- n_shape: {}\n"
-                  "- noise_std: {}\n- rotation: {}".format(
-                  classifier_trainers[i], patch_shape[i], features[i],
-                  scaled_shape_models[i], n_shape[i],
-                  noise_std[i], rotation[i]))
-
-        # predefined option dictionaries
-        error_type = 'me_norm'
-        training_options = {'group': 'PTS',
-                            'classifier_trainers': linear_svm_lr,
-                            'patch_shape': (5, 5),
-                            'features': sparse_hog,
-                            'normalization_diagonal': None,
-                            'n_levels': 3,
-                            'downscale': 1.1,
-                            'scaled_shape_models': False,
-                            'max_shape_components': None,
-                            'boundary': 3
-                            }
-        fitting_options = {'algorithm': RLMS,
-                           'pdm_transform': OrthoPDM,
-                           'n_shape': [3, 6, 12],
-                           'max_iters': 50,
-                           'error_type': error_type
-                           }
-        perturb_options = {'noise_std': 0.01,
-                           'rotation': False}
-
-        # training
-        training_options['classifier_trainers'] = classifier_trainers[i]
-        training_options['patch_shape'] = patch_shape[i]
-        training_options['features'] = features[i]
-        training_options['scaled_shape_models'] = scaled_shape_models[i]
-        clm = clm_build_benchmark(training_images,
-                                  training_options=training_options,
-                                  verbose=verbose)
-
-        # fitting
-        fitting_options['n_shape'] = n_shape[i]
-        perturb_options['noise_std'] = noise_std[i]
-        perturb_options['rotation'] = rotation[i]
-        fitting_results = clm_fit_benchmark(fitting_images, clm,
-                                            perturb_options=perturb_options,
-                                            fitting_options=fitting_options,
-                                            verbose=verbose)
-        all_fitting_results.append(fitting_results)
-
-        # convert results
-        final_error_curve, initial_error_curve, error_bins = \
-            convert_fitting_results_to_ced(
-                fitting_results, max_error_bin=max_error_bin,
-                bins_error_step=bins_error_step,
-                error_type=error_type)
-        curves_to_plot.append(final_error_curve)
-        if i == n_experiments - 1:
-            curves_to_plot.append(initial_error_curve)
-
-    # plot results
-    if plot:
-        title = "CLMs using RLMS"
-        colour_list = ['r', 'b', 'g', 'y', 'c'] * n_experiments
-        marker_list = ['o', 'x', 'v', 'd'] * n_experiments
-        plot_fitting_curves(error_bins, curves_to_plot, title, new_figure=True,
-                            x_limit=max_error_bin,  line_colour=colour_list,
-                            marker_face_colour=colour_list,
-                            marker_style=marker_list)
-    return all_fitting_results
diff --git a/menpofit/builder.py b/menpofit/builder.py
index f9a31fa..6fdeec1 100644
--- a/menpofit/builder.py
+++ b/menpofit/builder.py
@@ -1,12 +1,13 @@
 from __future__ import division
-import abc
+from functools import partial
 import numpy as np
-from menpo.shape import mean_pointcloud
+from menpo.shape import mean_pointcloud, PointCloud, TriMesh
+from menpo.image import Image, MaskedImage
+from menpo.feature import no_op
 from menpo.transform import Scale, Translation, GeneralizedProcrustesAnalysis
 from menpo.model.pca import PCAModel
-from menpo.visualize import print_dynamic, progress_bar_str
-
-from .base import is_pyramid_on_features
+from menpo.visualize import print_dynamic
+from menpofit.visualize import print_progress
 
 
 def compute_reference_shape(shapes, normalization_diagonal, verbose=False):
@@ -47,8 +48,21 @@ def compute_reference_shape(shapes, normalization_diagonal, verbose=False):
     return reference_shape
 
 
-def normalization_wrt_reference_shape(images, group, label,
-                                      normalization_diagonal, verbose=False):
+# Rescaling step shared with normalization_wrt_reference_shape.
+def rescale_images_to_reference_shape(images, group, reference_shape,
+                                      verbose=False):
+    r"""Rescale every image against `reference_shape` using its `group`
+    landmarks; returns the rescaled images as a new list."""
+    wrap = partial(print_progress, prefix='- Normalizing images size',
+                   end_with_newline=False, verbose=verbose)
+
+    # Normalize the scaling of all images wrt the reference_shape size
+    normalized_images = [i.rescale_to_pointcloud(reference_shape, group=group)
+                         for i in wrap(images)]
+    return normalized_images
+
+
+def normalization_wrt_reference_shape(images, group, diagonal, verbose=False):
     r"""
     Function that normalizes the images sizes with respect to the reference
     shape (mean shape) scaling. This step is essential before building a
@@ -57,7 +71,7 @@ def normalization_wrt_reference_shape(images, group, label,
     The normalization includes:
     1) Computation of the reference shape as the mean shape of the images'
        landmarks.
-    2) Scaling of the reference shape using the normalization_diagonal.
+    2) Scaling of the reference shape using the diagonal.
     3) Rescaling of all the images so that their shape's scale is in
        correspondence with the reference shape's scale.
 
@@ -65,26 +79,19 @@ def normalization_wrt_reference_shape(images, group, label,
     ----------
     images : list of :class:`menpo.image.MaskedImage`
         The set of landmarked images to normalize.
-
     group : `str`
         The key of the landmark set that should be used. If None,
         and if there is only one set of landmarks, this set will be used.
-
-    label : `str`
-        The label of of the landmark manager that you wish to use. If no
-        label is passed, the convex hull of all landmarks is used.
-
-    normalization_diagonal: `int`
+    diagonal: `int`
         If int, it ensures that the mean shape is scaled so that the
         diagonal of the bounding box containing it matches the
-        normalization_diagonal value.
+        diagonal value.
         If None, the mean shape is not rescaled.
 
         Note that, because the reference frame is computed from the mean
         landmarks, this kwarg also specifies the diagonal length of the
         reference frame (provided that features computation does not change
         the image size).
-
     verbose : `bool`, Optional
         Flag that controls information and progress printing.
 
@@ -97,28 +104,185 @@ def normalization_wrt_reference_shape(images, group, label,
         A list with the normalized images.
     """
     # get shapes
-    shapes = [i.landmarks[group][label] for i in images]
+    shapes = [i.landmarks[group].lms for i in images]
 
     # compute the reference shape and fix its diagonal length
-    reference_shape = compute_reference_shape(shapes, normalization_diagonal,
-                                              verbose=verbose)
+    reference_shape = compute_reference_shape(shapes, diagonal, verbose=verbose)
 
     # normalize the scaling of all images wrt the reference_shape size
-    normalized_images = []
-    for c, i in enumerate(images):
-        if verbose:
-            print_dynamic('- Normalizing images size: {}'.format(
-                progress_bar_str((c + 1.) / len(images),
-                                 show_bar=False)))
-        normalized_images.append(i.rescale_to_reference_shape(
-            reference_shape, group=group, label=label))
-
-    if verbose:
-        print_dynamic('- Normalizing images size: Done\n')
+    normalized_images = rescale_images_to_reference_shape(
+        images, group, reference_shape, verbose=verbose)
     return reference_shape, normalized_images
 
 
-def build_shape_model(shapes, max_components):
+# Apply `features` to each image and return the results as a list.
+def compute_features(images, features, prefix='', verbose=False):
+    wrap = partial(print_progress,
+                   prefix='{}Computing feature space'.format(prefix),
+                   end_with_newline=not prefix, verbose=verbose)
+
+    return [features(i) for i in wrap(images)]
+
+
+# Rescale each image by `scale`; returns `images` untouched if scale ~= 1.
+def scale_images(images, scale, prefix='', verbose=False):
+    wrap = partial(print_progress,
+                   prefix='{}Scaling images'.format(prefix),
+                   end_with_newline=not prefix, verbose=verbose)
+
+    if not np.allclose(scale, 1):
+        return [i.rescale(scale) for i in wrap(images)]
+    else:
+        return images
+
+
+# Warp each image onto `reference_frame`, reusing one transform instance.
+def warp_images(images, shapes, reference_frame, transform, prefix='',
+                verbose=False):
+    wrap = partial(print_progress,
+                   prefix='{}Warping images'.format(prefix),
+                   end_with_newline=not prefix, verbose=verbose)
+
+    warped_images = []
+    # Build a dummy transform, use set_target for efficiency
+    warp_transform = transform(reference_frame.landmarks['source'].lms,
+                               reference_frame.landmarks['source'].lms)
+    for i, s in wrap(list(zip(images, shapes))):  # list: progress needs len()
+        # Update Transform Target
+        warp_transform.set_target(s)
+        # warp images
+        warped_i = i.warp_to_mask(reference_frame.mask, warp_transform)
+        # attach reference frame landmarks to images
+        warped_i.landmarks['source'] = reference_frame.landmarks['source']
+        warped_images.append(warped_i)
+    return warped_images
+
+
+# Extract patches around each shape's points, normalise them, wrap as Image.
+def extract_patches(images, shapes, patch_size, normalise_function=no_op,
+                    prefix='', verbose=False):
+    wrap = partial(print_progress,
+                   prefix='{}Extracting patches'.format(prefix),
+                   end_with_newline=not prefix, verbose=verbose)
+
+    parts_images = []
+    for i, s in wrap(list(zip(images, shapes))):  # list: progress needs len()
+        parts = i.extract_patches(s, patch_size=patch_size,
+                                  as_single_array=True)
+        parts = normalise_function(parts)
+        parts_images.append(Image(parts, copy=False))
+    return parts_images
+
+def build_reference_frame(landmarks, boundary=3, group='source'):
+    r"""
+    Builds a reference frame from a particular set of landmarks.
+
+    Parameters
+    ----------
+    landmarks : :map:`PointCloud`
+        The landmarks that will be used to build the reference frame.
+    boundary : `int`, optional
+        The number of pixels to be left as a safe margin on the boundaries
+        of the reference frame (has potential effects on the gradient
+        computation).
+    group : `string`, optional
+        Group that will be assigned to the provided set of landmarks on the
+        reference frame.
+
+    Returns
+    -------
+    reference_frame : :map:`MaskedImage`
+        The reference frame, masked to the region covered by the landmarks.
+    """
+    reference_frame = _build_reference_frame(landmarks, boundary=boundary,
+                                             group=group)
+    source_landmarks = reference_frame.landmarks[group].lms
+    if isinstance(source_landmarks, TriMesh):
+        trilist = source_landmarks.trilist
+    else:
+        trilist = None
+    # landmarks are read back via `group` (they are stored under that key)
+    reference_frame.constrain_mask_to_landmarks(group=group, trilist=trilist)
+    return reference_frame
+
+
+def build_patch_reference_frame(landmarks, boundary=3, group='source',
+                                patch_size=(17, 17)):
+    r"""
+    Builds a patch-based reference frame from a particular set of landmarks.
+
+    Parameters
+    ----------
+    landmarks : :map:`PointCloud`
+        The landmarks that will be used to build the reference frame.
+
+    boundary : `int`, optional
+        The number of pixels to be left as a safe margin on the boundaries
+        of the reference frame (has potential effects on the gradient
+        computation).
+
+    group : `string`, optional
+        Group that will be assigned to the provided set of landmarks on the
+        reference frame.
+
+    patch_size : tuple of ints, optional
+        Tuple specifying the shape of the patches.
+
+    Returns
+    -------
+    patch_based_reference_frame : :map:`Image`
+        The patch based reference frame.
+    """
+    boundary = np.max(patch_size) + boundary  # pad by the largest patch side
+    reference_frame = _build_reference_frame(landmarks, boundary=boundary,
+                                             group=group)
+
+    # mask reference frame: build the mask around the `group` landmarks
+    reference_frame.build_mask_around_landmarks(patch_size, group=group)
+
+    return reference_frame
+
+
+def _build_reference_frame(landmarks, boundary=3, group='source'):
+    # translate landmarks so their padded lower bound sits at the origin
+    minimum = landmarks.bounds(boundary=boundary)[0]
+    landmarks = Translation(-minimum).apply(landmarks)
+    # blank MaskedImage sized to the landmarks' range (boundary included)
+    resolution = landmarks.range(boundary=boundary)
+    reference_frame = MaskedImage.init_blank(resolution)
+    reference_frame.landmarks[group] = landmarks
+
+    return reference_frame
+
+
+# Stack each shape's points with the reference mask indices warped onto it.
+def densify_shapes(shapes, reference_frame, transform):
+    # compute non-linear transforms
+    transforms = [transform(reference_frame.landmarks['source'].lms, s)
+                  for s in shapes]
+    # build dense shapes
+    dense_shapes = []
+    for (t, s) in zip(transforms, shapes):
+        warped_points = t.apply(reference_frame.mask.true_indices())
+        dense_shape = PointCloud(np.vstack((s.points, warped_points)))
+        dense_shapes.append(dense_shape)
+
+    return dense_shapes
+
+
+# Alignment step shared by build_shape_model and increment_shape_model.
+def align_shapes(shapes):
+    r"""Centre each shape at the origin, then align them all with Generalized
+    Procrustes Analysis; returns the list of aligned shapes."""
+    # centralize shapes
+    centered_shapes = [Translation(-s.centre()).apply(s) for s in shapes]
+    # align centralized shape using Procrustes Analysis
+    gpa = GeneralizedProcrustesAnalysis(centered_shapes)
+    return [s.aligned_source() for s in gpa.transforms]
+
+
+# TODO: rethink OrthoPDM, should this function be its constructor?
+def build_shape_model(shapes, max_components=None, prefix='', verbose=False):
     r"""
     Builds a shape model given a set of shapes.
 
@@ -137,38 +301,35 @@ def build_shape_model(shapes, max_components):
     shape_model: :class:`menpo.model.pca`
         The PCA shape model.
     """
-    # centralize shapes
-    centered_shapes = [Translation(-s.centre()).apply(s) for s in shapes]
-    # align centralized shape using Procrustes Analysis
-    gpa = GeneralizedProcrustesAnalysis(centered_shapes)
-    aligned_shapes = [s.aligned_source() for s in gpa.transforms]
-
+    if verbose:
+        print_dynamic('{}Building shape model'.format(prefix))
+    # compute aligned shapes
+    aligned_shapes = align_shapes(shapes)
     # build shape model
     shape_model = PCAModel(aligned_shapes)
     if max_components is not None:
         # trim shape model if required
         shape_model.trim_components(max_components)
+    return shape_model
 
+
+def increment_shape_model(shape_model, shapes, forgetting_factor=None,
+                          max_components=None, prefix='', verbose=False):
+    r"""Align `shapes`, fold them into `shape_model` via its `increment`
+    method, optionally trim to `max_components`, and return the same model."""
+    if verbose:
+        print_dynamic('{}Incrementing shape model'.format(prefix))
+    # compute aligned shapes
+    aligned_shapes = align_shapes(shapes)
+    # increment shape model
+    shape_model.increment(aligned_shapes, forgetting_factor=forgetting_factor)
+    if max_components is not None:
+        shape_model.trim_components(max_components)
     return shape_model
 
 
-class DeformableModelBuilder(object):
+class MenpoFitBuilderWarning(Warning):
     r"""
-    Abstract class with a set of functions useful to build a Deformable Model.
+    A warning that some part of building the model may cause issues.
     """
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractmethod
-    def build(self, images, group=None, label=None):
-        r"""
-        Builds a Multilevel Deformable Model.
-        """
-
-    @property
-    def pyramid_on_features(self):
-        r"""
-        True if feature extraction happens once and then a gaussian pyramid
-        is taken. False if a gaussian pyramid is taken and then features are
-        extracted at each level.
-        """
-        return is_pyramid_on_features(self.features)
+    pass
diff --git a/menpofit/checks.py b/menpofit/checks.py
index 27b4e55..34c78b6 100644
--- a/menpofit/checks.py
+++ b/menpofit/checks.py
@@ -1,7 +1,48 @@
-from menpofit.base import is_pyramid_on_features
+import warnings
+from functools import partial
+import numpy as np
+from menpo.shape import TriMesh
+from menpo.transform import PiecewiseAffine
 
 
-def check_features(features, n_levels):
+def check_diagonal(diagonal):
+    r"""
+    Validates the diagonal used to normalize image sizes: ``None`` and
+    values of at least 20 are returned unchanged, smaller ones raise.
+    """
+    if diagonal is None or not diagonal < 20:
+        return diagonal
+    raise ValueError("diagonal must be >= 20")
+
+
+def check_landmark_trilist(image, transform, group=None):
+    r"""Run `check_trilist` on the `group` landmarks of `image`."""
+    check_trilist(image.landmarks[group].lms, transform)
+
+
+def check_trilist(shape, transform):  # warns: PiecewiseAffine, no TriMesh
+    if not isinstance(shape, TriMesh) and isinstance(transform,
+                                                     PiecewiseAffine):
+        warnings.warn('The given images do not have an explicit triangulation '
+                      'applied. A Delaunay Triangulation will be computed '
+                      'and used for warping. This may be suboptimal and cause '
+                      'warping artifacts.')
+
+
+# Normalise `scales` (an int/float or a list/tuple of them) to a flat list.
+def check_scales(scales):
+    if isinstance(scales, (int, float)):
+        return [scales]
+    elif len(scales) == 1 and isinstance(scales[0], (int, float)):
+        return list(scales)
+    elif len(scales) > 1:
+        return check_scales(scales[0]) + check_scales(scales[1:])
+    else:
+        raise ValueError("scales must be an int/float or a list/tuple of "
+                         "int/float")
+
+
+def check_features(features, n_scales):
     r"""
     Checks the feature type per level.
 
@@ -9,7 +50,7 @@ def check_features(features, n_levels):
     ----------
     features : callable or list of callables
         The features to apply to the images.
-    n_levels : int
+    n_scales : int
         The number of pyramid levels.
 
     Returns
@@ -17,85 +58,62 @@ def check_features(features, n_levels):
     feature_list : list
         A list of feature function.
     """
-    # Firstly, make sure we have a list of callables of the right length
-    if is_pyramid_on_features(features):
-        return features
+    if callable(features):
+        return [features] * n_scales
+    elif len(features) == 1 and np.alltrue([callable(f) for f in features]):
+        return list(features) * n_scales
+    elif len(features) == n_scales and np.alltrue([callable(f)
+                                                   for f in features]):
+        return list(features)
     else:
-        try:
-            all_callables = check_list_callables(features, n_levels,
-                                                 allow_single=False)
-        except ValueError:
-            raise ValueError("features must be a callable or a list of "
-                             "{} callables".format(n_levels))
-        return all_callables
-
-
-def check_list_callables(callables, n_callables, allow_single=True):
-    if not isinstance(callables, list):
-        if allow_single:
-            # expand to a list of callables for them
-            callables = [callables] * n_callables
-        else:
-            raise ValueError("Expected a list of callables "
-                             "(allow_single=False)")
-    # must have a list by now
-    for c in callables:
-        if not callable(c):
-            raise ValueError("All items must be callables")
-    if len(callables) != n_callables:
-        raise ValueError("List of callables must be {} "
-                         "long".format(n_callables))
-    return callables
-
-
-def check_n_levels(n_levels):
-    r"""
-    Checks the number of pyramid levels - must be int > 0.
-    """
-    if not isinstance(n_levels, int) or n_levels < 1:
-        raise ValueError("n_levels must be int > 0")
-
-
-def check_downscale(downscale):
-    r"""
-    Checks the downscale factor of the pyramid that must be >= 1.
-    """
-    if downscale < 1:
-        raise ValueError("downscale must be >= 1")
-
-
-def check_normalization_diagonal(normalization_diagonal):
-    r"""
-    Checks the diagonal length used to normalize the images' size that
-    must be >= 20.
-    """
-    if normalization_diagonal is not None and normalization_diagonal < 20:
-        raise ValueError("normalization_diagonal must be >= 20")
+        raise ValueError("features must be a callable or a list/tuple of "
+                         "callables with the same length as the number "
+                         "of scales")
 
 
-def check_boundary(boundary):
+# Keeps scale_features only when every entry of features equals the first.
+def check_scale_features(scale_features, features):
     r"""
-    Checks the boundary added around the reference shape that must be
-    int >= 0.
     """
-    if not isinstance(boundary, int) or boundary < 0:
-        raise ValueError("boundary must be >= 0")
+    if np.all([f == features[0] for f in features]):
+        return scale_features
+    else:
+        warnings.warn('scale_features has been automatically set to False '
+                      'because different types of features are used at each '
+                      'level.')
+        return False
+
+
+# Expands patch_size (one pair of ints, or a list of pairs) per scale.
+def check_patch_size(patch_size, n_scales):
+    if len(patch_size) == 2 and isinstance(patch_size[0], int):
+        return [patch_size] * n_scales
+    elif len(patch_size) == 1:  # NOTE: result has 1 entry, not n_scales
+        return check_patch_size(patch_size[0], 1)
+    elif len(patch_size) == n_scales:
+        l1 = check_patch_size(patch_size[0], 1)
+        l2 = check_patch_size(patch_size[1:], n_scales-1)
+        return l1 + l2
+    else:
+        raise ValueError("patch_size must be a list/tuple of int or a "
+                         "list/tuple of list/tuple of int/float with the "
+                         "same length as the number of scales")
 
 
-def check_max_components(max_components, n_levels, var_name):
+def check_max_components(max_components, n_scales, var_name):
     r"""
     Checks the maximum number of components per level either of the shape
     or the appearance model. It must be None or int or float or a list of
-    those containing 1 or {n_levels} elements.
+    those containing 1 or {n_scales} elements.
     """
     str_error = ("{} must be None or an int > 0 or a 0 <= float <= 1 or "
                  "a list of those containing 1 or {} elements").format(
-        var_name, n_levels)
-    if not isinstance(max_components, list):
-        max_components_list = [max_components] * n_levels
+        var_name, n_scales)
+    if not isinstance(max_components, (list, tuple)):
+        max_components_list = [max_components] * n_scales
     elif len(max_components) == 1:
-        max_components_list = [max_components[0]] * n_levels
-    elif len(max_components) == n_levels:
+        max_components_list = [max_components[0]] * n_scales
+    elif len(max_components) == n_scales:
         max_components_list = max_components
     else:
         raise ValueError(str_error)
@@ -105,3 +123,81 @@ def check_max_components(max_components, n_levels, var_name):
                 if not isinstance(comp, float):
                     raise ValueError(str_error)
     return max_components_list
+
+
+# Expands max_iters to an integer array with one entry per scale.
+def check_max_iters(max_iters, n_scales):
+    if type(max_iters) is int:
+        max_iters = [np.round(max_iters/n_scales)
+                     for _ in range(n_scales)]
+    elif len(max_iters) == 1 and n_scales > 1:
+        max_iters = [np.round(max_iters[0]/n_scales)
+                     for _ in range(n_scales)]
+    elif len(max_iters) != n_scales:
+        raise ValueError('max_iters can be integer, integer list '
+                         'containing 1 or {} elements or '
+                         'None'.format(n_scales))
+    return np.require(max_iters, dtype=int)  # np.int alias no longer exists
+
+
+# Expands sampling (int, ndarray, None, or a list of those) to one per scale.
+def check_sampling(sampling, n_scales):
+    if (isinstance(sampling, (list, tuple)) and
+        np.all([isinstance(s, (np.ndarray, int)) or s is None
+                for s in sampling])):
+        if len(sampling) == 1:
+            return sampling * n_scales
+        elif len(sampling) == n_scales:
+            return sampling
+        else:
+            raise ValueError('A sampling list can only '
+                             'contain 1 element or {} '
+                             'elements'.format(n_scales))
+    elif isinstance(sampling, (np.ndarray, int)) or sampling is None:
+        return [sampling] * n_scales
+    else:
+        raise ValueError('sampling can be an integer or ndarray, '
+                         'a integer or ndarray list '
+                         'containing 1 or {} elements or '
+                         'None'.format(n_scales))
+
+
+def set_models_components(models, n_components):
+    if n_components is None:
+        return  # nothing to configure
+    n_scales = len(models)
+    if type(n_components) in (int, float):
+        per_model = [n_components] * n_scales
+    elif len(n_components) == 1 and n_scales > 1:
+        per_model = [n_components[0]] * n_scales
+    elif len(n_components) == n_scales:
+        per_model = n_components
+    else:
+        raise ValueError('n_components can be an integer or a float '
+                         'or None or a list containing 1 or {} of '
+                         'those'.format(n_scales))
+    for model, n in zip(models, per_model):
+        model.n_active_components = n
+
+
+def check_algorithm_cls(algorithm_cls, n_scales, base_algorithm_cls):
+    r"""Expands ``algorithm_cls`` into a list with one entry per scale.
+    """
+    # A single partial/class derived from the base is repeated per scale.
+    is_partial = isinstance(algorithm_cls, partial)
+    if is_partial and base_algorithm_cls in algorithm_cls.func.mro():
+        return [algorithm_cls] * n_scales
+    is_class = isinstance(algorithm_cls, type)
+    if is_class and base_algorithm_cls in algorithm_cls.mro():
+        return [algorithm_cls] * n_scales
+    if len(algorithm_cls) == 1:
+        return check_algorithm_cls(algorithm_cls[0], n_scales,
+                                   base_algorithm_cls)
+    if len(algorithm_cls) == n_scales:
+        return [check_algorithm_cls(a, 1, base_algorithm_cls)[0]
+                for a in algorithm_cls]
+    raise ValueError("algorithm_cls must be a subclass of {} or a "
+                     "list/tuple of {} subclasses with the same length "
+                     "as the number of scales {}"
+                     .format(base_algorithm_cls, base_algorithm_cls,
+                             n_scales))
diff --git a/menpofit/clm/__init__.py b/menpofit/clm/__init__.py
old mode 100755
new mode 100644
index 18186e9..9406f54
--- a/menpofit/clm/__init__.py
+++ b/menpofit/clm/__init__.py
@@ -1,3 +1,5 @@
 from .base import CLM
-from .builder import CLMBuilder
 from .fitter import GradientDescentCLMFitter
+from .algorithm import ActiveShapeModel, RegularisedLandmarkMeanShift
+from .expert import (
+    CorrelationFilterExpertEnsemble, IncrementalCorrelationFilterThinWrapper)
diff --git a/menpofit/clm/algorithm/__init__.py b/menpofit/clm/algorithm/__init__.py
new file mode 100644
index 0000000..a4ba38c
--- /dev/null
+++ b/menpofit/clm/algorithm/__init__.py
@@ -0,0 +1,3 @@
+from .gd import (
+    GradientDescentCLMAlgorithm, ActiveShapeModel,
+    RegularisedLandmarkMeanShift)
diff --git a/menpofit/clm/algorithm/gd.py b/menpofit/clm/algorithm/gd.py
new file mode 100644
index 0000000..a140f5c
--- /dev/null
+++ b/menpofit/clm/algorithm/gd.py
@@ -0,0 +1,238 @@
+from __future__ import division
+import numpy as np
+from menpofit.base import build_grid
+from menpofit.clm.result import CLMAlgorithmResult
+
+multivariate_normal = None  # expensive, from scipy.stats
+
+
+# TODO: document me!
+class GradientDescentCLMAlgorithm(object):
+    r"""Empty base class of ActiveShapeModel and RegularisedLandmarkMeanShift.
+    """
+
+
+# TODO: Document me!
+class ActiveShapeModel(GradientDescentCLMAlgorithm):
+    r"""
+    Active Shape Model (ASM) algorithm
+    """
+    def __init__(self, expert_ensemble, shape_model, gaussian_covariance=10,
+                 eps=10**-5):
+        # Set parameters
+        self.expert_ensemble = expert_ensemble
+        self.transform = shape_model
+        self.gaussian_covariance = gaussian_covariance
+        self.eps = eps
+        # Perform pre-computations
+        self._precompute()
+
+    def _precompute(self):
+        r"""Caches the grid, Gaussian, prior and Jacobian terms used by run.
+        """
+        # Import multivariate normal distribution from scipy
+        global multivariate_normal
+        if multivariate_normal is None:
+            from scipy.stats import multivariate_normal  # expensive
+
+        # Build grid associated to size of the search space
+        search_size = self.expert_ensemble.search_size
+        self.half_search_size = np.round(
+            np.asarray(self.expert_ensemble.search_size) / 2)
+        self.search_grid = build_grid(search_size)[None, None]
+
+        # set rho2
+        self.rho2 = self.transform.model.noise_variance()
+
+        # Frozen zero-mean Gaussian used in run to approximate the responses
+        self.mvn = multivariate_normal(mean=np.zeros(2),
+                                       cov=self.gaussian_covariance)
+
+        # Compute shape model prior
+        sim_prior = np.zeros((4,))
+        pdm_prior = self.rho2 / self.transform.model.eigenvalues
+        self.rho2_inv_L = np.hstack((sim_prior, pdm_prior))
+
+        # Compute Jacobian
+        J = np.rollaxis(self.transform.d_dp(None), -1, 1)
+        self.J = J.reshape((-1, J.shape[-1]))
+        # Compute J^T J
+        self.JJ = self.J.T.dot(self.J)
+        # Compute Jacobian pseudo-inverse
+        self.pinv_J = np.linalg.solve(self.JJ, self.J.T)
+        self.inv_JJ_prior = np.linalg.inv(self.JJ + np.diag(self.rho2_inv_L))
+
+    def run(self, image, initial_shape, max_iters=20, gt_shape=None,
+            map_inference=False):
+        r"""Iteratively updates the transform, starting at ``initial_shape``.
+        """
+        # Initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # Initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf
+
+        # Loop until max_iters is hit or the shape moves by at most self.eps
+        while k < max_iters and eps > self.eps:
+
+            target = self.transform.target
+            # Obtain all landmark positions l_i = (x_i, y_i) being considered
+            # ie all pixel positions in each landmark's search space
+            candidate_landmarks = (target.points[:, None, None, None, :] +
+                                   self.search_grid)
+
+            # Compute responses
+            responses = self.expert_ensemble.predict_probability(image, target)
+
+            # Approximate responses using isotropic Gaussian
+            max_indices = np.argmax(
+                responses.reshape(responses.shape[:2] + (-1,)), axis=-1)
+            max_indices = np.unravel_index(max_indices, responses.shape)[-2:]
+            max_indices = np.hstack((max_indices[0], max_indices[1]))
+            max_indices = max_indices[:, None, None, None, ...]
+            max_indices = max_indices - self.half_search_size  # int -> float
+            gaussian_responses = self.mvn.pdf(max_indices + self.search_grid)
+            # Normalise smoothed responses
+            gaussian_responses /= np.sum(gaussian_responses,
+                                         axis=(-2, -1))[..., None, None]
+
+            # Compute new target
+            new_target = np.sum(gaussian_responses[:, None, ..., None] *
+                                candidate_landmarks, axis=(-3, -2))
+
+            # Compute shape error term
+            error = target.as_vector() - new_target.ravel()
+
+            # Solve for increments on the shape parameters
+            if map_inference:
+                Je = (self.rho2_inv_L * self.transform.as_vector() -
+                      self.J.T.dot(error))
+                dp = -self.inv_JJ_prior.dot(Je)
+            else:
+                dp = self.pinv_J.dot(error)
+
+            # Update pdm
+            s_k = self.transform.target.points
+            self.transform.from_vector_inplace(self.transform.as_vector() + dp)
+            p_list.append(self.transform.as_vector())
+
+            # Test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # Increase iteration counter
+            k += 1
+
+        # NOTE(review): RLMS passes `self` (not the transform) here - confirm
+        return CLMAlgorithmResult(image, self.transform, p_list,
+                                  gt_shape=gt_shape)
+
+
+# TODO: Document me!
+class RegularisedLandmarkMeanShift(GradientDescentCLMAlgorithm):
+    r"""
+    Regularized Landmark Mean-Shift (RLMS) algorithm
+    """
+    def __init__(self, expert_ensemble, shape_model, kernel_covariance=10,
+                 eps=10**-5):
+        # Set parameters
+        self.expert_ensemble = expert_ensemble
+        self.transform = shape_model
+        self.kernel_covariance = kernel_covariance
+        self.eps = eps
+        # Perform pre-computations
+        self._precompute()
+
+    def _precompute(self):
+        r"""Caches the grid, kernel, prior and Jacobian terms used by run.
+        """
+        # Import multivariate normal distribution from scipy
+        global multivariate_normal
+        if multivariate_normal is None:
+            from scipy.stats import multivariate_normal  # expensive
+
+        # Build grid associated to size of the search space
+        search_size = self.expert_ensemble.search_size
+        self.search_grid = build_grid(search_size)
+
+        # set rho2
+        self.rho2 = self.transform.model.noise_variance()
+
+        # Compute Gaussian-KDE grid
+        mvn = multivariate_normal(mean=np.zeros(2), cov=self.kernel_covariance)
+        self.kernel_grid = mvn.pdf(self.search_grid)[None, None]
+
+        # Compute shape model prior
+        sim_prior = np.zeros((4,))
+        pdm_prior = self.rho2 / self.transform.model.eigenvalues
+        self.rho2_inv_L = np.hstack((sim_prior, pdm_prior))
+
+        # Compute Jacobian
+        J = np.rollaxis(self.transform.d_dp(None), -1, 1)
+        self.J = J.reshape((-1, J.shape[-1]))
+        # Compute J^T J
+        self.JJ = self.J.T.dot(self.J)
+        # Compute Jacobian pseudo-inverse
+        self.pinv_J = np.linalg.solve(self.JJ, self.J.T)
+        self.inv_JJ_prior = np.linalg.inv(self.JJ + np.diag(self.rho2_inv_L))
+
+    def run(self, image, initial_shape, max_iters=20, gt_shape=None,
+            map_inference=False):
+        r"""Iteratively updates the transform, starting at ``initial_shape``.
+        """
+        # Initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        # Initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf
+
+        # Loop until max_iters is hit or the shape moves by at most self.eps
+        while k < max_iters and eps > self.eps:
+
+            target = self.transform.target
+            # Obtain all landmark positions l_i = (x_i, y_i) being considered
+            # ie all pixel positions in each landmark's search space
+            candidate_landmarks = (target.points[:, None, None, None, :] +
+                                   self.search_grid)
+
+            # Compute patch responses
+            patch_responses = self.expert_ensemble.predict_probability(image,
+                                                                       target)
+
+            # Smooth responses using the Gaussian-KDE grid
+            patch_kernels = patch_responses * self.kernel_grid
+            # Normalise smoothed responses
+            patch_kernels /= np.sum(patch_kernels,
+                                    axis=(-2, -1))[..., None, None]
+
+            # Compute mean shift target
+            mean_shift_target = np.sum(patch_kernels[..., None] *
+                                       candidate_landmarks, axis=(-3, -2))
+
+            # Compute shape error term
+            error = mean_shift_target.ravel() - target.as_vector()
+
+            # Solve for increments on the shape parameters
+            if map_inference:
+                Je = (self.rho2_inv_L * self.transform.as_vector() -
+                      self.J.T.dot(error))
+                dp = -self.inv_JJ_prior.dot(Je)
+            else:
+                dp = self.pinv_J.dot(error)
+
+            # Update pdm
+            s_k = self.transform.target.points
+            self.transform.from_vector_inplace(self.transform.as_vector() + dp)
+            p_list.append(self.transform.as_vector())
+
+            # Test convergence
+            eps = np.abs(np.linalg.norm(s_k - self.transform.target.points))
+
+            # Increase iteration counter
+            k += 1
+
+        # Return algorithm result
+        return CLMAlgorithmResult(image, self, p_list, gt_shape=gt_shape)
diff --git a/menpofit/clm/algorithm/sd.py b/menpofit/clm/algorithm/sd.py
new file mode 100644
index 0000000..29d9bbc
--- /dev/null
+++ b/menpofit/clm/algorithm/sd.py
@@ -0,0 +1,9 @@
+
+
+# TODO: implement me!
+# TODO: document me!
+class SupervisedDescentCLMAlgorithm(object):
+    r"""Placeholder class: instantiation always raises NotImplementedError.
+    """
+    def __init__(self):
+        raise NotImplementedError
diff --git a/menpofit/clm/base.py b/menpofit/clm/base.py
index 3711a5d..780f874 100644
--- a/menpofit/clm/base.py
+++ b/menpofit/clm/base.py
@@ -1,321 +1,246 @@
-import numpy as np
-from menpo.image import Image
-
-from menpofit.base import DeformableModel
-
-
-class CLM(DeformableModel):
+from __future__ import division
+import warnings
+from menpo.feature import no_op
+from menpo.visualize import print_dynamic
+from menpofit import checks
+from menpofit.base import batch
+from menpofit.builder import (
+    compute_features, scale_images, build_shape_model, increment_shape_model,
+    MenpoFitBuilderWarning, compute_reference_shape,
+    rescale_images_to_reference_shape)
+from .expert import ExpertEnsemble, CorrelationFilterExpertEnsemble
+
+
+# TODO: Document me!
+# TODO: Introduce shape_model_cls
+# TODO: Get rid of max_shape_components and shape_forgetting_factor
+class CLM(object):
     r"""
-    Constrained Local Model class.
+    Constrained Local Model (CLM) class.
 
     Parameters
-    -----------
-    shape_models : :map:`PCAModel` list
-        A list containing the shape models of the CLM.
-
-    classifiers : ``[[callable]]``
-        A list containing the list of classifier callables per each pyramidal
-        level of the CLM.
-
-    n_training_images : `int`
-        The number of training images used to build the AAM.
-
-    patch_shape : tuple of `int`
-        The shape of the patches used to train the classifiers.
-
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-    reference_shape : :map:`PointCloud`
-        The reference shape that was used to resize all training images to a
-        consistent object size.
-
-    downscale : `float`
-        The downscale factor that was used to create the different pyramidal
-        levels.
-
-    scaled_shape_models : `boolean`, Optional
-        If ``True``, the reference frames are the mean shapes of each pyramid
-        level, so the shape models are scaled.
-
-        If ``False``, the reference frames of all levels are the mean shape of
-        the highest level, so the shape models are not scaled; they have the
-        same size.
+    ----------
 
+    Returns
+    -------
+    clm : :map:`CLM`
+        The CLM object
     """
-    def __init__(self, shape_models, classifiers, n_training_images,
-                 patch_shape, features, reference_shape, downscale,
-                 scaled_shape_models):
-        DeformableModel.__init__(self, features)
-        self.shape_models = shape_models
-        self.classifiers = classifiers
-        self.n_training_images = n_training_images
-        self.patch_shape = patch_shape
+    def __init__(self, images, group=None, verbose=False, batch_size=None,
+                 diagonal=None, scales=(0.5, 1), holistic_features=no_op,
+                 # shape_model_cls=build_normalised_pca_shape_model,
+                 expert_ensemble_cls=CorrelationFilterExpertEnsemble,
+                 max_shape_components=None, reference_shape=None,
+                 shape_forgetting_factor=1.0):
+        self.diagonal = checks.check_diagonal(diagonal)
+        self.scales = checks.check_scales(scales)
+        self.holistic_features = checks.check_features(holistic_features,
+                                                       self.n_scales)
+        # self.shape_model_cls = checks.check_algorithm_cls(
+        #     shape_model_cls, self.n_scales, ShapeModel)
+        self.expert_ensemble_cls = checks.check_algorithm_cls(
+            expert_ensemble_cls, self.n_scales, ExpertEnsemble)
+        # self.n_scales is len(self.scales), so self.scales must be set first
+        self.max_shape_components = checks.check_max_components(
+            max_shape_components, self.n_scales, 'max_shape_components')
+        self.shape_forgetting_factor = shape_forgetting_factor
         self.reference_shape = reference_shape
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
+        self.shape_models = []
+        self.expert_ensembles = []
 
-    @property
-    def n_levels(self):
-        """
-        The number of multi-resolution pyramidal levels of the CLM.
-
-        :type: `int`
-        """
-        return len(self.shape_models)
+        # Train now; _train fills shape_models and expert_ensembles per scale
+        self._train(images, increment=False, group=group, verbose=verbose,
+                    batch_size=batch_size)
 
     @property
-    def n_classifiers_per_level(self):
-        """
-        The number of classifiers per pyramidal level of the CLM.
+    def n_scales(self):
+        r"""
+        The number of scales of the CLM, i.e. ``len(self.scales)``.
 
         :type: `int`
         """
-        return [len(clf) for clf in self.classifiers]
+        return len(self.scales)
 
-    def instance(self, shape_weights=None, level=-1):
+    def _train(self, images, increment=False, group=None, verbose=False,
+               batch_size=None):
         r"""
-        Generates a novel CLM instance given a set of shape weights. If no
-        weights are provided, the mean CLM instance is returned.
-
-        Parameters
-        -----------
-        shape_weights : ``(n_weights,)`` `ndarray` or `float` list
-            Weights of the shape model that will be used to create
-            a novel shape instance. If `None`, the mean shape
-            ``(shape_weights = [0, 0, ..., 0])`` is used.
-
-        level : `int`, optional
-            The pyramidal level to be used.
-
-        Returns
-        -------
-        shape_instance : :map:`PointCloud`
-            The novel CLM instance.
         """
-        sm = self.shape_models[level]
-        # TODO: this bit of logic should to be transferred down to PCAModel
-        if shape_weights is None:
-            shape_weights = [0]
-        n_shape_weights = len(shape_weights)
-        shape_weights *= sm.eigenvalues[:n_shape_weights] ** 0.5
-        shape_instance = sm.instance(shape_weights)
-        return shape_instance
-
-    def random_instance(self, level=-1):
+        # Trains batch by batch: the first batch uses `increment` as given,
+        # every later batch is trained with increment=True.
+        # If batch_size is not None, then we may have a generator, else we
+        # assume we have a list.
+        if batch_size is not None:
+            # Create a generator of fixed sized batches. Will still work even
+            # on an infinite list.
+            image_batches = batch(images, batch_size)
+        else:
+            image_batches = [list(images)]
+
+        for k, image_batch in enumerate(image_batches):
+            if k == 0:
+                if self.reference_shape is None:
+                    # If no reference shape was given, use the mean of the first
+                    # batch
+                    if batch_size is not None:
+                        warnings.warn('No reference shape was provided. The '
+                                      'mean of the first batch will be the '
+                                      'reference shape. If the batch mean is '
+                                      'not representative of the true mean, '
+                                      'this may cause issues.',
+                                      MenpoFitBuilderWarning)
+                    self.reference_shape = compute_reference_shape(
+                        [i.landmarks[group].lms for i in image_batch],
+                        self.diagonal, verbose=verbose)
+
+            # After the first batch, we are incrementing the model
+            if k > 0:
+                increment = True
+
+            if verbose:
+                print('Computing batch {}'.format(k))
+
+            # Train each batch
+            self._train_batch(image_batch, increment=increment, group=group,
+                              verbose=verbose)
+
+    def _train_batch(self, image_batch, increment=False, group=None,
+                     verbose=False):
         r"""
-        Generates a novel random CLM instance.
-
-        Parameters
-        -----------
-        level : `int`, optional
-            The pyramidal level to be used.
-
-        Returns
-        -------
-        shape_instance : :map:`PointCloud`
-            The novel CLM instance.
         """
-        sm = self.shape_models[level]
-        # TODO: this bit of logic should to be transferred down to PCAModel
-        shape_weights = (np.random.randn(sm.n_active_components) *
-                         sm.eigenvalues[:sm.n_active_components]**0.5)
-        shape_instance = sm.instance(shape_weights)
-        return shape_instance
-
-    def response_image(self, image, group=None, label=None, level=-1):
-        r"""
-        Generates a response image result of applying the classifiers of a
-        particular pyramidal level of the CLM to an image.
-
-        Parameters
-        -----------
-        image: :map:`Image`
-            The image.
-        group : `string`, optional
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-        label : `string`, optional
-            The label of of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-        level: `int`, optional
-            The pyramidal level to be used.
+        # normalize images
+        image_batch = rescale_images_to_reference_shape(
+            image_batch, group, self.reference_shape, verbose=verbose)
+
+        # build models at each scale
+        if verbose:
+            print_dynamic('- Training models\n')
+
+        # for each scale index i (low --> high)
+        for i in range(self.n_scales):
+            if verbose:
+                if self.n_scales > 1:
+                    prefix = '  - Scale {}: '.format(i)
+                else:
+                    prefix = '  - '
+            else:
+                prefix = None
+
+            # Handle holistic features
+            if i == 0 and self.holistic_features[i] == no_op:
+                # Saves a lot of memory
+                feature_images = image_batch
+            elif i == 0 or self.holistic_features[i] is not self.holistic_features[i - 1]:
+                # compute features only if this is the first pass through
+                # the loop or the features at this scale are different from
+                # the features at the previous scale
+                feature_images = compute_features(image_batch,
+                                                  self.holistic_features[i],
+                                                  prefix=prefix,
+                                                  verbose=verbose)
+            # handle scales
+            if self.scales[i] != 1:
+                # scale feature images only if scale is different than 1
+                scaled_images = scale_images(feature_images,
+                                             self.scales[i],
+                                             prefix=prefix,
+                                             verbose=verbose)
+            else:
+                scaled_images = feature_images
 
-        Returns
-        -------
-        image : :map:`Image`
-            The response image.
-        """
-        # rescale image
-        image = image.rescale_to_reference_shape(self.reference_shape,
-                                                 group=group, label=label)
+            # extract scaled shapes
+            scaled_shapes = [image.landmarks[group].lms
+                             for image in scaled_images]
 
-        # apply pyramid
-        if self.n_levels > 1:
-            if self.pyramid_on_features:
-                # compute features at highest level
-                feature_image = self.features(image)
+            # train shape model
+            if verbose:
+                print_dynamic('{}Training shape model'.format(prefix))
 
-                # apply pyramid on feature image
-                pyramid = feature_image.gaussian_pyramid(
-                    n_levels=self.n_levels, downscale=self.downscale)
+            # TODO: This should be cleaned up by defining shape model classes
+            if increment:
+                increment_shape_model(
+                    self.shape_models[i], scaled_shapes,
+                    max_components=self.max_shape_components[i],
+                    forgetting_factor=self.shape_forgetting_factor,
+                    prefix=prefix, verbose=verbose)
 
-                # get rescaled feature images
-                images = list(pyramid)
             else:
-                # create pyramid on intensities image
-                pyramid = image.gaussian_pyramid(
-                    n_levels=self.n_levels, downscale=self.downscale)
-
-                # compute features at each level
-                images = [self.features[self.n_levels - j - 1](i)
-                          for j, i in enumerate(pyramid)]
-            images.reverse()
-        else:
-            images = [self.features(image)]
+                shape_model = build_shape_model(
+                    scaled_shapes, max_components=self.max_shape_components[i],
+                    prefix=prefix, verbose=verbose)
+                self.shape_models.append(shape_model)
+
+            # train expert ensemble
+            if verbose:
+                print_dynamic('{}Training expert ensemble'.format(prefix))
+
+            if increment:
+                self.expert_ensembles[i].increment(scaled_images,
+                                                   scaled_shapes,
+                                                   prefix=prefix,
+                                                   verbose=verbose)
+            else:
+                expert_ensemble = self.expert_ensemble_cls[i](scaled_images,
+                                                              scaled_shapes,
+                                                              prefix=prefix,
+                                                              verbose=verbose)
+                self.expert_ensembles.append(expert_ensemble)
 
-        # initialize responses
-        image = images[level]
-        image_pixels = np.reshape(image.pixels, (-1, image.n_channels))
-        response_data = np.zeros((image.shape[0], image.shape[1],
-                                  self.n_classifiers_per_level[level]))
-        # Compute responses
-        for j, clf in enumerate(self.classifiers[level]):
-            response_data[:, :, j] = np.reshape(clf(image_pixels),
-                                                image.shape)
-        return Image(image_data=response_data)
+            if verbose:
+                print_dynamic('{}Done\n'.format(prefix))
 
-    @property
-    def _str_title(self):
+    def increment(self, images, group=None, verbose=False, batch_size=None):
         r"""
-        Returns a string containing name of the model.
-
-        : str
         """
-        return 'Constrained Local Model'
+        return self._train(images, increment=True, group=group, verbose=verbose,
+                           batch_size=batch_size)
 
-    def view_shape_models_widget(self, n_parameters=5, mode='multiple',
+    def view_shape_models_widget(self, n_parameters=5,
                                  parameters_bounds=(-3.0, 3.0),
-                                 figure_size=(10, 8), style='coloured'):
+                                 mode='multiple', figure_size=(10, 8)):
         r"""
-        Visualizes the shape models of the CLM object using the
+        Visualizes the shape models of the CLM object using the
         `menpo.visualize.widgets.visualize_shape_model` widget.
 
         Parameters
         -----------
         n_parameters : `int` or `list` of `int` or ``None``, optional
-            The number of principal components to be used for the parameters
-            sliders. If `int`, then the number of sliders per level is the
-            minimum between `n_parameters` and the number of active components
-            per level. If `list` of `int`, then a number of sliders is defined
-            per level. If ``None``, all the active components per level will
-            have a slider.
-        mode : {``'single'``, ``'multiple'``}, optional
-            If ``'single'``, then only a single slider is constructed along with
-            a drop down menu. If ``'multiple'``, then a slider is constructed
-            for each parameter.
+            The number of shape principal components to be used for the
+            parameters sliders.
+            If `int`, then the number of sliders per level is the minimum
+            between `n_parameters` and the number of active components per
+            level.
+            If `list` of `int`, then a number of sliders is defined per level.
+            If ``None``, all the active components per level will have a slider.
         parameters_bounds : (`float`, `float`), optional
             The minimum and maximum bounds, in std units, for the sliders.
+        mode : {``'single'``, ``'multiple'``}, optional
+            If ``'single'``, only a single slider is constructed along with a
+            drop down menu.
+            If ``'multiple'``, a slider is constructed for each parameter.
+        popup : `bool`, optional
+            If ``True``, the widget will appear as a popup window.
         figure_size : (`int`, `int`), optional
             The size of the plotted figures.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
         """
         from menpofit.visualize import visualize_shape_model
-        visualize_shape_model(
-            self.shape_models, n_parameters=n_parameters,
-            parameters_bounds=parameters_bounds, figure_size=figure_size,
-            mode=mode, style=style)
+        visualize_shape_model(self.shape_models, n_parameters=n_parameters,
+                              parameters_bounds=parameters_bounds,
+                              figure_size=figure_size, mode=mode,)
 
-    def __str__(self):
-        from menpofit.base import name_of_callable
-        out = "{}\n - {} training images.\n".format(self._str_title,
-                                                    self.n_training_images)
-        # small strings about number of channels, channels string and downscale
-        down_str = []
-        for j in range(self.n_levels):
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        temp_img = Image(image_data=np.random.rand(50, 50))
-        if self.pyramid_on_features:
-            temp = self.features(temp_img)
-            n_channels = [temp.n_channels] * self.n_levels
-        else:
-            n_channels = []
-            for j in range(self.n_levels):
-                temp = self.features[j](temp_img)
-                n_channels.append(temp.n_channels)
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(
-                name_of_callable(self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                feat_str.append("- Feature is {} with ".format(
-                    name_of_callable(self.features[j])))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        if self.n_levels > 1:
-            if self.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n   - Patch size is {}W x " \
-                      "{}H.\n".format(out, self.n_levels, self.downscale,
-                                      self.patch_shape[1], self.patch_shape[0])
+    # TODO: Implement me!
+    def view_expert_ensemble_widget(self):
+        r"""Stub: not implemented yet; always raises NotImplementedError.
+        """
+        raise NotImplementedError
 
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n   - Patch size is {}W x " \
-                      "{}H.\n".format(out, self.n_levels, self.downscale,
-                                      self.patch_shape[1], self.patch_shape[0])
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                out = "{0}     - {1} shape components ({2:.2f}% of " \
-                      "variance)\n     - {3} {4} classifiers.\n".format(
-                    out, self.shape_models[i].n_components,
-                    self.shape_models[i].variance_ratio() * 100,
-                    self.n_classifiers_per_level[i],
-                    name_of_callable(self.classifiers[i][0]))
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - {4} shape components ({5:.2f}% of " \
-                  "variance)\n   - {6} {7} classifiers.".format(
-                out, feat_str[0], n_channels[0], ch_str[0],
-                self.shape_models[0].n_components,
-                self.shape_models[0].variance_ratio() * 100,
-                self.n_classifiers_per_level[0],
-                name_of_callable(self.classifiers[0][0]))
-        return out
+    # TODO: Implement me!
+    def view_clm_widget(self):
+        r"""Stub: not implemented yet; always raises NotImplementedError.
+        """
+        raise NotImplementedError
+
+    # TODO: Implement me!
+    def __str__(self):
+        r"""Stub: not implemented, so ``str()``/``print()`` on this object raise.
+        """
+        raise NotImplementedError
diff --git a/menpofit/clm/builder.py b/menpofit/clm/builder.py
deleted file mode 100644
index 4f05930..0000000
--- a/menpofit/clm/builder.py
+++ /dev/null
@@ -1,361 +0,0 @@
-from __future__ import division, print_function
-import numpy as np
-from menpo.feature import sparse_hog
-from menpo.visualize import print_dynamic, progress_bar_str
-
-from menpofit import checks
-from menpofit.base import create_pyramid, build_sampling_grid
-from menpofit.builder import (DeformableModelBuilder, build_shape_model,
-                              normalization_wrt_reference_shape)
-from .classifier import linear_svm_lr
-
-
-class CLMBuilder(DeformableModelBuilder):
-    r"""
-    Class that builds Multilevel Constrained Local Models.
-
-    Parameters
-    ----------
-    classifier_trainers : ``callable -> callable`` or ``[callable -> callable]``
-
-        Each ``classifier_trainers`` is a callable that will be invoked as:
-
-            classifer = classifier_trainer(X, t)
-
-        where X is a matrix of samples and t is a matrix of classifications
-        for each sample. `classifier` is then itself a callable,
-        which will be used to classify novel instance by the CLM.
-
-        If list of length ``n_levels``, then a classifier_trainer callable is
-        defined per level. The first element of the list specifies the
-        classifier_trainer to be used at the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified
-        classifier_trainer will be used for all levels.
-
-
-        Examples of such classifier trainers can be found in
-        `menpo.fitmultilevel.clm.classifier`
-
-    patch_shape : tuple of `int`
-        The shape of the patches used by the classifier trainers.
-
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During building an AAM, all images are rescaled to ensure that the
-        scale of their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the ``normalization_diagonal``
-        value.
-        If ``None``, the mean shape is not rescaled.
-
-        Note that, because the reference frame is computed from the mean
-        landmarks, this kwarg also specifies the diagonal length of the
-        reference frame (provided that features computation does not change
-        the image size).
-
-    n_levels : `int` > ``0``, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= ``1``, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(n_levels)
-
-    scaled_shape_models : `boolean`, optional
-        If ``True``, the reference frames will be the mean shapes of each
-        pyramid level, so the shape models will be scaled.
-
-        If ``False``, the reference frames of all levels will be the mean shape
-        of the highest level, so the shape models will not be scaled; they will
-        have the same size.
-
-    max_shape_components : ``None`` or `int` > ``0`` or ``0`` <= `float` <= ``1`` or list of those, optional
-        If list of length ``n_levels``, then a number of shape components is
-        defined per level. The first element of the list specifies the number
-        of components of the lowest pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        shape components will be used for all levels.
-
-        Per level:
-            If `int`, it specifies the exact number of components to be
-            retained.
-
-            If `float`, it specifies the percentage of variance to be retained.
-
-            If ``None``, all the available components are kept
-            (100% of variance).
-
-    boundary : `int` >= ``0``, optional
-        The number of pixels to be left as a safe margin on the boundaries
-        of the reference frame (has potential effects on the gradient
-        computation).
-
-    Returns
-    -------
-    clm : :map:`CLMBuilder`
-        The CLM Builder object
-    """
-    def __init__(self, classifier_trainers=linear_svm_lr, patch_shape=(5, 5),
-                 features=sparse_hog, normalization_diagonal=None,
-                 n_levels=3, downscale=1.1, scaled_shape_models=True,
-                 max_shape_components=None, boundary=3):
-
-        # general deformable model checks
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-        checks.check_normalization_diagonal(normalization_diagonal)
-        checks.check_boundary(boundary)
-        max_shape_components = checks.check_max_components(
-            max_shape_components, n_levels, 'max_shape_components')
-        features = checks.check_features(features, n_levels)
-
-        # CLM specific checks
-        classifier_trainers = check_classifier_trainers(classifier_trainers, n_levels)
-        patch_shape = check_patch_shape(patch_shape)
-
-        # store parameters
-        self.classifier_trainers = classifier_trainers
-        self.patch_shape = patch_shape
-        self.features = features
-        self.normalization_diagonal = normalization_diagonal
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.scaled_shape_models = scaled_shape_models
-        self.max_shape_components = max_shape_components
-        self.boundary = boundary
-
-    def build(self, images, group=None, label=None, verbose=False):
-        r"""
-        Builds a Multilevel Constrained Local Model from a list of
-        landmarked images.
-
-        Parameters
-        ----------
-        images : list of :map:`Image`
-            The set of landmarked images from which to build the AAM.
-        group : string, Optional
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-        label : `string`, optional
-            The label of of the landmark manager that you wish to use. If
-            ``None``, the convex hull of all landmarks is used.
-        verbose : `boolean`, optional
-            Flag that controls information and progress printing.
-
-        Returns
-        -------
-        clm : :map:`CLM`
-            The CLM object
-        """
-        # compute reference_shape and normalize images size
-        self.reference_shape, normalized_images = \
-            normalization_wrt_reference_shape(
-                images, group, label, self.normalization_diagonal,
-                verbose=verbose)
-
-        # create pyramid
-        generators = create_pyramid(normalized_images, self.n_levels,
-                                    self.downscale, self.features,
-                                    verbose=verbose)
-
-        # build the model at each pyramid level
-        if verbose:
-            if self.n_levels > 1:
-                print_dynamic('- Building model for each of the {} pyramid '
-                              'levels\n'.format(self.n_levels))
-            else:
-                print_dynamic('- Building model\n')
-
-        shape_models = []
-        classifiers = []
-        # for each pyramid level (high --> low)
-        for j in range(self.n_levels):
-            # since models are built from highest to lowest level, the
-            # parameters of type list need to use a reversed index
-            rj = self.n_levels - j - 1
-
-            if verbose:
-                level_str = '  - '
-                if self.n_levels > 1:
-                    level_str = '  - Level {}: '.format(j + 1)
-
-            # get images of current level
-            feature_images = []
-            for c, g in enumerate(generators):
-                if verbose:
-                    print_dynamic(
-                        '{}Computing feature space/rescaling - {}'.format(
-                            level_str,
-                            progress_bar_str((c + 1.) / len(generators),
-                                             show_bar=False)))
-                feature_images.append(next(g))
-
-            # extract potentially rescaled shapes
-            shapes = [i.landmarks[group][label] for i in feature_images]
-
-            # define shapes that will be used for training
-            if j == 0:
-                original_shapes = shapes
-                train_shapes = shapes
-            else:
-                if self.scaled_shape_models:
-                    train_shapes = shapes
-                else:
-                    train_shapes = original_shapes
-
-            # train shape model and find reference frame
-            if verbose:
-                print_dynamic('{}Building shape model'.format(level_str))
-            shape_model = build_shape_model(
-                train_shapes, self.max_shape_components[rj])
-
-            # add shape model to the list
-            shape_models.append(shape_model)
-
-            # build classifiers
-            sampling_grid = build_sampling_grid(self.patch_shape)
-            n_points = shapes[0].n_points
-            level_classifiers = []
-            for k in range(n_points):
-                if verbose:
-                    print_dynamic('{}Building classifiers - {}'.format(
-                        level_str,
-                        progress_bar_str((k + 1.) / n_points,
-                                         show_bar=False)))
-
-                positive_labels = []
-                negative_labels = []
-                positive_samples = []
-                negative_samples = []
-
-                for i, s in zip(feature_images, shapes):
-
-                    max_x = i.shape[0] - 1
-                    max_y = i.shape[1] - 1
-
-                    point = (np.round(s.points[k, :])).astype(int)
-                    patch_grid = sampling_grid + point[None, None, ...]
-                    positive, negative = get_pos_neg_grid_positions(
-                        patch_grid, positive_grid_size=(1, 1))
-
-                    x = positive[:, 0]
-                    y = positive[:, 1]
-                    x[x > max_x] = max_x
-                    y[y > max_y] = max_y
-                    x[x < 0] = 0
-                    y[y < 0] = 0
-
-                    positive_sample = i.pixels[:, x, y].T
-                    positive_samples.append(positive_sample)
-                    positive_labels.append(np.ones(positive_sample.shape[0]))
-
-                    x = negative[:, 0]
-                    y = negative[:, 1]
-                    x[x > max_x] = max_x
-                    y[y > max_y] = max_y
-                    x[x < 0] = 0
-                    y[y < 0] = 0
-
-                    negative_sample = i.pixels[:, x, y].T
-                    negative_samples.append(negative_sample)
-                    negative_labels.append(-np.ones(negative_sample.shape[0]))
-
-                positive_samples = np.asanyarray(positive_samples)
-                positive_samples = np.reshape(positive_samples,
-                                              (-1, positive_samples.shape[-1]))
-                positive_labels = np.asanyarray(positive_labels).flatten()
-
-                negative_samples = np.asanyarray(negative_samples)
-                negative_samples = np.reshape(negative_samples,
-                                              (-1, negative_samples.shape[-1]))
-                negative_labels = np.asanyarray(negative_labels).flatten()
-
-                X = np.vstack((positive_samples, negative_samples))
-                t = np.hstack((positive_labels, negative_labels))
-
-                clf = self.classifier_trainers[rj](X, t)
-                level_classifiers.append(clf)
-
-            # add level classifiers to the list
-            classifiers.append(level_classifiers)
-
-            if verbose:
-                print_dynamic('{}Done\n'.format(level_str))
-
-        # reverse the list of shape and appearance models so that they are
-        # ordered from lower to higher resolution
-        shape_models.reverse()
-        classifiers.reverse()
-        n_training_images = len(images)
-
-        from .base import CLM
-        return CLM(shape_models, classifiers, n_training_images,
-                   self.patch_shape, self.features, self.reference_shape,
-                   self.downscale, self.scaled_shape_models)
-
-
-def get_pos_neg_grid_positions(sampling_grid, positive_grid_size=(1, 1)):
-    r"""
-    Divides a sampling grid in positive and negative pixel positions. By
-    default only the centre of the grid is considered to be positive.
-    """
-    positive_grid_size = np.array(positive_grid_size)
-    mask = np.zeros(sampling_grid.shape[:-1], dtype=np.bool)
-    centre = np.round(np.array(mask.shape) / 2).astype(int)
-    positive_grid_size -= [1, 1]
-    start = centre - positive_grid_size
-    end = centre + positive_grid_size + 1
-    mask[start[0]:end[0], start[1]:end[1]] = True
-    positive = sampling_grid[mask]
-    negative = sampling_grid[~mask]
-    return positive, negative
-
-
-def check_classifier_trainers(classifier_trainers, n_levels):
-    r"""
-    Checks the classifier_trainers. Must be a ``callable`` ->
-    ``callable`` or
-    or a list containing 1 or {n_levels} callables each of which returns a
-    callable.
-    """
-    str_error = ("classifier must be a callable "
-                 "of a list containing 1 or {} callables").format(n_levels)
-    if not isinstance(classifier_trainers, list):
-        classifier_list = [classifier_trainers] * n_levels
-    elif len(classifier_trainers) == 1:
-        classifier_list = [classifier_trainers[0]] * n_levels
-    elif len(classifier_trainers) == n_levels:
-        classifier_list = classifier_trainers
-    else:
-        raise ValueError(str_error)
-    for classifier in classifier_list:
-        if not callable(classifier):
-            raise ValueError(str_error)
-    return classifier_list
-
-
-def check_patch_shape(patch_shape):
-    r"""
-    Checks the patch shape. It must be a tuple with `int` > ``1``.
-    """
-    str_error = "patch_size mast be a tuple with two integers"
-    if not isinstance(patch_shape, tuple) or len(patch_shape) != 2:
-        raise ValueError(str_error)
-    for sh in patch_shape:
-        if not isinstance(sh, int) or sh < 2:
-            raise ValueError(str_error)
-    return patch_shape
diff --git a/menpofit/clm/classifier.py b/menpofit/clm/classifier.py
deleted file mode 100644
index 377b4cd..0000000
--- a/menpofit/clm/classifier.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from sklearn import svm
-from sklearn import linear_model
-
-
-class linear_svm_lr(object):
-    r"""
-    Binary classifier that combines Linear Support Vector Machines and
-    Logistic Regression.
-    """
-    def __init__(self, X, t):
-        self.clf1 = svm.LinearSVC(class_weight='auto')
-        self.clf1.fit(X, t)
-        t1 = self.clf1.decision_function(X)
-        self.clf2 = linear_model.LogisticRegression(class_weight='auto')
-        self.clf2.fit(t1[..., None], t)
-
-    def __call__(self, x):
-        t1_pred = self.clf1.decision_function(x)
-        return self.clf2.predict_proba(t1_pred[..., None])[:, 1]
diff --git a/menpofit/clm/expert/__init__.py b/menpofit/clm/expert/__init__.py
new file mode 100644
index 0000000..c0e7ae7
--- /dev/null
+++ b/menpofit/clm/expert/__init__.py
@@ -0,0 +1,2 @@
+from .ensemble import ExpertEnsemble, CorrelationFilterExpertEnsemble
+from .base import IncrementalCorrelationFilterThinWrapper
diff --git a/menpofit/clm/expert/base.py b/menpofit/clm/expert/base.py
new file mode 100644
index 0000000..c2f0d38
--- /dev/null
+++ b/menpofit/clm/expert/base.py
@@ -0,0 +1,28 @@
+import numpy as np
+from menpofit.math.correlationfilter import mccf, imccf
+
+
+# TODO: document me!
+class IncrementalCorrelationFilterThinWrapper(object):
+    r"""Pairs a batch and an incremental correlation filter callable.
+    """
+    def __init__(self, cf_callable=mccf, icf_callable=imccf):
+        self.cf_callable = cf_callable
+        self.icf_callable = icf_callable
+
+    def increment(self, A, B, n_x, Z, t):
+        r"""Return ``icf_callable(A, B, n_x, Z, t)``; a list ``Z`` becomes an array.
+        """
+        # Turn list of Z into ndarray
+        if isinstance(Z, list):
+            Z = np.asarray(Z)
+        return self.icf_callable(A, B, n_x, Z, t)
+
+    def train(self, X, t):
+        r"""Return ``cf_callable(X, t)``; a list ``X`` is first made an ndarray.
+        """
+        # Turn list of X into ndarray
+        if isinstance(X, list):
+            X = np.asarray(X)
+        # Delegate to the batch callable and return its result unchanged
+        return self.cf_callable(X, t)
diff --git a/menpofit/clm/expert/ensemble.py b/menpofit/clm/expert/ensemble.py
new file mode 100644
index 0000000..1843ee7
--- /dev/null
+++ b/menpofit/clm/expert/ensemble.py
@@ -0,0 +1,260 @@
+from __future__ import division
+from functools import partial
+import numpy as np
+from scipy.stats import multivariate_normal
+from menpo.shape import PointCloud
+from menpo.image import Image
+from menpofit.base import build_grid
+from menpofit.feature import normalize_norm, probability_map
+from menpofit.math.fft_utils import (
+    fft2, ifft2, fftshift, pad, crop, fft_convolve2d_sum)
+from menpofit.visualize import print_progress
+from .base import IncrementalCorrelationFilterThinWrapper
+
+
+# TODO: Document me!
+class ExpertEnsemble(object):
+    r"""Common base class of the expert ensembles in this module; it defines
+    no attributes or methods of its own."""
+
+
+# TODO: Document me!
+# TODO: Should convolutional experts of ensembles support patch features?
+class ConvolutionBasedExpertEnsemble(ExpertEnsemble):
+    r"""Shared behaviour for ensembles whose experts are convolution filters
+    stored in the Fourier domain as ``self.fft_padded_filters``."""
+    @property
+    def n_experts(self):
+        r"""Number of experts, i.e. the length of the first axis of
+        ``self.fft_padded_filters``."""
+        return self.fft_padded_filters.shape[0]
+
+    @property
+    def n_sample_offsets(self):
+        r"""Number of points in ``self.sample_offsets``, or 1 when
+        ``self.sample_offsets`` is falsy (e.g. ``None``)."""
+        if self.sample_offsets:
+            return self.sample_offsets.n_points
+        else:
+            return 1
+
+    @property
+    def padded_size(self):
+        r"""Tuple ``floor(1.5 * patch_size - 1)`` cast to int, computed
+        element-wise from ``self.patch_size``."""
+        pad_size = np.floor(1.5 * np.asarray(self.patch_size) - 1).astype(int)
+        return tuple(pad_size)
+
+    @property
+    def search_size(self):
+        r"""Returns ``self.patch_size`` unchanged; presumably the size of the
+        region searched around each landmark (confirm against callers)."""
+        return self.patch_size
+
+    def increment(self, images, shapes, prefix='', verbose=False):
+        r"""Update the existing experts with new ``images``/``shapes`` by
+        delegating to ``self._train(..., increment=True)``."""
+        self._train(images, shapes, prefix=prefix, verbose=verbose,
+                    increment=True)
+
+    @property
+    def spatial_filter_images(self):
+        r"""List with one ``Image`` per expert: real part of the filter's
+        inverse FFT, cropped to ``patch_size`` and flipped on both axes."""
+        filter_images = []
+        for fft_padded_filter in self.fft_padded_filters:
+            spatial_filter = np.real(ifft2(fft_padded_filter))
+            spatial_filter = crop(spatial_filter,
+                                  self.patch_size)[:, ::-1, ::-1]
+            filter_images.append(Image(spatial_filter))
+        return filter_images
+
+    @property
+    def frequency_filter_images(self):
+        r"""List with one ``Image`` per expert holding the shifted FFT
+        magnitude of the cropped, flipped spatial filter."""
+        filter_images = []
+        for fft_padded_filter in self.fft_padded_filters:
+            spatial_filter = np.real(ifft2(fft_padded_filter))
+            spatial_filter = crop(spatial_filter,
+                                  self.patch_size)[:, ::-1, ::-1]
+            frequency_filter = np.abs(fftshift(fft2(spatial_filter)))
+            filter_images.append(Image(frequency_filter))
+        return filter_images
+
+    def _extract_patch(self, image, landmark):
+        r"""Extract the ``self.patch_size`` patch around ``landmark``, merge
+        its leading axes into one and apply ``self.patch_normalisation``."""
+        # Extract patch from image
+        patch = image.extract_patches(
+            landmark, patch_size=self.patch_size,
+            sample_offsets=self.sample_offsets, as_single_array=True)
+        # Reshape patch
+        # patch: (offsets x ch) x h x w
+        patch = patch.reshape((-1,) + patch.shape[-2:])
+        # Normalise patch
+        return self.patch_normalisation(patch)
+
+    def _extract_patches(self, image, shape):
+        r"""Extract patches around every point of ``shape``, merge the axes
+        between the first and the last two, and normalise the result."""
+        # Obtain patch ensemble, the whole shape is used to extract patches
+        # from all landmarks at once
+        patches = image.extract_patches(shape, patch_size=self.patch_size,
+                                        sample_offsets=self.sample_offsets,
+                                        as_single_array=True)
+        # Reshape patches
+        # patches: n_patches x (n_offsets x n_channels) x height x width
+        patches = patches.reshape((patches.shape[0], -1) + patches.shape[-2:])
+        # Normalise patches
+        return self.patch_normalisation(patches)
+
+    def predict_response(self, image, shape):
+        r"""Return ``fft_convolve2d_sum`` of the normalised patches around
+        ``shape`` with the stored filters (``fft_filter=True, axis=1``)."""
+        # Extract patches
+        patches = self._extract_patches(image, shape)
+        # Predict responses
+        return fft_convolve2d_sum(patches, self.fft_padded_filters,
+                                  fft_filter=True, axis=1)
+
+    def predict_probability(self, image, shape):
+        r"""Return ``probability_map`` applied to the responses produced by
+        ``predict_response`` for ``image`` and ``shape``."""
+        # Predict responses
+        responses = self.predict_response(image, shape)
+        # Turn them into proper probability maps
+        return probability_map(responses)
+
+
+# TODO: Document me!
+class CorrelationFilterExpertEnsemble(ConvolutionBasedExpertEnsemble):
+    r"""Ensemble with one correlation filter per landmark, trained or
+    incrementally updated via ``icf_cls`` from ``context_size`` patches."""
+    def __init__(self, images, shapes, verbose=False, prefix='',
+                 icf_cls=IncrementalCorrelationFilterThinWrapper,
+                 patch_size=(17, 17), context_size=(34, 34),
+                 response_covariance=3, patch_normalisation=normalize_norm,
+                 cosine_mask=True, sample_offsets=None):
+        # TODO: check parameters?
+        # Store the configuration; icf_cls is instantiated with no arguments
+        self._icf = icf_cls()
+        self.patch_size = patch_size
+        self.context_size = context_size
+        self.response_covariance = response_covariance
+        self.patch_normalisation = patch_normalisation
+        self.cosine_mask = cosine_mask
+        self.sample_offsets = sample_offsets
+
+        # Generate cosine mask
+        self._cosine_mask = generate_cosine_mask(self.context_size)
+
+        # Generate desired response, i.e. a Gaussian response with the
+        # specified covariance centred at the middle of the patch
+        self.response = generate_gaussian_response(
+            self.patch_size, self.response_covariance)[None, ...]
+
+        # Train ensemble of correlation filter experts
+        self._train(images, shapes, verbose=verbose, prefix=prefix)
+
+    def _extract_patch(self, image, landmark):
+        r"""Training-time patch: extracted at ``self.context_size``, flattened,
+        normalised and, if ``self.cosine_mask``, multiplied by the mask."""
+        # Extract patch from image
+        patch = image.extract_patches(
+            landmark, patch_size=self.context_size,
+            sample_offsets=self.sample_offsets, as_single_array=True)
+        # Reshape patch
+        # patch: (offsets x ch) x h x w
+        patch = patch.reshape((-1,) + patch.shape[-2:])
+        # Normalise patch
+        patch = self.patch_normalisation(patch)
+        if self.cosine_mask:
+            # Apply cosine mask if required
+            patch = self._cosine_mask * patch
+        return patch
+
+    def _train(self, images, shapes, prefix='', verbose=False,
+               increment=False):
+        r"""Fit (or, with ``increment=True``, update) one filter per point of
+        ``shapes[0]`` and store the padded FFTs and correlation statistics."""
+        # Define print_progress partial
+        wrap = partial(print_progress,
+                       prefix='{}Training experts'
+                              .format(prefix),
+                       end_with_newline=not prefix,
+                       verbose=verbose)
+
+        # If increment is False, we need to initialise/reset the ensemble of
+        # experts
+        if not increment:
+            self.fft_padded_filters = []
+            self.auto_correlations = []
+            self.cross_correlations = []
+            # Set number of images
+            self.n_images = len(images)
+        else:
+            # Update number of images
+            self.n_images += len(images)
+
+        # Obtain total number of experts
+        n_experts = shapes[0].n_points
+
+        # Train ensemble of correlation filter experts
+        fft_padded_filters = []
+        auto_correlations = []
+        cross_correlations = []
+        for i in wrap(range(n_experts)):
+            patches = []
+            for image, shape in zip(images, shapes):
+                # Select the appropriate landmark
+                landmark = PointCloud([shape.points[i]])
+                # Extract patch
+                patch = self._extract_patch(image, landmark)
+                # Add patch to the list
+                patches.append(patch)
+
+            if increment:
+                # NOTE(review): n_images already includes this batch - confirm
+                correlation_filter, auto_correlation, cross_correlation = (
+                    self._icf.increment(self.auto_correlations[i],
+                                        self.cross_correlations[i],
+                                        self.n_images,
+                                        patches,
+                                        self.response))
+            else:
+                # Train correlation filter
+                correlation_filter, auto_correlation, cross_correlation = (
+                    self._icf.train(patches, self.response))
+
+            # Pad filter with zeros
+            padded_filter = pad(correlation_filter, self.padded_size)
+            # Compute fft of padded filter
+            fft_padded_filter = fft2(padded_filter)
+            # Add fft padded filter to list
+            fft_padded_filters.append(fft_padded_filter)
+            auto_correlations.append(auto_correlation)
+            cross_correlations.append(cross_correlation)
+
+        # Turn list into ndarray
+        self.fft_padded_filters = np.asarray(fft_padded_filters)
+        self.auto_correlations = np.asarray(auto_correlations)
+        self.cross_correlations = np.asarray(cross_correlations)
+
+
+# TODO: Document me!
+def generate_gaussian_response(patch_size, response_covariance):
+    r"""Evaluate a zero-mean 2D Gaussian pdf with covariance
+    ``response_covariance`` on the grid ``build_grid(patch_size)``."""
+    points = build_grid(patch_size)
+    gaussian = multivariate_normal(mean=np.zeros(2), cov=response_covariance)
+    return gaussian.pdf(points)
+
+
+# TODO: Document me!
+def generate_cosine_mask(patch_size):
+    r"""Return the 2D Hann window of shape ``patch_size[:2]``: the outer
+    product of the ``np.hanning`` windows for rows and columns."""
+    n_rows, n_cols = patch_size[0], patch_size[1]
+    # outer(a, b)[i, j] == a[i] * b[j], i.e. the column-by-row product
+    return np.outer(np.hanning(n_rows), np.hanning(n_cols))
diff --git a/menpofit/clm/fitter.py b/menpofit/clm/fitter.py
index 4ad4a56..26fe326 100644
--- a/menpofit/clm/fitter.py
+++ b/menpofit/clm/fitter.py
@@ -1,272 +1,49 @@
-from __future__ import division
-import numpy as np
-from menpo.image import Image
+from menpofit import checks
+from menpofit.fitter import ModelFitter
+from menpofit.modelinstance import OrthoPDM
+from .algorithm import (
+    GradientDescentCLMAlgorithm, RegularisedLandmarkMeanShift)
+from .result import CLMFitterResult
 
-from menpofit.transform import DifferentiableAlignmentSimilarity
-from menpofit.modelinstance import PDM, OrthoPDM
-from menpofit.fitter import MultilevelFitter
-from menpofit.gradientdescent import RLMS
 
-
-class CLMFitter(MultilevelFitter):
+# TODO: Document me!
+class CLMFitter(ModelFitter):
     r"""
-    Abstract Interface for defining Constrained Local Models Fitters.
-
-    Parameters
-    -----------
-    clm : :map:`CLM`
-        The Constrained Local Model to be used.
     """
-    def __init__(self, clm):
-        self.clm = clm
-
-    @property
-    def reference_shape(self):
-        r"""
-        The reference shape of the CLM.
-
-        :type: :map:`PointCloud`
-        """
-        return self.clm.reference_shape
-
-    @property
-    def features(self):
-        r"""
-        The feature extracted at each pyramidal level during CLM building.
-        Stored in ascending pyramidal order.
-
-        :type: `list`
-        """
-        return self.clm.features
-
     @property
-    def n_levels(self):
-        r"""
-        The number of pyramidal levels used during CLM building.
+    def clm(self):
+        return self._model
 
-        :type: `int`
-        """
-        return self.clm.n_levels
-
-    @property
-    def downscale(self):
-        r"""
-        The downscale used to generate the final scale factor applied at
-        each pyramidal level during CLM building.
-        The scale factor is computed as:
-
-            ``(downscale ** k) for k in range(n_levels)``
-
-        :type: `float`
-        """
-        return self.clm.downscale
+    def _fitter_result(self, image, algorithm_results, affine_correction,
+                       gt_shape=None):
+        return CLMFitterResult(image, self, algorithm_results,
+                               affine_correction, gt_shape=gt_shape)
 
 
+# TODO: Document me!
+# TODO: Rethink shape model and OrthoPDM relation
 class GradientDescentCLMFitter(CLMFitter):
     r"""
-    Gradient Descent based :map:`Fitter` for Constrained Local Models.
-
-    Parameters
-    -----------
-    clm : :map:`CLM`
-        The Constrained Local Model to be used.
-
-    algorithm : subclass :map:`GradientDescent`, optional
-        The :map:`GradientDescent` class to be used.
-
-    pdm_transform : :map:`GlobalPDM` or subclass, optional
-        The point distribution class to be used.
-
-        .. note::
-
-            Only :map:`GlobalPDM` and its subclasses are supported.
-            :map:`PDM` is not supported at the moment.
-
-    n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-        previous or ``None``, optional
-        The number of shape components or amount of shape variance to be
-        used per pyramidal level.
-
-        If `None`, all available shape components ``(n_active_components)``
-        will be used.
-        If `int` ``> 1``, the specified number of shape components will be
-        used.
-        If ``0. <=`` `float` ``<= 1.``, the number of shape components
-        capturing the specified variance ratio will be computed and used.
-
-        If `list` of length ``n_levels``, then the number of components is
-        defined per level. The first element of the list corresponds to the
-        lowest pyramidal level and so on.
-        If not a `list` or a `list` of length 1, then the specified number of
-        components will be used for all levels.
     """
-    def __init__(self, clm, algorithm=RLMS,
-                 pdm_transform=OrthoPDM, n_shape=None, **kwargs):
-        super(GradientDescentCLMFitter, self).__init__(clm)
-        self._set_up(algorithm=algorithm, pdm_transform=pdm_transform,
-                     n_shape=n_shape, **kwargs)
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the name of fitting algorithm.
-
-        :type: `string`
-        """
-        return 'GD-CLM-' + self._fitters[0].algorithm
-
-    def _set_up(self, algorithm=RLMS,
-                pdm_transform=OrthoPDM,
-                global_transform=DifferentiableAlignmentSimilarity,
-                n_shape=None, **kwargs):
-        r"""
-        Sets up the Gradient Descent Fitter object.
-
-        Parameters
-        -----------
-        algorithm : :map:`GradientDescent`, optional
-            The Gradient Descent class to be used.
-
-        pdm_transform : :map:`GlobalPDM` or subclass, optional
-            The point distribution class to be used.
-
-        n_shape : `int` ``> 1``, ``0. <=`` `float` ``<= 1.``, `list` of the
-            previous or ``None``, optional
-            The number of shape components or amount of shape variance to be
-            used per fitting level.
-
-            If `None`, all available shape components ``(n_active_components)``
-            will be used.
-            If `int` ``> 1``, the specified number of shape components will be
-            used.
-            If ``0. <=`` `float` ``<= 1.``, the number of components capturing the
-            specified variance ratio will be computed and used.
-
-            If `list` of length ``n_levels``, then the number of components is
-            defined per level. The first element of the list corresponds to the
-            lowest pyramidal level and so on.
-            If not a `list` or a `list` of length 1, then the specified number of
-            components will be used for all levels.
-        """
-        # check n_shape parameter
-        if n_shape is not None:
-            if type(n_shape) is int or type(n_shape) is float:
-                for sm in self.clm.shape_models:
-                    sm.n_active_components = n_shape
-            elif len(n_shape) == 1 and self.clm.n_levels > 1:
-                for sm in self.clm.shape_models:
-                    sm.n_active_components = n_shape[0]
-            elif len(n_shape) == self.clm.n_levels:
-                for sm, n in zip(self.clm.shape_models, n_shape):
-                    sm.n_active_components = n
-            else:
-                raise ValueError('n_shape can be an integer or a float or None'
-                                 'or a list containing 1 or {} of '
-                                 'those'.format(self.clm.n_levels))
-
-        self._fitters = []
-        for j, (sm, clf) in enumerate(zip(self.clm.shape_models,
-                                          self.clm.classifiers)):
-
-            if pdm_transform is not PDM:
-                pdm_trans = pdm_transform(sm, global_transform)
-            else:
-                pdm_trans = pdm_transform(sm)
-            self._fitters.append(algorithm(clf, self.clm.patch_shape,
-                                           pdm_trans, **kwargs))
-
-    def __str__(self):
-        from menpofit.base import name_of_callable
-        out = "{0} Fitter\n" \
-              " - Gradient-Descent {1}\n" \
-              " - Transform is {2}.\n" \
-              " - {3} training images.\n".format(
-            self.clm._str_title, self._fitters[0].algorithm,
-            self._fitters[0].transform.__class__.__name__,
-            self.clm.n_training_images)
-        # small strings about number of channels, channels string and downscale
-        down_str = []
-        for j in range(self.n_levels):
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        temp_img = Image(image_data=np.random.rand(50, 50))
-        if self.pyramid_on_features:
-            temp = self.features(temp_img)
-            n_channels = [temp.n_channels] * self.n_levels
-        else:
-            n_channels = []
-            for j in range(self.n_levels):
-                temp = self.features[j](temp_img)
-                n_channels.append(temp.n_channels)
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(
-                name_of_callable(self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
-        else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                if isinstance(self.features[j], str):
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j]))
-                elif self.features[j] is None:
-                    feat_str.append("- No features extracted. ")
-                else:
-                    feat_str.append("- Feature is {} with ".format(
-                        name_of_callable(self.features[j])))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
-                else:
-                    ch_str.append("channels")
-        if self.n_levels > 1:
-            if self.clm.scaled_shape_models:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}.\n   - Each level has a scaled shape " \
-                      "model (reference frame).\n   - Patch size is {}W x " \
-                      "{}H.\n".format(out, self.n_levels, self.downscale,
-                                      self.clm.patch_shape[1],
-                                      self.clm.patch_shape[0])
-
-            else:
-                out = "{} - Gaussian pyramid with {} levels and downscale " \
-                      "factor of {}:\n   - Shape models (reference frames) " \
-                      "are not scaled.\n   - Patch size is {}W x " \
-                      "{}H.\n".format(out, self.n_levels, self.downscale,
-                                      self.clm.patch_shape[1],
-                                      self.clm.patch_shape[0])
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
-            else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-            for i in range(self.n_levels - 1, -1, -1):
-                out = "{}   - Level {} {}: \n".format(out, self.n_levels - i,
-                                                      down_str[i])
-                if not self.pyramid_on_features:
-                    out = "{}     {}{} {} per image.\n".format(
-                        out, feat_str[i], n_channels[i], ch_str[i])
-                out = "{0}     - {1} motion components\n     - {2} {3} " \
-                      "classifiers.\n".format(
-                    out, self._fitters[i].transform.n_parameters,
-                    len(self._fitters[i].classifiers),
-                    name_of_callable(self._fitters[i].classifiers[0]))
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n" \
-                  "   - {4} motion components\n   - {5} {6} " \
-                  "classifiers.".format(
-                out, feat_str[0], n_channels[0], ch_str[0],
-                out, self._fitters[0].transform.n_parameters,
-                len(self._fitters[0].classifiers),
-                name_of_callable(self._fitters[0].classifiers[0]))
-        return out
+    def __init__(self, clm, gd_algorithm_cls=RegularisedLandmarkMeanShift,
+                 n_shape=None):
+        self._model = clm
+        self._gd_algorithms_cls = checks.check_algorithm_cls(
+            gd_algorithm_cls, self.n_scales, GradientDescentCLMAlgorithm)
+        self._check_n_shape(n_shape)
+        # One algorithm per scale, pairing that scale's experts with an OrthoPDM
+        self.algorithms = []
+        for i in range(self.clm.n_scales):
+            pdm = OrthoPDM(self.clm.shape_models[i])
+            algorithm = self._gd_algorithms_cls[i](
+                self.clm.expert_ensembles[i], pdm)
+            self.algorithms.append(algorithm)
+
+
+# TODO: Implement me!
+# TODO: Document me!
+class SupervisedDescentCLMFitter(CLMFitter):
+    r"""Placeholder for a supervised-descent CLM fitter: not implemented
+    yet, so instantiating it raises ``NotImplementedError``."""
+    def __init__(self):
+        raise NotImplementedError
diff --git a/menpofit/clm/result.py b/menpofit/clm/result.py
new file mode 100644
index 0000000..09e532b
--- /dev/null
+++ b/menpofit/clm/result.py
@@ -0,0 +1,14 @@
+from menpofit.result import ParametricAlgorithmResult, MultiFitterResult
+
+
+# TODO: document me!
+class CLMAlgorithmResult(ParametricAlgorithmResult):
+    r"""CLM-specific subclass of ``ParametricAlgorithmResult``; it currently
+    adds no attributes or methods."""
+
+
+# TODO: document me!
+class CLMFitterResult(MultiFitterResult):
+    r"""CLM-specific subclass of ``MultiFitterResult`` with no additions; it
+    is the type built by ``CLMFitter._fitter_result``."""
+
diff --git a/menpofit/feature/__init__.py b/menpofit/feature/__init__.py
new file mode 100644
index 0000000..2d00a6a
--- /dev/null
+++ b/menpofit/feature/__init__.py
@@ -0,0 +1,2 @@
+from .features import (
+    centralize, normalize_norm, normalize_std, normalize_var, probability_map)
diff --git a/menpofit/feature/features.py b/menpofit/feature/features.py
new file mode 100644
index 0000000..8344b35
--- /dev/null
+++ b/menpofit/feature/features.py
@@ -0,0 +1,74 @@
+from __future__ import division
+import numpy as np
+import warnings
+from menpo.feature import ndfeature
+
+
+# TODO: Document me!
+@ndfeature
+def centralize(x, axes=(-2, -1)):
+    r"""Return a new array equal to ``x`` minus its mean over ``axes``
+    (``x`` itself is not modified)."""
+    # keepdims=True lets the per-slice mean broadcast back against x
+    return x - np.mean(x, axis=axes, keepdims=True)
+
+
+# TODO: Document me!
+@ndfeature
+def normalize_norm(x, axes=(-2, -1)):
+    r"""Centre ``x`` over ``axes``, compute its norm over those axes and hand
+    both to ``handle_div_by_zero``, whose result is returned."""
+    x = centralize(x, axes=axes)
+    norm = np.asarray(np.linalg.norm(x, axis=axes))
+    positions = np.asarray(axes) + len(x.shape)
+    for axis in positions:
+        norm = np.expand_dims(norm, axis=axis)
+    return handle_div_by_zero(x, norm)
+
+
+# TODO: document me!
+@ndfeature
+def normalize_std(x, axes=(-2, -1)):
+    r"""Centre ``x`` over ``axes``, then pass it together with its standard
+    deviation over those axes to ``handle_div_by_zero``."""
+    centred = centralize(x, axes=axes)
+    spread = np.std(centred, axis=axes, keepdims=True)
+    return handle_div_by_zero(centred, spread)
+
+
+# TODO: document me!
+@ndfeature
+def normalize_var(x, axes=(-2, -1)):
+    r"""Centre ``x`` over ``axes``, then pass it together with its variance
+    over those axes to ``handle_div_by_zero``."""
+    centred = centralize(x, axes=axes)
+    variance = np.var(centred, axis=axes, keepdims=True)
+    return handle_div_by_zero(centred, variance)
+
+
+# TODO: document me!
+@ndfeature
+def probability_map(x, axes=(-2, -1)):
+    r"""Shift each map over ``axes`` to be non-negative and scale it to sum
+    to 1; constant maps become uniform (``1 / n_elements`` everywhere)."""
+    x = x - np.min(x, axis=axes, keepdims=True)
+    total = np.sum(x, axis=axes, keepdims=True)
+    nonzero = total > 0
+    if np.any(~nonzero):
+        warnings.warn("some of x axes have 0 variance - uniform probability "
+                      "maps are used for them.")
+    # np.where broadcasts the keepdims mask/total against the full-size x
+    size = np.prod([x.shape[axis] for axis in axes])
+    return np.where(nonzero, x / np.where(nonzero, total, 1), 1 / size)
+
+
+# TODO: document me!
+def handle_div_by_zero(x, normalizer):
+    r"""Divide ``x`` in place by ``normalizer`` (broadcast against ``x``)
+    where it is positive, warn about the rest, and return ``x``."""
+    nonzero = normalizer > 0
+    if np.any(~nonzero):
+        warnings.warn("some of x axes have 0 variance - they cannot be "
+                      "normalized.")
+    np.true_divide(x, normalizer, out=x, where=nonzero)
+    return x
diff --git a/menpofit/fitter.py b/menpofit/fitter.py
index 3d93d55..899488f 100644
--- a/menpofit/fitter.py
+++ b/menpofit/fitter.py
@@ -1,115 +1,28 @@
 from __future__ import division
-import abc
-from menpo.transform import AlignmentAffine, Scale, AlignmentSimilarity
 import numpy as np
+from copy import deepcopy
 from menpo.shape import PointCloud
-from menpofit.base import is_pyramid_on_features, pyramid_of_feature_images, \
-    noisy_align
-from menpofit.fittingresult import MultilevelFittingResult
+from menpo.transform import (
+    scale_about_centre, rotate_ccw_about_centre, Translation,
+    Scale, Similarity, AlignmentAffine, AlignmentSimilarity)
+import menpofit.checks as checks
 
 
-class Fitter(object):
+# TODO: document me!
+class MultiFitter(object):
     r"""
-    Abstract interface that all :map:`Fitter` objects must implement.
     """
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractmethod
-    def _set_up(self, **kwargs):
-        r"""
-        Abstract method that sets up the fitter object.
-        """
-        pass
-
-    def fit(self, image, initial_parameters, gt_shape=None, **kwargs):
-        r"""
-        Fits the fitter to an image.
-
-        Parameters
-        -----------
-        image: :map:`Image` or subclass
-            The image to be fitted.
-        initial_parameters: list
-            The initial parameters of the model.
-        gt_shape: :map:`PointCloud`
-            The ground truth shape associated to the image.
-
-        Returns
-        -------
-        fitting_result: :map:`FittingResult`
-            The fitting result containing the result of fitting procedure.
-        """
-        fitting_result = self._create_fitting_result(
-            image, initial_parameters, gt_shape=gt_shape)
-        return self._fit(fitting_result, **kwargs)
-
-    @abc.abstractmethod
-    def _create_fitting_result(self, **kwargs):
-        r"""
-        Abstract method that defines the fitting result object associated to
-        the fitter object.
-        """
-        pass
-
-    @abc.abstractmethod
-    def _fit(self, **kwargs):
-        r"""
-        Abstract method implements a particular alignment algorithm.
-        """
-        pass
-
-    def get_parameters(self, shape):
-        r"""
-        Abstract method that gets the parameters.
-        """
-        pass
-
-
-class MultilevelFitter(Fitter):
-    r"""
-    Abstract interface that all :map:`MultilevelFitter` must implement.
-    """
-
-    @abc.abstractproperty
-    def reference_shape(self):
-        r"""
-        The reference shape of the multilevel fitter.
-        """
-        pass
-
-    @abc.abstractproperty
-    def features(self):
-        r"""
-        Returns the feature computation functions applied at each pyramidal
-        level.
-        """
-        pass
-
-    @abc.abstractproperty
-    def n_levels(self):
-        r"""
-        The number of pyramidal levels.
-        """
-        pass
-
-    @abc.abstractproperty
-    def downscale(self):
-        r"""
-        The downscale factor used by the multiple fitter.
-        """
-        pass
-
     @property
-    def pyramid_on_features(self):
+    def n_scales(self):
         r"""
-        Returns True if the pyramid is computed on the feature image and False
-        if it is computed on the original (intensities) image and features are
-        extracted at each level.
+        The number of scales used during alignment.
+
+        :type: `int`
         """
-        return is_pyramid_on_features(self.features)
+        return len(self.scales)
 
-    def fit(self, image, initial_shape, max_iters=50, gt_shape=None,
-            crop_image=0.5, **kwargs):
+    def fit_from_shape(self, image, initial_shape, max_iters=20, gt_shape=None,
+                       crop_image=None, **kwargs):
         r"""
         Fits the multilevel fitter to an image.
 
@@ -117,20 +30,16 @@ def fit(self, image, initial_shape, max_iters=50, gt_shape=None,
         -----------
         image: :map:`Image` or subclass
             The image to be fitted.
-
         initial_shape: :map:`PointCloud`
             The initial shape estimate from which the fitting procedure
             will start.
-
         max_iters: `int` or `list` of `int`, optional
             The maximum number of iterations.
             If `int`, specifies the overall maximum number of iterations.
             If `list` of `int`, specifies the maximum number of iterations per
             level.
-
         gt_shape: :map:`PointCloud`
             The ground truth shape associated to the image.
-
         crop_image: `None` or float`, optional
             If `float`, it specifies the proportion of the border wrt the
             initial shape to which the image will be internally cropped around
@@ -140,7 +49,6 @@ def fit(self, image, initial_shape, max_iters=50, gt_shape=None,
             This will limit the fitting algorithm search region but is
             likely to speed up its running time, specially when the
             modeled object occupies a small portion of the image.
-
         **kwargs:
             Additional keyword arguments that can be passed to specific
             implementations of ``_fit`` method.
@@ -155,74 +63,28 @@ def fit(self, image, initial_shape, max_iters=50, gt_shape=None,
         images, initial_shapes, gt_shapes = self._prepare_image(
             image, initial_shape, gt_shape=gt_shape, crop_image=crop_image)
 
-        # detach added landmarks from image
-        del image.landmarks['initial_shape']
-        if gt_shape:
-            del image.landmarks['gt_shape']
-
         # work out the affine transform between the initial shape of the
         # highest pyramidal level and the initial shape of the original image
         affine_correction = AlignmentAffine(initial_shapes[-1], initial_shape)
 
         # run multilevel fitting
-        fitting_results = self._fit(images, initial_shapes[0],
-                                    max_iters=max_iters,
-                                    gt_shapes=gt_shapes, **kwargs)
+        algorithm_results = self._fit(images, initial_shapes[0],
+                                      max_iters=max_iters,
+                                      gt_shapes=gt_shapes, **kwargs)
 
         # build multilevel fitting result
-        multi_fitting_result = self._create_fitting_result(
-            image, fitting_results, affine_correction, gt_shape=gt_shape)
-
-        return multi_fitting_result
+        fitter_result = self._fitter_result(
+            image, algorithm_results, affine_correction, gt_shape=gt_shape)
 
-    def perturb_shape(self, gt_shape, noise_std=0.04, rotation=False):
-        r"""
-        Generates an initial shape by adding gaussian noise to the perfect
-        similarity alignment between the ground truth and reference_shape.
-
-        Parameters
-        -----------
-        gt_shape: :class:`menpo.shape.PointCloud`
-            The ground truth shape.
-        noise_std: float, optional
-            The standard deviation of the gaussian noise used to produce the
-            initial shape.
+        return fitter_result
 
-            Default: 0.04
-        rotation: boolean, optional
-            Specifies whether ground truth in-plane rotation is to be used
-            to produce the initial shape.
-
-            Default: False
-
-        Returns
-        -------
-        initial_shape: :class:`menpo.shape.PointCloud`
-            The initial shape.
-        """
-        reference_shape = self.reference_shape
-        return noisy_align(reference_shape, gt_shape, noise_std=noise_std,
-                           rotation=rotation).apply(reference_shape)
-
-    def obtain_shape_from_bb(self, bounding_box):
-        r"""
-        Generates an initial shape given a bounding box detection.
-
-        Parameters
-        -----------
-        bounding_box: (2, 2) ndarray
-            The bounding box specified as:
-
-                np.array([[x_min, y_min], [x_max, y_max]])
-
-        Returns
-        -------
-        initial_shape: :class:`menpo.shape.PointCloud`
-            The initial shape.
-        """
-        reference_shape = self.reference_shape
-        return align_shape_with_bb(reference_shape,
-                                   bounding_box).apply(reference_shape)
+    def fit_from_bb(self, image, bounding_box, max_iters=20, gt_shape=None,
+                    crop_image=None, **kwargs):
+        initial_shape = align_shape_with_bounding_box(self.reference_shape,
+                                                      bounding_box)
+        return self.fit_from_shape(image, initial_shape, max_iters=max_iters,
+                                   gt_shape=gt_shape, crop_image=crop_image,
+                                   **kwargs)
 
     def _prepare_image(self, image, initial_shape, gt_shape=None,
                        crop_image=0.5):
@@ -239,100 +101,79 @@ def _prepare_image(self, image, initial_shape, gt_shape=None,
         ----------
         image : :map:`Image` or subclass
             The image to be fitted.
-
         initial_shape : :map:`PointCloud`
             The initial shape from which the fitting will start.
-
         gt_shape : class : :map:`PointCloud`, optional
             The original ground truth shape associated to the image.
-
         crop_image: `None` or float`, optional
             If `float`, it specifies the proportion of the border wrt the
             initial shape to which the image will be internally cropped around
             the initial shape range.
             If `None`, no cropping is performed.
-
             This will limit the fitting algorithm search region but is
             likely to speed up its running time, specially when the
             modeled object occupies a small portion of the image.
-
         Returns
         -------
         images : `list` of :map:`Image` or subclass
             The list of images that will be fitted by the fitters.
-
         initial_shapes : `list` of :map:`PointCloud`
             The initial shape for each one of the previous images.
-
         gt_shapes : `list` of :map:`PointCloud`
             The ground truth shape for each one of the previous images.
         """
-        # attach landmarks to the image
-        image.landmarks['initial_shape'] = initial_shape
+        # Attach landmarks to the image
+        image.landmarks['__initial_shape'] = initial_shape
         if gt_shape:
-            image.landmarks['gt_shape'] = gt_shape
+            image.landmarks['__gt_shape'] = gt_shape
 
-        # if specified, crop the image
         if crop_image:
-            image = image.copy()
-            image.crop_to_landmarks_proportion_inplace(crop_image,
-                                                       group='initial_shape')
+            # If specified, crop the image
+            image = image.crop_to_landmarks_proportion(crop_image,
+                                                       group='__initial_shape')
 
-        # rescale image wrt the scale factor between reference_shape and
+        # Rescale image wrt the scale factor between reference_shape and
         # initial_shape
-        image = image.rescale_to_reference_shape(self.reference_shape,
-                                                 group='initial_shape')
-
-        images = list(reversed(list(pyramid_of_feature_images(
-            self.n_levels, self.downscale, self.features, image))))
-
-        # get initial shapes per level
-        initial_shapes = [i.landmarks['initial_shape'].lms for i in images]
-
-        # get ground truth shapes per level
+        image = image.rescale_to_pointcloud(self.reference_shape,
+                                            group='__initial_shape')
+
+        # Compute image representation
+        images = []
+        for i in range(self.n_scales):
+            # Handle features
+            if i == 0 or self.holistic_features[i] is not self.holistic_features[i - 1]:
+                # Compute features only if this is the first pass through
+                # the loop or the features at this scale are different from
+                # the features at the previous scale
+                feature_image = self.holistic_features[i](image)
+
+            # Handle scales
+            if self.scales[i] != 1:
+                # Scale feature images only if scale is different than 1
+                scaled_image = feature_image.rescale(self.scales[i])
+            else:
+                scaled_image = feature_image
+
+            # Add scaled image to list
+            images.append(scaled_image)
+
+        # Get initial shapes per level
+        initial_shapes = [i.landmarks['__initial_shape'].lms for i in images]
+
+        # Get ground truth shapes per level
         if gt_shape:
-            gt_shapes = [i.landmarks['gt_shape'].lms for i in images]
-            del image.landmarks['gt_shape']
+            gt_shapes = [i.landmarks['__gt_shape'].lms for i in images]
         else:
             gt_shapes = None
 
-        return images, initial_shapes, gt_shapes
-
-    def _create_fitting_result(self, image, fitting_results, affine_correction,
-                               gt_shape=None):
-        r"""
-        Creates the :class: `menpo.aam.fitting.MultipleFitting` object
-        associated with a particular Fitter object.
-
-        Parameters
-        -----------
-        image: :class:`menpo.image.masked.MaskedImage`
-            The original image to be fitted.
-        fitting_results: :class:`menpo.fit.fittingresult.FittingResultList`
-            A list of basic fitting objects containing the state of the
-            different fitting levels.
-        affine_correction: :class: `menpo.transforms.affine.Affine`
-            An affine transform that maps the result of the top resolution
-            fitting level to the space scale of the original image.
-        gt_shape: class:`menpo.shape.PointCloud`, optional
-            The ground truth shape associated to the image.
-
-            Default: None
-        error_type: 'me_norm', 'me' or 'rmse', optional
-            Specifies the way in which the error between the fitted and
-            ground truth shapes is to be computed.
-
-            Default: 'me_norm'
+        # detach added landmarks from image
+        del image.landmarks['__initial_shape']
+        if gt_shape:
+            del image.landmarks['__gt_shape']
 
-        Returns
-        -------
-        fitting: :class:`menpo.fitmultilevel.fittingresult.MultilevelFittingResult`
-            The fitting object that will hold the state of the fitter.
-        """
-        return MultilevelFittingResult(image, self, fitting_results,
-                                       affine_correction, gt_shape=gt_shape)
+        return images, initial_shapes, gt_shapes
 
-    def _fit(self, images, initial_shape, gt_shapes=None, max_iters=50,
+    def _fit(self, images, initial_shape, gt_shapes=None, max_iters=20,
              **kwargs):
         r"""
         Fits the fitter to the multilevel pyramidal images.
@@ -346,8 +187,6 @@ def _fit(self, images, initial_shape, gt_shapes=None, max_iters=50,
         gt_shapes: :class:`menpo.shape.PointCloud` list, optional
             The original ground truth shapes associated to the multilevel
             images.
-
-            Default: None
         max_iters: int or list, optional
             The maximum number of iterations.
             If int, then this will be the overall maximum number of iterations
@@ -355,66 +194,208 @@ def _fit(self, images, initial_shape, gt_shapes=None, max_iters=50,
             If list, then a maximum number of iterations is specified for each
             pyramidal level.
 
-            Default: 50
-
         Returns
         -------
-        fitting_results: :class:`menpo.fit.fittingresult.FittingResult` list
+        algorithm_results: :class:`FittingResult` list
             The fitting object containing the state of the whole fitting
             procedure.
         """
+        # Normalise max_iters so it can be indexed once per scale below
+        max_iters = checks.check_max_iters(max_iters, self.n_scales)
+
+        # Set initial and ground truth shapes
         shape = initial_shape
         gt_shape = None
-        n_levels = self.n_levels
-
-        # check max_iters parameter
-        if type(max_iters) is int:
-            max_iters = [np.round(max_iters/n_levels)
-                         for _ in range(n_levels)]
-        elif len(max_iters) == 1 and n_levels > 1:
-            max_iters = [np.round(max_iters[0]/n_levels)
-                         for _ in range(n_levels)]
-        elif len(max_iters) != n_levels:
-            raise ValueError('max_iters can be integer, integer list '
-                             'containing 1 or {} elements or '
-                             'None'.format(self.n_levels))
-
-        # fit images
-        fitting_results = []
-        for j, (i, f, it) in enumerate(zip(images, self._fitters, max_iters)):
+
+        # Initialize list of algorithm results
+        algorithm_results = []
+        for i in range(self.n_scales):
+            # Handle ground truth shape
             if gt_shapes is not None:
-                gt_shape = gt_shapes[j]
+                gt_shape = gt_shapes[i]
 
-            parameters = f.get_parameters(shape)
-            fitting_result = f.fit(i, parameters, gt_shape=gt_shape,
-                                   max_iters=it, **kwargs)
-            fitting_results.append(fitting_result)
+            # Run this scale's algorithm from the current shape estimate
+            algorithm_result = self.algorithms[i].run(images[i], shape,
+                                                      gt_shape=gt_shape,
+                                                      max_iters=max_iters[i],
+                                                      **kwargs)
+            # Add algorithm result to the list
+            algorithm_results.append(algorithm_result)
 
-            shape = fitting_result.final_shape
-            Scale(self.downscale, n_dims=shape.n_dims).apply_inplace(shape)
+            # Move to the next scale; index test stays right if scales repeat
+            shape = algorithm_result.final_shape
+            if i < self.n_scales - 1:
+                shape = Scale(self.scales[i + 1] / self.scales[i],
+                              n_dims=shape.n_dims).apply(shape)
 
-        return fitting_results
+        # Return list of algorithm results
+        return algorithm_results
 
 
-def align_shape_with_bb(shape, bounding_box):
+# TODO: document me!
+class ModelFitter(MultiFitter):
     r"""
-    Returns the Similarity transform that aligns the provided shape with the
-    provided bounding box.
+    Delegates reference shape, features and scales to ``self._model``."""
+    @property
+    def reference_shape(self):
+        r"""
+        The reference shape of the underlying model (``self._model``).
+
+        :type: :map:`PointCloud`
+        """
+        return self._model.reference_shape
+
+    @property
+    def holistic_features(self):
+        r"""The ``holistic_features`` attribute of the underlying model.
+        """
+        return self._model.holistic_features
+
+    @property
+    def scales(self):
+        return self._model.scales
+
+    def _check_n_shape(self, n_shape):
+        checks.set_models_components(self._model.shape_models, n_shape)
+
+    def noisy_shape_from_bounding_box(self, bounding_box, noise_type='uniform',
+                                      noise_percentage=0.1, rotation=False):
+        return noisy_shape_from_bounding_box(
+            self.reference_shape, bounding_box, noise_type=noise_type,
+            noise_percentage=noise_percentage, rotation=rotation)
+
+    def noisy_shape_from_shape(self, shape, noise_type='uniform',
+                               noise_percentage=0.1, rotation=False):
+        return noisy_shape_from_shape(
+            self.reference_shape, shape, noise_type=noise_type,
+            noise_percentage=noise_percentage, rotation=rotation)
+
+
+def noisy_alignment_similarity_transform(source, target, noise_type='uniform',
+                                         noise_percentage=0.1, rotation=False):
+    r"""
+    Constructs and perturbs the optimal similarity transform between source
+    and target by adding noise to its parameters.
 
     Parameters
     ----------
-    shape: :class:`menpo.shape.PointCloud`
-        The shape to be aligned.
-    bounding_box: (2, 2) ndarray
-        The bounding box specified as:
+    source: :class:`menpo.shape.PointCloud`
+        The source pointcloud instance used in the alignment
+    target: :class:`menpo.shape.PointCloud`
+        The target pointcloud instance used in the alignment
+    noise_type: str, optional
+        The type of noise to be added, 'uniform' or 'gaussian'. Any other
+        value raises a ``ValueError``.
+    noise_percentage: 0 < float < 1 or triplet of 0 < float < 1, optional
+        The standard percentage of noise to be added. If float the same amount
+        of noise is applied to the scale, rotation and translation
+        parameters of the true similarity transform. If triplet of
+        floats, the first, second and third elements denote the amount of
+        noise to be applied to the scale, rotation and translation
+        parameters respectively.
+    rotation: boolean, optional
+        If False rotation is not considered when computing the optimal
+        similarity transform between source and target.
+
+    Returns
+    -------
+    noisy_alignment_similarity_transform : :class: `menpo.transform.Similarity`
+        The noisy Similarity Transform between source and target.
+    """
+    if isinstance(noise_percentage, (int, float)):
+        noise_percentage = [noise_percentage] * 3
+    elif len(noise_percentage) == 1:
+        # Build a new list so the caller's sequence is never mutated
+        noise_percentage = list(noise_percentage) * 3
+
+    similarity = AlignmentSimilarity(source, target, rotation=rotation)
+
+    # Compare by value: ``is`` on strings only works when they are interned
+    if noise_type == 'gaussian':
+        s = noise_percentage[0] * (0.5 / 3) * np.asscalar(np.random.randn(1))
+        r = noise_percentage[1] * (180 / 3) * np.asscalar(np.random.randn(1))
+        t = noise_percentage[2] * (target.range() / 3) * np.random.randn(2)
+    elif noise_type == 'uniform':
+        # All three samples are uniform in [-1, 1), hence ``rand`` throughout
+        s = noise_percentage[0] * 0.5 * (2 * np.asscalar(np.random.rand(1)) - 1)
+        r = noise_percentage[1] * 180 * (2 * np.asscalar(np.random.rand(1)) - 1)
+        t = noise_percentage[2] * target.range() * (2 * np.random.rand(2) - 1)
+    else:
+        raise ValueError("noise_type must be 'uniform' or 'gaussian'")
+
+    s = scale_about_centre(target, 1 + s)
+    r = rotate_ccw_about_centre(target, r)
+    t = Translation(t, source.n_dims)
+    return similarity.compose_after(t.compose_after(s.compose_after(r)))
+
 
-            np.array([[x_min, y_min], [x_max, y_max]])
+def noisy_target_alignment_transform(source, target,
+                                     alignment_transform_cls=AlignmentAffine,
+                                     noise_std=0.1, **kwargs):
+    r"""
+    Constructs the optimal alignment transform between the source and a
+    noisy version of the target, obtained by adding white noise to each of
+    its points. Extra keyword arguments go to ``alignment_transform_cls``.
+
+    Parameters
+    ----------
+    source: :class:`menpo.shape.PointCloud`
+        The source pointcloud instance used in the alignment
+    target: :class:`menpo.shape.PointCloud`
+        The target pointcloud instance used in the alignment
+    alignment_transform_cls: :class:`menpo.transform.Alignment`, optional
+        The alignment transform class used to perform the alignment.
+    noise_std: float or triplet of floats, optional
+        The standard deviation of the white noise added to each one of the
+        target points; it is multiplied by ``target.range()`` before use.
+
+    Returns
+    -------
+    noisy_transform : :class: `menpo.transform.Alignment`
+        The alignment transform from the source to the noisy target.
+    """
+    noise = noise_std * target.range() * np.random.randn(target.n_points,
+                                                         target.n_dims)
+    noisy_target = PointCloud(target.points + noise)
+    return alignment_transform_cls(source, noisy_target, **kwargs)
+
+
+def noisy_shape_from_bounding_box(shape, bounding_box, noise_type='uniform',
+                                  noise_percentage=0.1, rotation=False):
+    transform = noisy_alignment_similarity_transform(
+        shape.bounding_box(), bounding_box, noise_type=noise_type,
+        noise_percentage=noise_percentage, rotation=rotation)
+    return transform.apply(shape)
+
+
+def noisy_shape_from_shape(reference_shape, shape, noise_type='uniform',
+                           noise_percentage=0.1, rotation=False):
+    transform = noisy_alignment_similarity_transform(
+        reference_shape, shape, noise_type=noise_type,
+        noise_percentage=noise_percentage, rotation=rotation)
+    return transform.apply(reference_shape)
+
+
+def align_shape_with_bounding_box(shape, bounding_box,
+                                  alignment_transform_cls=AlignmentSimilarity,
+                                  **kwargs):
+    r"""
+    Aligns the shape with the bounding box and returns the aligned shape.
+
+    Parameters
+    ----------
+    shape: :class:`menpo.shape.PointCloud`
+        The shape to align; its own bounding box is the alignment source.
+    bounding_box: :class:`menpo.shape.PointCloud`
+        The bounding box instance used in the alignment.
+    alignment_transform_cls: :class:`menpo.transform.Alignment`, optional
+        The class of the alignment transform; ``**kwargs`` are passed to it.
 
     Returns
     -------
-    transform : :class: `menpo.transform.Similarity`
-        The align transform
+    aligned_shape : :class:`menpo.shape.PointCloud`
+        The result of applying the alignment transform to ``shape``.
     """
-    shape_box = PointCloud(shape.bounds())
-    bounding_box = PointCloud(bounding_box)
-    return AlignmentSimilarity(shape_box, bounding_box, rotation=False)
\ No newline at end of file
+    shape_bb = shape.bounding_box()
+    transform = alignment_transform_cls(shape_bb, bounding_box, **kwargs)
+    return transform.apply(shape)
diff --git a/menpofit/gradientdescent/__init__.py b/menpofit/gradientdescent/__init__.py
deleted file mode 100755
index 8d1122e..0000000
--- a/menpofit/gradientdescent/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .base import RLMS
diff --git a/menpofit/gradientdescent/base.py b/menpofit/gradientdescent/base.py
deleted file mode 100644
index c9546ee..0000000
--- a/menpofit/gradientdescent/base.py
+++ /dev/null
@@ -1,201 +0,0 @@
-from __future__ import division
-import numpy as np
-from menpofit.base import build_sampling_grid
-
-multivariate_normal = None  # expensive, from scipy.stats
-
-from menpofit.fitter import Fitter
-from menpofit.fittingresult import SemiParametricFittingResult
-
-
-# TODO: incorporate different residuals
-# TODO: generalize transform prior, and map the changes to LK methods
-class GradientDescent(Fitter):
-    r"""
-    Abstract Interface for defining Gradient Descent based fitting algorithms
-    for Constrained Local Models [1]_.
-
-    Parameters
-    ----------
-    classifiers : `list` of ``classifier_callable``
-        The list containing the classifier that will produce the response
-        maps for each landmark point.
-    patch_shape : `tuple` of `int`
-        The shape of the patches used to train the classifiers.
-    transform : :map:`GlobalPDM` or subclass
-        The global point distribution model to be used.
-
-        .. note::
-
-            Only :map:`GlobalPDM` and its subclasses are supported.
-            :map:`PDM` is not supported at the moment.
-    eps : `float`, optional
-        The convergence value. When calculating the level of convergence, if
-        the norm of the delta parameter updates is less than ``eps``, the
-        algorithm is considered to have converged.
-
-    References
-    ----------
-    .. [1] J. Saragih, S. Lucey and J. Cohn, ''Deformable Model Fitting by
-    Regularized Landmark Mean-Shifts", International Journal of Computer
-    Vision (IJCV), 2010.
-    """
-    def __init__(self, classifiers, patch_shape, pdm, eps=10**-10):
-        self.classifiers = classifiers
-        self.patch_shape = patch_shape
-        self.transform = pdm
-        self.eps = eps
-        # pre-computations
-        self._set_up()
-
-    def _create_fitting_result(self, image, parameters, gt_shape=None):
-        return SemiParametricFittingResult(
-            image, self, parameters=[parameters], gt_shape=gt_shape)
-
-    def fit(self, image, initial_parameters, gt_shape=None, **kwargs):
-        self.transform.from_vector_inplace(initial_parameters)
-        return Fitter.fit(self, image, initial_parameters, gt_shape=gt_shape,
-                          **kwargs)
-
-    def get_parameters(self, shape):
-        self.transform.set_target(shape)
-        return self.transform.as_vector()
-
-
-class RLMS(GradientDescent):
-    r"""
-    Implementation of the Regularized Landmark Mean-Shifts algorithm for
-    fitting Constrained Local Models described in [1]_.
-
-    Parameters
-    ----------
-    classifiers : `list` of ``classifier_callable``
-        The list containing the classifier that will produce the response
-        maps for each landmark point.
-    patch_shape : `tuple` of `int`
-        The shape of the patches used to train the classifiers.
-    transform : :map:`GlobalPDM` or subclass
-        The global point distribution model to be used.
-
-        .. note::
-
-            Only :map:`GlobalPDM` and its subclasses are supported.
-            :map:`PDM` is not supported at the moment.
-    eps : `float`, optional
-        The convergence value. When calculating the level of convergence, if
-        the norm of the delta parameter updates is less than ``eps``, the
-        algorithm is considered to have converged.
-    scale: `float`, optional
-        Constant value that will be multiplied to the `noise_variance` of
-        the pdm in order to compute the covariance of the KDE
-        approximation.
-
-    References
-    ----------
-    .. [1] J. Saragih, S. Lucey and J. Cohn, ''Deformable Model Fitting by
-    Regularized Landmark Mean-Shifts", International Journal of Computer
-    Vision (IJCV), 2010.
-    """
-    def __init__(self, classifiers, patch_shape, pdm, eps=10**-10, scale=10):
-        self.scale = scale
-        super(RLMS, self).__init__(
-            classifiers, patch_shape, pdm, eps=eps)
-
-    @property
-    def algorithm(self):
-        return 'RLMS'
-
-    def _set_up(self):
-        global multivariate_normal
-        if multivariate_normal is None:
-            from scipy.stats import multivariate_normal  # expensive
-        # Build the sampling grid associated to the patch shape
-        self._sampling_grid = build_sampling_grid(self.patch_shape)
-        # Define the 2-dimensional gaussian distribution
-        mean = np.zeros(self.transform.n_dims)
-        covariance = self.scale * self.transform.model.noise_variance()
-        mvn = multivariate_normal(mean=mean, cov=covariance)
-        # Compute Gaussian-KDE grid
-        self._kernel_grid = mvn.pdf(self._sampling_grid)
-
-        # Jacobian
-        self._J = self.transform.d_dp([])
-
-        # Prior
-        sim_prior = np.zeros((4,))
-        pdm_prior = 1 / self.transform.model.eigenvalues
-        self._J_prior = np.hstack((sim_prior, pdm_prior))
-
-        # Inverse Hessian
-        H = np.einsum('ijk, ilk -> jl', self._J, self._J)
-        self._inv_H = np.linalg.inv(np.diag(self._J_prior) + H)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        target = self.transform.target
-        n_iters = 0
-
-        max_h = image.shape[-2] - 1
-        max_w = image.shape[-1] - 1
-
-        image_pixels = np.reshape(image.pixels, (image.n_channels, -1)).T
-        response_image = np.zeros((target.n_points, image.shape[-2],
-                                   image.shape[-1]))
-
-        # Compute response maps
-        for j, clf in enumerate(self.classifiers):
-            response_image[j, :, :] = np.reshape(clf(image_pixels),
-                                                 image.shape)
-
-        while n_iters < max_iters and error > self.eps:
-
-            mean_shift_target = np.zeros_like(target.points)
-
-            # Compute mean-shift vectors
-            for j, point in enumerate(target.points):
-
-                patch_grid = (self._sampling_grid +
-                              np.round(point[None, None, ...]).astype(int))
-
-                x = patch_grid[:, :, 0]
-                y = patch_grid[:, :, 1]
-
-                # deal with boundaries
-                x[x > max_h] = max_h
-                y[y > max_w] = max_w
-                x[x < 0] = 0
-                y[y < 0] = 0
-
-                kernel_response = response_image[j, x, y] * self._kernel_grid
-                normalizer = np.sum(kernel_response)
-                normalized_kernel_response = kernel_response / normalizer
-
-                mean_shift_target[j, :] = np.sum(
-                    normalized_kernel_response * (x, y), axis=(1, 2))
-
-            # Compute (shape) error term
-            error = mean_shift_target - target.points
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = np.einsum('ijk, ik -> j', self._J, error)
-
-            # TODO: a similar approach could be implemented in LK
-            # Deal with prior
-            prior = self._J_prior * self.transform.as_vector()
-
-            # Compute parameter updates
-            delta_p = -np.dot(self._inv_H, prior - sd_delta_p)
-
-            # Update transform weights
-            parameters = self.transform.as_vector() + delta_p
-            fitting_result.parameters.append(parameters)
-            self.transform.from_vector_inplace(parameters)
-            target = self.transform.target
-
-            # Test convergence
-            error = np.abs(np.linalg.norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
diff --git a/menpofit/gradientdescent/residual.py b/menpofit/gradientdescent/residual.py
deleted file mode 100755
index 658e0ca..0000000
--- a/menpofit/gradientdescent/residual.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import abc
-
-
-class Residual(object):
-    r"""
-    """
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractproperty
-    def error(self):
-        pass
-
-    @abc.abstractproperty
-    def error_derivative(self):
-        pass
-
-    @abc.abstractproperty
-    def d_dp(self):
-        pass
-
-    @abc.abstractproperty
-    def hessian(self):
-        pass
-
-
-class SSD(Residual):
-
-    type = 'SSD'
-
-    def error(self):
-        raise ValueError("Not implemented")
-
-    def error_derivative(self):
-        raise ValueError("Not implemented")
-
-    def d_dp(self):
-        raise ValueError("Not implemented")
-
-    def hessian(self):
-        raise ValueError("Not implemented")
-
-
-class Robust(Residual):
-
-    def __init__(self):
-        raise ValueError("Not implemented")
-
-    def error(self):
-        raise ValueError("Not implemented")
-
-    def error_derivative(self):
-        raise ValueError("Not implemented")
-
-    def d_dp(self):
-        raise ValueError("Not implemented")
-
-    def hessian(self):
-        raise ValueError("Not implemented")
-
-    @abc.abstractmethod
-    def _weights(self):
-        pass
-
-
-class Fair(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class L1L2(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class GemanMcClure(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class Cauchy(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class Welsch(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class Huber(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
-
-
-class Turkey(Robust):
-
-    def _weights(self):
-        raise ValueError("Not implemented")
diff --git a/menpofit/lk/__init__.py b/menpofit/lk/__init__.py
new file mode 100644
index 0000000..7a1abbc
--- /dev/null
+++ b/menpofit/lk/__init__.py
@@ -0,0 +1,5 @@
+from .fitter import LucasKanadeFitter
+from .algorithm import (
+    ForwardAdditive, ForwardCompositional, InverseCompositional)
+from .residual import (
+    SSD, FourierSSD, ECC, GradientImages, GradientCorrelation)
diff --git a/menpofit/lk/algorithm.py b/menpofit/lk/algorithm.py
new file mode 100644
index 0000000..49bcd5b
--- /dev/null
+++ b/menpofit/lk/algorithm.py
@@ -0,0 +1,203 @@
+from scipy.linalg import norm
+import numpy as np
+from .result import LucasKanadeAlgorithmResult
+
+
+# TODO: implement Inverse Additive Algorithm?
+# TODO: implement sampling?
+# TODO: document me!
+class LucasKanade(object):
+    r"""Base class holding the template, transform, residual and the
+    convergence threshold ``eps`` used by the Lucas-Kanade variants below."""
+    def __init__(self, template, transform, residual, eps=10**-10):
+        self.template = template
+        self.transform = transform
+        self.residual = residual
+        self.eps = eps  # run() loops stop once the update norm is <= eps
+
+
+# TODO: document me!
+class ForwardAdditive(LucasKanade):
+    r"""
+    Forward Additive Lucas-Kanade algorithm
+    """
+    def run(self, image, initial_shape, max_iters=20, gt_shape=None):
+        r"""Run from ``initial_shape`` until ``max_iters`` or until the update
+        norm is <= ``self.eps``; returns a LucasKanadeAlgorithmResult."""
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        cost_functions = []
+
+        # initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf
+
+        # Forward Additive Algorithm
+        while k < max_iters and eps > self.eps:
+            # warp image
+            IWxp = image.warp_to_mask(self.template.mask, self.transform)
+
+            # compute warp jacobian (recomputed on every iteration)
+            dW_dp = np.rollaxis(
+                self.transform.d_dp(self.template.indices()), -1)
+            dW_dp = dW_dp.reshape(dW_dp.shape[:1] + self.template.shape +
+                                  dW_dp.shape[-1:])
+
+            # compute steepest descent images (gradient taken on input image)
+            filtered_J, J = self.residual.steepest_descent_images(
+                image, dW_dp, forward=(self.template, self.transform))
+
+            # compute hessian
+            H = self.residual.hessian(filtered_J, sdi2=J)
+
+            # compute steepest descent parameter updates.
+            sd_dp = self.residual.steepest_descent_update(
+                filtered_J, IWxp, self.template)
+
+            # compute gradient descent parameter updates
+            dp = -np.real(np.linalg.solve(H, sd_dp))
+
+            # additive update of the warp parameters
+            self.transform.from_vector_inplace(self.transform.as_vector() + dp)
+            p_list.append(self.transform.as_vector())
+
+            # store a closure that evaluates the cost of this iteration
+            cost_functions.append(self.residual.cost_closure())
+
+            # test convergence (a norm is already non-negative)
+            eps = norm(dp)
+            k += 1
+
+        return LucasKanadeAlgorithmResult(image, self, p_list,
+                                          cost_functions=cost_functions,
+                                          gt_shape=gt_shape)
+
+
+# TODO: document me!
+class ForwardCompositional(LucasKanade):
+    r"""
+    Forward Compositional Lucas-Kanade algorithm
+    """
+    def __init__(self, template, transform, residual, eps=10**-10):
+        super(ForwardCompositional, self).__init__(
+            template, transform, residual, eps=eps)
+        self._precompute()
+
+    def _precompute(self):
+        # compute the warp jacobian once; run() reuses it on every iteration
+        dW_dp = np.rollaxis(
+            self.transform.d_dp(self.template.indices()), -1)
+        self.dW_dp = dW_dp.reshape(dW_dp.shape[:1] + self.template.shape +
+                                   dW_dp.shape[-1:])
+
+    def run(self, image, initial_shape, max_iters=20, gt_shape=None):
+        r"""Run from ``initial_shape`` until ``max_iters`` or until the update
+        norm is <= ``self.eps``; returns a LucasKanadeAlgorithmResult."""
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        cost_functions = []
+
+        # initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf
+
+        # Forward Compositional Algorithm
+        while k < max_iters and eps > self.eps:
+            # warp image
+            IWxp = image.warp_to_mask(self.template.mask, self.transform)
+
+            # compute steepest descent images (gradient of the warped image)
+            filtered_J, J = self.residual.steepest_descent_images(
+                IWxp, self.dW_dp)
+
+            # compute hessian
+            H = self.residual.hessian(filtered_J, sdi2=J)
+
+            # compute steepest descent parameter updates.
+            sd_dp = self.residual.steepest_descent_update(
+                filtered_J, IWxp, self.template)
+
+            # compute gradient descent parameter updates
+            dp = -np.real(np.linalg.solve(H, sd_dp))
+
+            # compose the incremental warp with the current transform
+            self.transform.compose_after_from_vector_inplace(dp)
+            p_list.append(self.transform.as_vector())
+
+            # store a closure that evaluates the cost of this iteration
+            cost_functions.append(self.residual.cost_closure())
+
+            # test convergence (a norm is already non-negative)
+            eps = norm(dp)
+            k += 1
+
+        return LucasKanadeAlgorithmResult(image, self, p_list,
+                                          cost_functions=cost_functions,
+                                          gt_shape=gt_shape)
+
+
+# TODO: document me!
+class InverseCompositional(LucasKanade):
+    r"""
+    Inverse Compositional Lucas-Kanade algorithm
+    """
+    def __init__(self, template, transform, residual, eps=10**-10):
+        super(InverseCompositional, self).__init__(
+            template, transform, residual, eps=eps)
+        self._precompute()
+
+    def _precompute(self):
+        # compute warp jacobian
+        dW_dp = np.rollaxis(self.transform.d_dp(self.template.indices()), -1)
+        dW_dp = dW_dp.reshape(dW_dp.shape[:1] + self.template.shape +
+                              dW_dp.shape[-1:])
+        # compute steepest descent images on the template (done only once)
+        self.filtered_J, J = self.residual.steepest_descent_images(
+            self.template, dW_dp)
+        # compute hessian (reused by every iteration of run())
+        self.H = self.residual.hessian(self.filtered_J, sdi2=J)
+
+    def run(self, image, initial_shape, max_iters=20, gt_shape=None):
+        r"""Run from ``initial_shape`` until ``max_iters`` or until the update
+        norm is <= ``self.eps``; returns a LucasKanadeAlgorithmResult."""
+        # initialize transform
+        self.transform.set_target(initial_shape)
+        p_list = [self.transform.as_vector()]
+
+        cost_functions = []
+
+        # initialize iteration counter and epsilon
+        k = 0
+        eps = np.inf
+
+        # Baker-Matthews, Inverse Compositional Algorithm
+        while k < max_iters and eps > self.eps:
+            # warp image
+            IWxp = image.warp_to_mask(self.template.mask, self.transform)
+
+            # compute steepest descent parameter updates.
+            sd_dp = self.residual.steepest_descent_update(
+                self.filtered_J, IWxp, self.template)
+
+            # compute gradient descent parameter updates
+            dp = np.real(np.linalg.solve(self.H, sd_dp))
+
+            # update warp: compose the current transform with the inverted dp
+            inv_dp = self.transform.pseudoinverse_vector(dp)
+            self.transform.compose_after_from_vector_inplace(inv_dp)
+            p_list.append(self.transform.as_vector())
+
+            # store a closure that evaluates the cost of this iteration
+            cost_functions.append(self.residual.cost_closure())
+
+            # test convergence (a norm is already non-negative)
+            eps = norm(dp)
+            k += 1
+
+        return LucasKanadeAlgorithmResult(image, self, p_list,
+                                          cost_functions=cost_functions,
+                                          gt_shape=gt_shape)
diff --git a/menpofit/lk/fitter.py b/menpofit/lk/fitter.py
new file mode 100644
index 0000000..b1a639a
--- /dev/null
+++ b/menpofit/lk/fitter.py
@@ -0,0 +1,71 @@
+from __future__ import division
+from menpo.feature import no_op
+from menpofit.transform import DifferentiableAlignmentAffine
+from menpofit.fitter import (MultiFitter, noisy_shape_from_shape,
+                             noisy_shape_from_bounding_box)
+from menpofit import checks
+from .algorithm import InverseCompositional
+from .residual import SSD
+from .result import LucasKanadeFitterResult
+
+
+# TODO: document me!
+class LucasKanadeFitter(MultiFitter):
+    r"""Lucas-Kanade fitter that builds one ``algorithm_cls`` instance per
+    template/source pair returned by ``_prepare_template``."""
+    def __init__(self, template, group=None, holistic_features=no_op,
+                 transform_cls=DifferentiableAlignmentAffine, diagonal=None,
+                 scales=(0.5, 1.0), algorithm_cls=InverseCompositional,
+                 residual_cls=SSD):
+        # option checks are delegated to menpofit.checks
+        checks.check_diagonal(diagonal)
+        scales = checks.check_scales(scales)
+        holistic_features = checks.check_features(holistic_features,
+                                                  len(scales))
+
+        self.holistic_features = holistic_features
+        self.transform_cls = transform_cls
+        self.diagonal = diagonal
+        self.scales = list(scales)
+        # Make template masked for warping
+        template = template.as_masked(copy=False)
+        # rescale only when a diagonal was requested
+        if self.diagonal:
+            template = template.rescale_landmarks_to_diagonal_range(
+                self.diagonal, group=group)
+        self.reference_shape = template.landmarks[group].lms
+
+        self.templates, self.sources = self._prepare_template(template,
+                                                              group=group)
+        self._set_up(algorithm_cls, residual_cls)
+
+    def _set_up(self, algorithm_cls, residual_cls):
+        self.algorithms = []  # one algorithm per (template, source) pair
+        for t, s in zip(self.templates, self.sources):
+            transform = self.transform_cls(s, s)
+            residual = residual_cls()
+            algorithm = algorithm_cls(t, transform, residual)
+            self.algorithms.append(algorithm)
+
+    def _prepare_template(self, template, group=None):
+        gt_shape = template.landmarks[group].lms
+        templates, _, sources = self._prepare_image(template, gt_shape,
+                                                    gt_shape=gt_shape)
+        return templates, sources
+
+    def noisy_shape_from_bounding_box(self, bounding_box, noise_type='uniform',
+                                      noise_percentage=0.1, rotation=False):
+        return noisy_shape_from_bounding_box(
+            self.reference_shape, bounding_box, noise_type=noise_type,
+            noise_percentage=noise_percentage, rotation=rotation)
+
+    def noisy_shape_from_shape(self, shape, noise_type='uniform',
+                               noise_percentage=0.1, rotation=False):
+        return noisy_shape_from_shape(
+            self.reference_shape, shape, noise_type=noise_type,
+            noise_percentage=noise_percentage, rotation=rotation)
+
+    def _fitter_result(self, image, algorithm_results, affine_correction,
+                       gt_shape=None):
+        return LucasKanadeFitterResult(image, self, algorithm_results,
+                                       affine_correction, gt_shape=gt_shape)
diff --git a/menpofit/lucaskanade/residual.py b/menpofit/lk/residual.py
similarity index 56%
rename from menpofit/lucaskanade/residual.py
rename to menpofit/lk/residual.py
index f97d06f..9a44f20 100755
--- a/menpofit/lucaskanade/residual.py
+++ b/menpofit/lk/residual.py
@@ -1,34 +1,12 @@
-"""
-This module contains a set of similarity measures that was designed for use
-within the Lucas-Kanade framework. They therefore expose a number of methods
-that make them useful for inverse compositional and forward additive
-Lucas-Kanade.
-
-These similarity measures are designed to be dimension independent where
-possible. For this reason, some methods look more complicated than would be
-normally the case. For example, calculating the Hessian involves summing
-a multi-dimensional array, so we dynamically calculate the list of axes
-to sum over. However, the basics of the logic, other than dimension
-reduction, should be similar to the original algorithms.
-
-References
-----------
-
-.. [1] Lucas, Bruce D., and Takeo Kanade.
-       "An iterative image registration technique with an application to stereo
-       vision."
-       IJCAI. Vol. 81. 1981.
-"""
+from __future__ import division
 import abc
 import numpy as np
-from numpy.fft import fftshift, fft2
+from numpy.fft import fftn, ifftn, fft2
 import scipy.linalg
-
-from menpo.math import log_gabor
-from menpo.image import MaskedImage
 from menpo.feature import gradient
 
 
+# TODO: Do we want residuals to support masked templates?
 class Residual(object):
     """
     An abstract base class for calculating the residual between two images
@@ -37,30 +15,41 @@ class Residual(object):
     guarantee is made that calling methods on these subclasses will generate
     correct results.
     """
-    __metaclass__ = abc.ABCMeta
-
-    def __init__(self):
-        pass
-
-    @property
-    def error(self):
+    @classmethod
+    def gradient(cls, image, forward=None):
         r"""
-        The RMS of the error image.
-
-        :type: float
+        Calculates the gradients of the given image.
 
-        Notes
-        -----
-        Will only generate a result if the
-        :func:`steepest_descent_update` function has been calculated prior.
+        If `forward` is provided, then the gradients are warped
+        (as required in the forward additive algorithm)
 
-        .. math::
-            error = \sqrt{\sum_x E(x)^2}
+        Parameters
+        ----------
+        image : :class:`menpo.image.base.Image`
+            The image to calculate the gradients for
+        forward : (:map:`Image`, :map:`AlignableTransform`), optional
+            A ``(template, transform)`` tuple. When given, the gradient is
+            warped onto ``template.mask`` using ``transform``.
 
-        where :math:`E(x) = T(x) - I(W(x;p))` within the forward additive
-        framework.
+            Default: `None`
         """
-        return np.sqrt(np.mean(self._error_img ** 2))
+        if forward:
+            # Calculate the gradient over the image
+            # grad:  (dims x ch) x H x W
+            grad = gradient(image)
+            # Warp gradient for forward additive using the given transform
+            # grad:  (dims x ch) x h x w
+            template, transform = forward
+            grad = grad.warp_to_mask(template.mask, transform,
+                                     warp_landmarks=False)
+        else:
+            # Calculate the gradient over the image and set the pixels along
+            # the boundary of the image mask to zero (no reliable gradient
+            # can be computed there!)
+            # grad:  (dims x ch) x h x w
+            grad = gradient(image)
+            grad.set_boundary_pixels()
+        return grad
 
     @abc.abstractmethod
     def steepest_descent_images(self, image, dW_dp, **kwargs):
@@ -92,7 +81,7 @@ def steepest_descent_images(self, image, dW_dp, **kwargs):
         pass
 
     @abc.abstractmethod
-    def calculate_hessian(self, VT_dW_dp):
+    def hessian(self, sdi):
         r"""
         Calculates the Gauss-Newton approximation to the Hessian.
 
@@ -105,7 +94,7 @@ def calculate_hessian(self, VT_dW_dp):
 
         Parameters
         ----------
-        VT_dW_dp : (N, n_params) ndarray
+        sdi : (N, n_params) ndarray
             The steepest descent images.
 
         Returns
@@ -116,7 +105,7 @@ def calculate_hessian(self, VT_dW_dp):
         pass
 
     @abc.abstractmethod
-    def steepest_descent_update(self, VT_dW_dp, IWxp, template):
+    def steepest_descent_update(self, sdi, image, template):
         r"""
         Calculates the steepest descent parameter updates.
 
@@ -127,9 +116,9 @@ def steepest_descent_update(self, VT_dW_dp, IWxp, template):
 
         Parameters
         ----------
-        VT_dW_dp : (N, n_params) ndarray
+        sdi : (N, n_params) ndarray
             The steepest descent images.
-        IWxp : :class:`menpo.image.base.Image`
+        image : :class:`menpo.image.base.Image`
             Either the warped image or the template
             (depending on the framework)
         template : :class:`menpo.image.base.Image`
@@ -143,69 +132,57 @@ def steepest_descent_update(self, VT_dW_dp, IWxp, template):
         """
         pass
 
-    def _calculate_gradients(self, image, forward=None):
-        r"""
-        Calculates the gradients of the given method.
-
-        If `forward` is provided, then the gradients are warped
-        (as required in the forward additive algorithm)
-
-        Parameters
-        ----------
-        image : :class:`menpo.image.base.Image`
-            The image to calculate the gradients for
-        forward : (:map:`Image`, :map:`AlignableTransform>`), optional
-            A tuple containing the extra weights required for the function
-            `warp` (which should be passed as a function handle).
-
-            Default: `None`
-        """
-        if forward:
-            # Calculate the gradient over the image
-            # grad:  (dims x ch) x H x W
-            grad = gradient(image)
-            # Warp gradient for forward additive using the given transform
-            # grad:  (dims x ch) x h x w
-            template, transform = forward
-            grad = grad.warp_to_mask(template.mask, transform,
-                                     warp_landmarks=False)
-        else:
-            # Calculate the gradient over the image and set one pixels along
-            # the boundary of the image mask to zero (no reliable gradient
-            # can be computed there!)
-            # grad:  (dims x ch) x h x w
-            grad = gradient(image)
-            grad.set_boundary_pixels()
-        return grad
+    @abc.abstractmethod
+    def cost_closure(self):
+        pass
 
 
 class SSD(Residual):
-
-    type = 'SSD'
+    r"""
+    """
+    def __init__(self, kernel=None):
+        self._kernel = kernel
 
     def steepest_descent_images(self, image, dW_dp, forward=None):
         # compute gradient
-        # grad:  dims x ch x pixels
-        grad = self._calculate_gradients(image, forward=forward)
-        grad = grad.as_vector().reshape((image.n_dims, image.n_channels, -1))
+        # grad:  dims x ch x h x w
+        nabla = self.gradient(image, forward=forward)
+        nabla = nabla.as_vector().reshape((image.n_dims, image.n_channels) +
+                                          nabla.shape)
 
         # compute steepest descent images
-        # gradient: dims x ch x pixels
-        # dw_dp:    dims x    x pixels x params
-        # sdi:             ch x pixels x params
+        # gradient: dims x ch x h x w
+        # dw_dp:    dims x    x h x w x params
+        # sdi:             ch x h x w x params
         sdi = 0
-        a = grad[..., None] * dW_dp[:, None, ...]
+        a = nabla[..., None] * dW_dp[:, None, ...]
         for d in a:
             sdi += d
 
-        # reshape steepest descent images
-        # sdi: (ch x pixels) x params
-        return sdi.reshape((-1, sdi.shape[-1]))
-
-    def calculate_hessian(self, sdi, sdi2=None):
+        if self._kernel is None:
+            # reshape steepest descent images
+            # sdi:           (ch x h x w) x params
+            # filtered_sdi:  (ch x h x w) x params
+            sdi = sdi.reshape((-1, sdi.shape[-1]))
+            filtered_sdi = sdi
+        else:
+            # if required, filter steepest descent images
+            # filtered_sdi:  ch x h x w x params
+            filtered_sdi = ifftn(self._kernel[..., None] *
+                                 fftn(sdi, axes=(-3, -2)),
+                                 axes=(-3, -2))
+            # reshape steepest descent images
+            # sdi:           (ch x h x w) x params
+            # filtered_sdi:  (ch x h x w) x params
+            sdi = sdi.reshape((-1, sdi.shape[-1]))
+            filtered_sdi = filtered_sdi.reshape(sdi.shape)
+
+        return filtered_sdi, sdi
+
+    def hessian(self, sdi, sdi2=None):
         # compute hessian
-        # sdi.T:   params x (ch x pixels)
-        # sdi:              (ch x pixels) x params
+        # sdi.T:   params x (ch x h x w)
+        # sdi:              (ch x h x w) x params
         # hessian: params x               x params
         if sdi2 is None:
             H = sdi.T.dot(sdi)
@@ -213,123 +190,102 @@ def calculate_hessian(self, sdi, sdi2=None):
             H = sdi.T.dot(sdi2)
         return H
 
-    def steepest_descent_update(self, sdi, IWxp, template):
-        self._error_img = IWxp.as_vector() - template.as_vector()
+    def steepest_descent_update(self, sdi, image, template):
+        self._error_img = image.as_vector() - template.as_vector()
         return sdi.T.dot(self._error_img)
 
-
-class GaborFourier(Residual):
-
-    type = 'GaborFourier'
-
-    def __init__(self, image_shape, **kwargs):
-        super(GaborFourier, self).__init__()
-
-        if 'filter_bank' in kwargs:
-            self._filter_bank = kwargs.get('filter_bank')
-            if self._filter_bank.shape != image_shape:
-                raise ValueError('Filter bank shape must match the shape '
-                                 'of the image')
-        else:
-            gabor = log_gabor(np.ones(image_shape), **kwargs)
-            # Get filter bank matrix
-            self._filter_bank = gabor[2]
-
-        # Flatten the filter bank for vectorized calculations
-        self._filter_bank = self._filter_bank.ravel()
+    def cost_closure(self):
+        def cost_closure(x, k):
+            if k is None:
+                return lambda: x.T.dot(x)
+            else:
+                x = x.reshape((-1,) + k.shape[-2:])
+                kx = ifftn(k[..., None] * fftn(x, axes=(-2, -1)),
+                           axes=(-2, -1))
+                return lambda: x.ravel().T.dot(kx.ravel())
+        return cost_closure(self._error_img, self._kernel)
+
+
+# TODO: Does not support masked templates at the moment
+class FourierSSD(Residual):
+    r"""
+    """
+    def __init__(self, kernel=None):
+        self._kernel = kernel
 
     def steepest_descent_images(self, image, dW_dp, forward=None):
-        n_dims = image.n_dims
-        n_channels = image.n_channels
-        n_params = dW_dp.shape[-1]
-
         # compute gradient
-        # grad:  dims x ch x pixels
-        grad_img = self._calculate_gradients(image, forward=forward)
-        grad = grad_img.as_vector().reshape((n_dims, n_channels, -1))
+        # grad:  dims x ch x h x w
+        nabla = self.gradient(image, forward=forward)
+        nabla = nabla.as_vector().reshape((image.n_dims, image.n_channels) +
+                                          nabla.shape)
 
         # compute steepest descent images
-        # gradient: dims x ch x pixels
-        # dw_dp:    dims x    x pixels x params
-        # sdi:             ch x pixels x params
+        # gradient: dims x ch x h x w
+        # dw_dp:    dims x    x h x w x params
+        # sdi:             ch x h x w x params
         sdi = 0
-        a = grad[..., None] * dW_dp[:, None, ...]
+        a = nabla[..., None] * dW_dp[:, None, ...]
         for d in a:
             sdi += d
 
-        # make sdi images
-        # sdi_img:  ch x h x w x params
-        sdi_mask = np.tile(grad_img.mask.pixels[0, ..., None],
-                           (1, 1, n_params))
-        sdi_img = MaskedImage.blank(grad_img.shape + (n_params,),
-                                    n_channels=n_channels,
-                                    mask=sdi_mask)
-        sdi_img.from_vector_inplace(sdi.ravel())
-
-        # compute FFT over each channel, parameter and dimension
+        # compute steepest descent images fft
         # fft_sdi:  ch x h x w x params
-        fft_sdi = fftshift(fft2(sdi_img.pixels, axes=(-3, -2)), axes=(-3, -2))
-        # Note that, fft_sdi is rectangular, i.e. is not define in
-        # terms of the mask pixels, but in terms of the whole image.
-        # Selecting mask pixels once the fft has been computed makes no
-        # sense because they have lost their original spatial meaning.
+        fft_sdi = fftn(sdi, axes=(-3, -2))
+
+        if self._kernel is None:
+            # reshape steepest descent images
+            # fft_sdi:           (ch x h x w) x params
+            # filtered_fft_sdi:  (ch x h x w) x params
+            fft_sdi = fft_sdi.reshape((-1, fft_sdi.shape[-1]))
+            filtered_fft_sdi = fft_sdi
+        else:
+            # if required, filter steepest descent images
+            filtered_fft_sdi = self._kernel[..., None] * fft_sdi
+            # reshape steepest descent images
+            # fft_sdi:           (ch x h x w) x params
+            # filtered_fft_sdi:  (ch x h x w) x params
+            fft_sdi = fft_sdi.reshape((-1, fft_sdi.shape[-1]))
+            filtered_fft_sdi = filtered_fft_sdi.reshape(fft_sdi.shape)
 
-        # reshape steepest descent images
-        # sdi:  (ch x h x w) x params
-        return fft_sdi.reshape((-1, fft_sdi.shape[-1]))
+        return filtered_fft_sdi, fft_sdi
 
-    def calculate_hessian(self, sdi):
-        # reshape steepest descent images
-        # sdi:  ch x (h x w) x params
-        sdi = sdi.reshape((-1, self._filter_bank.shape[0], sdi.shape[-1]))
-
-        # compute filtered steepest descent images
-        # filter_bank:        (h x w)
-        # sdi:           ch x (h x w) x params
-        # filtered_sdi:  ch x (h x w) x params
-        filtered_sdi = (self._filter_bank[None, ..., None] ** 0.5) * sdi
-
-        # reshape filtered steepest descent images
-        # filtered_sdi:  (ch x h x w) x params
-        filtered_sdi = filtered_sdi.reshape((-1, sdi.shape[-1]))
-
-        # compute filtered hessian
-        # filtered_sdi.T:  params x (ch x h x w)
-        # filtered_sdi:             (ch x h x w) x params
-        # hessian:         params x              x  n_param
-        return np.conjugate(filtered_sdi).T.dot(filtered_sdi)
-
-    def steepest_descent_update(self, sdi, IWxp, template):
+    def hessian(self, sdi, sdi2=None):
+        if sdi2 is None:
+            H = sdi.conjugate().T.dot(sdi)
+        else:
+            H = sdi.conjugate().T.dot(sdi2)
+        return H
+
+    def steepest_descent_update(self, sdi, image, template):
         # compute error image
         # error_img:  ch x h x w
-        error_img = IWxp.pixels - template.pixels
+        self._error_img = image.pixels - template.pixels
 
-        # compute FFT error image
+        # compute error image fft
         # fft_error_img:  ch x (h x w)
-        fft_error_img = fftshift(fft2(error_img))
-        fft_error_img = fft_error_img.reshape((IWxp.n_channels, -1))
-
-        # compute filtered steepest descent images
-        # filter_bank:              (h x w)
-        # fft_error_img:       ch x (h x w)
-        # filtered_error_img:  ch x (h x w)
-        filtered_error_img = self._filter_bank * fft_error_img
-
-        # reshape _error_img
-        # error_img:  (ch x h x w)
-        self._error_img = filtered_error_img.ravel()
+        fft_error_img = fft2(self._error_img)
 
         # compute steepest descent update
-        # sdi:        params x (ch x h x w)
-        # error_img:           (ch x h x w)
-        # sdu:        params
-        return sdi.T.dot(np.conjugate(self._error_img))
+        # fft_sdi:        params x (ch x h x w)
+        # fft_error_img:           (ch x h x w)
+        # fft_sdu:        params
+        return sdi.conjugate().T.dot(fft_error_img.ravel())
+
+    def cost_closure(self):
+        def cost_closure(x, k):
+            if k is None:
+                return lambda: x.ravel().T.dot(x.ravel())
+            else:
+                kx = ifftn(k[..., None] * fftn(x, axes=(-2, -1)),
+                           axes=(-2, -1))
+                return lambda: x.ravel().T.dot(kx.ravel())
+        return cost_closure(self._error_img, self._kernel)
 
 
 class ECC(Residual):
-
-    type = 'ECC'
-
+    r"""
+    """
     def _normalise_images(self, image):
         # TODO: do we need to copy the image?
         # TODO: is this supposed to be per channel normalization?
@@ -343,8 +299,9 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
 
         # compute gradient
         # gradient:  dims x ch x pixels
-        grad = self._calculate_gradients(norm_image, forward=forward)
-        grad = grad.as_vector().reshape((image.n_dims, image.n_channels, -1))
+        grad = self.gradient(norm_image, forward=forward)
+        grad = grad.as_vector().reshape((image.n_dims, image.n_channels) +
+                                         grad.shape)
 
         # compute steepest descent images
         # gradient: dims x ch x pixels
@@ -357,32 +314,38 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
 
         # reshape steepest descent images
         # sdi: (ch x pixels) x params
-        return sdi.reshape((-1, sdi.shape[-1]))
+        sdi = sdi.reshape((-1, sdi.shape[-1]))
+
+        return sdi, sdi
 
-    def calculate_hessian(self, sdi):
+    def hessian(self, sdi, sdi2=None):
         # compute hessian
-        # sdi.T:   params x (ch x pixels)
-        # sdi:              (ch x pixels) x params
+        # sdi.T:   params x (ch x h x w)
+        # sdi:              (ch x h x w) x params
         # hessian: params x               x params
-        H = sdi.T.dot(sdi)
+        if sdi2 is None:
+            H = sdi.T.dot(sdi)
+        else:
+            H = sdi.T.dot(sdi2)
         self._H_inv = scipy.linalg.inv(H)
         return H
 
-    def steepest_descent_update(self, sdi, IWxp, template):
-        normalised_IWxp = self._normalise_images(IWxp).as_vector()
-        normalised_template = self._normalise_images(template).as_vector()
+    def steepest_descent_update(self, sdi, image, template):
+        self._normalised_IWxp = self._normalise_images(image).as_vector()
+        self._normalised_template = self._normalise_images(
+            template).as_vector()
 
-        Gt = sdi.T.dot(normalised_template)
-        Gw = sdi.T.dot(normalised_IWxp)
+        Gt = sdi.T.dot(self._normalised_template)
+        Gw = sdi.T.dot(self._normalised_IWxp)
 
         # Calculate the numerator
-        IWxp_norm = scipy.linalg.norm(normalised_IWxp)
+        IWxp_norm = scipy.linalg.norm(self._normalised_IWxp)
         num1 = IWxp_norm ** 2
         num2 = np.dot(Gw.T, np.dot(self._H_inv, Gw))
         num = num1 - num2
 
         # Calculate the denominator
-        den1 = np.dot(normalised_template, normalised_IWxp)
+        den1 = np.dot(self._normalised_template, self._normalised_IWxp)
         den2 = np.dot(Gt.T, np.dot(self._H_inv, Gw))
         den = den1 - den2
 
@@ -396,15 +359,19 @@ def steepest_descent_update(self, sdi, IWxp, template):
             l2 = - den / den3
             l = np.maximum(l1, l2)
 
-        self._error_img = l * normalised_IWxp - normalised_template
+        self._error_img = l * self._normalised_IWxp - self._normalised_template
 
         return sdi.T.dot(self._error_img)
 
+    def cost_closure(self):
+        def cost_closure(x, y):
+            return lambda: x.T.dot(y)
+        return cost_closure(self._normalised_IWxp, self._normalised_template)
 
-class GradientImages(Residual):
-
-    type = 'GradientImages'
 
+class GradientImages(Residual):
+    r"""
+    """
     def _regularise_gradients(self, grad):
         pixels = grad.pixels
         ab = np.sqrt(np.sum(pixels**2, axis=0))
@@ -418,14 +385,14 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
         n_channels = image.n_channels
 
         # compute gradient
-        first_grad = self._calculate_gradients(image, forward=forward)
+        first_grad = self.gradient(image, forward=forward)
         self._template_grad = self._regularise_gradients(first_grad)
 
         # compute gradient
         # second_grad:  dims x dims x ch x pixels
-        second_grad = self._calculate_gradients(self._template_grad)
+        second_grad = self.gradient(self._template_grad)
         second_grad = second_grad.masked_pixels().flatten().reshape(
-            (n_dims, n_dims,  n_channels, -1))
+            (n_dims, n_dims,  n_channels) + second_grad.shape)
 
         # Fix crossed derivatives: dydx = dxdy
         second_grad[1, 0, ...] = second_grad[0, 1, ...]
@@ -440,19 +407,25 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
             sdi += d
 
         # reshape steepest descent images
-        # sdi: (dims x ch x h x w) x params
-        return sdi.reshape((-1, sdi.shape[-1]))
+        # sdi: (ch x pixels) x params
+        sdi = sdi.reshape((-1, sdi.shape[-1]))
 
-    def calculate_hessian(self, sdi):
+        return sdi, sdi
+
+    def hessian(self, sdi, sdi2=None):
         # compute hessian
-        # sdi.T:   params x (dims x ch x pixels)
-        # sdi:              (dims x ch x pixels) x params
-        # hessian: params x                     x params
-        return sdi.T.dot(sdi)
-
-    def steepest_descent_update(self, sdi, IWxp, template):
-        # compute IWxp regularized gradient
-        IWxp_grad = self._calculate_gradients(IWxp)
+        # sdi.T:   params x (ch x h x w)
+        # rhs:              (ch x h x w) x params
+        # hessian: params x               x params
+        # When no second set of steepest descent images is supplied the
+        # product is taken against ``sdi`` itself (sdi.T . sdi).
+        rhs = sdi if sdi2 is None else sdi2
+        hessian = sdi.T.dot(rhs)
+        return hessian
+
+    def steepest_descent_update(self, sdi, image, template):
+        # compute image regularized gradient
+        IWxp_grad = self.gradient(image)
         IWxp_grad = self._regularise_gradients(IWxp_grad)
 
         # compute vectorized error_image
@@ -466,19 +439,23 @@ def steepest_descent_update(self, sdi, IWxp, template):
         # sdu:        params
         return sdi.T.dot(self._error_img)
 
+    def cost_closure(self):
+        # Capture the current error image; the thunk returns err.T . err.
+        err = self._error_img
+        return lambda: err.T.dot(err)
 
-class GradientCorrelation(Residual):
-
-    type = 'GradientCorrelation'
 
+class GradientCorrelation(Residual):
+    r"""
+    """
     def steepest_descent_images(self, image, dW_dp, forward=None):
         n_dims = image.n_dims
         n_channels = image.n_channels
 
         # compute gradient
         # grad:  dims x ch x pixels
-        grad = self._calculate_gradients(image, forward=forward)
-        grad2 = grad.as_vector().reshape((n_dims, n_channels, -1))
+        grad = self.gradient(image, forward=forward)
+        grad2 = grad.as_vector().reshape((n_dims, n_channels) + grad.shape)
 
         # compute IGOs (remember axis 0 is y, axis 1 is x)
         # grad:    dims x ch x pixels
@@ -501,9 +478,9 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
 
         # compute IGOs gradient
         # second_grad:  dims x dims x ch x pixels
-        second_grad = self._calculate_gradients(grad)
+        second_grad = self.gradient(grad)
         second_grad = second_grad.masked_pixels().flatten().reshape(
-            (n_dims, n_dims,  n_channels, -1))
+            (n_dims, n_dims,  n_channels) + second_grad.shape)
 
         # Fix crossed derivatives: dydx = dxdy
         second_grad[1, 0, ...] = second_grad[0, 1, ...]
@@ -528,23 +505,29 @@ def steepest_descent_images(self, image, dW_dp, forward=None):
 
         # reshape steepest descent images
         # sdi: (ch x pixels) x params
-        return sdi.reshape((-1, sdi.shape[-1]))
+        sdi = sdi.reshape((-1, sdi.shape[-1]))
+
+        return sdi, sdi
 
-    def calculate_hessian(self, sdi):
+    def hessian(self, sdi, sdi2=None):
         # compute hessian
-        # sdi.T:   params x (dims x ch x pixels)
-        # sdi:              (dims x ch x pixels) x params
-        # hessian: params x                      x params
-        return sdi.T.dot(sdi)
+        # sdi.T:   params x (ch x h x w)
+        # rhs:              (ch x h x w) x params
+        # hessian: params x               x params
+        # When no second set of steepest descent images is supplied the
+        # product is taken against ``sdi`` itself (sdi.T . sdi).
+        rhs = sdi if sdi2 is None else sdi2
+        hessian = sdi.T.dot(rhs)
+        return hessian
 
-    def steepest_descent_update(self, sdi, IWxp, template):
-        n_dims = IWxp.n_dims
-        n_channels = IWxp.n_channels
+    def steepest_descent_update(self, sdi, image, template):
+        n_dims = image.n_dims
+        n_channels = image.n_channels
 
-        # compute IWxp gradient
-        IWxp_grad = self._calculate_gradients(IWxp)
+        # compute image gradient
+        IWxp_grad = self.gradient(image)
         IWxp_grad = IWxp_grad.as_vector().reshape(
-            (n_dims, n_channels, -1))
+            (n_dims, n_channels) + image.shape)
 
         # compute IGOs (remember axis 0 is y, axis 1 is x)
         # IWxp_grad:     dims x ch x pixels
@@ -569,5 +552,10 @@ def steepest_descent_update(self, sdi, IWxp, template):
         # compute step size
         qp = np.sum(self._cos_phi * IWxp_cos_phi +
                     self._sin_phi * IWxp_sin_phi)
-        l = self._N / qp
-        return l * sdu
+        self._l = self._N / qp
+        return self._l * sdu
+
+    def cost_closure(self):
+        # Freeze the latest step size; the thunk returns its reciprocal.
+        step = self._l
+        return lambda: 1/step
diff --git a/menpofit/lk/result.py b/menpofit/lk/result.py
new file mode 100644
index 0000000..874fbda
--- /dev/null
+++ b/menpofit/lk/result.py
@@ -0,0 +1,120 @@
+from __future__ import division
+from menpofit.result import ParametricAlgorithmResult, MultiFitterResult
+
+
+class LucasKanadeAlgorithmResult(ParametricAlgorithmResult):
+    r"""
+    Result of a single Lucas-Kanade algorithm run.
+
+    Parameters
+    ----------
+    image : object
+        The fitted image; ``warp_to_mask`` is called on it.
+    algorithm : object
+        The algorithm that produced this result; its ``transform`` and
+        ``template`` are used to build the warped images.
+    shape_parameters : iterable
+        Transform parameter vectors, one per warped image.
+    cost_functions : iterable of callable, optional
+        Zero-argument callables, each returning one cost value. They are
+        evaluated lazily, on the first access to ``costs``.
+    gt_shape : object, optional
+        Forwarded unchanged to the parent class.
+    """
+    def __init__(self, image, algorithm, shape_parameters,
+                 cost_functions=None, gt_shape=None):
+        super(LucasKanadeAlgorithmResult, self).__init__(
+            image, algorithm, shape_parameters, gt_shape=gt_shape)
+        self._cost_functions = cost_functions
+        # Both caches are filled on first access of the matching property.
+        self._warped_images = None
+        self._costs = None
+
+    @property
+    def warped_images(self):
+        r"""
+        The image warped with each entry of ``shape_parameters``, cached.
+
+        Note that ``self.algorithm.transform`` is updated in place and is
+        left holding the last parameter vector.
+        """
+        if self._warped_images is None:
+            self._warped_images = []
+            for p in self.shape_parameters:
+                self.algorithm.transform.from_vector_inplace(p)
+                self._warped_images.append(
+                    self.image.warp_to_mask(self.algorithm.template.mask,
+                                            self.algorithm.transform))
+        return self._warped_images
+
+    @property
+    def costs(self):
+        r"""
+        The values returned by the cost functions, evaluated once and cached.
+
+        Raises
+        ------
+        ValueError
+            If no cost functions were given at construction.
+        """
+        if self._costs is None:
+            # Without this guard, iterating over the default ``None`` fails
+            # with an unhelpful ``TypeError``.
+            if self._cost_functions is None:
+                raise ValueError('costs are not available: no cost '
+                                 'functions were provided')
+            self._costs = [f() for f in self._cost_functions]
+        return self._costs
+
+
+class LucasKanadeFitterResult(MultiFitterResult):
+    r"""
+    Result of a Lucas-Kanade fitter, aggregating several algorithm results.
+
+    Parameters
+    ----------
+    image : object
+        The fitted image; ``warp_to_mask`` is called on it.
+    fitter : object
+        Forwarded unchanged to the parent class.
+    algorithm_results : sequence
+        Per-algorithm results, each exposing ``algorithm`` and ``costs``.
+    affine_correction : object
+        Forwarded unchanged to the parent class.
+    gt_shape : object, optional
+        Forwarded unchanged to the parent class.
+    """
+    def __init__(self, image, fitter, algorithm_results, affine_correction,
+                 gt_shape=None):
+        super(LucasKanadeFitterResult, self).__init__(
+            image, fitter, algorithm_results, affine_correction,
+            gt_shape=gt_shape)
+        self._warped_images = None
+
+    @property
+    def warped_images(self):
+        r"""
+        The image warped onto the template mask for each entry of
+        ``self.shapes``, cached after the first access.
+
+        The transform of the last algorithm result is retargeted in place.
+        """
+        if self._warped_images is None:
+            algorithm = self.algorithm_results[-1].algorithm
+            self._warped_images = []
+            for s in self.shapes:
+                algorithm.transform.set_target(s)
+                self._warped_images.append(
+                    self.image.warp_to_mask(algorithm.template.mask,
+                                            algorithm.transform))
+        return self._warped_images
+
+    @property
+    def costs(self):
+        r"""
+        The costs of every algorithm result, concatenated in order.
+        """
+        costs = []
+        for a in self.algorithm_results:
+            costs += a.costs
+        return costs
diff --git a/menpofit/lucaskanade/__init__.py b/menpofit/lucaskanade/__init__.py
deleted file mode 100755
index a01f4c8..0000000
--- a/menpofit/lucaskanade/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .appearance import SFA, SFC, SIC, AFA, AFC, AIC, PIC
-
-from .image import FA, FC, IC
diff --git a/menpofit/lucaskanade/appearance/__init__.py b/menpofit/lucaskanade/appearance/__init__.py
deleted file mode 100644
index 46657ca..0000000
--- a/menpofit/lucaskanade/appearance/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .simultaneous import SFA, SFC, SIC
-from .alternating import AFA, AFC, AIC
-from .projectout import PIC
diff --git a/menpofit/lucaskanade/appearance/alternating.py b/menpofit/lucaskanade/appearance/alternating.py
deleted file mode 100644
index b80955d..0000000
--- a/menpofit/lucaskanade/appearance/alternating.py
+++ /dev/null
@@ -1,174 +0,0 @@
-from scipy.linalg import norm
-import numpy as np
-
-from .base import AppearanceLucasKanade
-
-
-class AFA(AppearanceLucasKanade):
-    r"""
-    Alternating Forward Additive algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Alternating-FA'
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = [[0]]
-        n_iters = 0
-
-        # Forward Additive Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute appearance
-            weights = self.appearance_model.project(IWxp)
-            self.template = self.appearance_model.instance(weights)
-            fitting_result.weights.append(weights)
-
-            # Compute warp Jacobian
-            dW_dp = np.rollaxis(
-                self.transform.d_dp(self.template.indices()), -1)
-
-            # Compute steepest descent images, VI_dW_dp
-            self._J = self.residual.steepest_descent_images(
-                image, dW_dp, forward=(self.template, self.transform))
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            parameters = self.transform.as_vector() + delta_p
-            self.transform.from_vector_inplace(parameters)
-            fitting_result.parameters.append(parameters)
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
-
-
-class AFC(AppearanceLucasKanade):
-    r"""
-    Alternating Forward Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Alternating-FC'
-
-    def _set_up(self):
-        # Compute warp Jacobian
-        self._dW_dp = np.rollaxis(
-            self.transform.d_dp(self.template.indices()), -1)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = [[0]]
-        n_iters = 0
-
-        # Forward Additive Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute template by projection
-            weights = self.appearance_model.project(IWxp)
-            self.template = self.appearance_model.instance(weights)
-            fitting_result.weights.append(weights)
-
-            # Compute steepest descent images, VI_dW_dp
-            self._J = self.residual.steepest_descent_images(IWxp, self._dW_dp)
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
-
-
-class AIC(AppearanceLucasKanade):
-    r"""
-    Alternating Inverse Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Alternating-IC'
-
-    def _set_up(self):
-        # Compute warp Jacobian
-        self._dW_dp = np.rollaxis(
-            self.transform.d_dp(self.template.indices()), -1)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = [[0]]
-        n_iters = 0
-
-        # Baker-Matthews, Inverse Compositional Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute appearance
-            weights = self.appearance_model.project(IWxp)
-            self.template = self.appearance_model.instance(weights)
-            fitting_result.weights.append(weights)
-
-            # Compute steepest descent images, VT_dW_dp
-            self._J = self.residual.steepest_descent_images(self.template,
-                                                            self._dW_dp)
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, IWxp, self.template)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Request the pesudoinverse vector from the transform
-            inv_delta_p = self.transform.pseudoinverse_vector(delta_p)
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(inv_delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
diff --git a/menpofit/lucaskanade/appearance/base.py b/menpofit/lucaskanade/appearance/base.py
deleted file mode 100644
index 6cf28d6..0000000
--- a/menpofit/lucaskanade/appearance/base.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from menpofit.lucaskanade.residual import SSD
-from menpofit.lucaskanade.base import LucasKanade
-
-
-class AppearanceLucasKanade(LucasKanade):
-
-    def __init__(self, model, transform, eps=10**-6):
-        # Note that the only supported residual for Appearance LK is SSD.
-        # This is because, in general, we don't know how to take the appropriate
-        # derivatives for arbitrary residuals with (for instance) a project out
-        # AAM.
-        # See https://github.com/menpo/menpo/issues/130 for details.
-        super(AppearanceLucasKanade, self).__init__(SSD(),
-                                                    transform, eps=eps)
-
-        # in appearance alignment, target image is aligned to appearance model
-        self.appearance_model = model
-        # by default, template is assigned to mean appearance
-        self.template = model.mean()
-        # pre-compute
-        self._set_up()
diff --git a/menpofit/lucaskanade/appearance/projectout.py b/menpofit/lucaskanade/appearance/projectout.py
deleted file mode 100644
index 0d1cd76..0000000
--- a/menpofit/lucaskanade/appearance/projectout.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import numpy as np
-from scipy.linalg import norm
-
-from .base import AppearanceLucasKanade
-
-
-class PIC(AppearanceLucasKanade):
-    r"""
-    Project-Out Inverse Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'ProjectOut-IC'
-
-    def _set_up(self):
-        # Compute warp Jacobian
-        dW_dp = np.rollaxis(self.transform.d_dp(self.template.indices()), -1)
-
-        # Compute steepest descent images, VT_dW_dp
-        J = self.residual.steepest_descent_images(
-            self.template, dW_dp)
-
-        # Project out appearance model from VT_dW_dp
-        self._J = self.appearance_model.project_out_vectors(J.T).T
-
-        # Compute Hessian and inverse
-        self._H = self.residual.calculate_hessian(self._J)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        n_iters = 0
-
-        # Baker-Matthews, Inverse Compositional Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, IWxp, self.template)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Request the pesudoinverse vector from the transform
-            inv_delta_p = self.transform.pseudoinverse_vector(delta_p)
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(inv_delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
diff --git a/menpofit/lucaskanade/appearance/simultaneous.py b/menpofit/lucaskanade/appearance/simultaneous.py
deleted file mode 100644
index a29d35f..0000000
--- a/menpofit/lucaskanade/appearance/simultaneous.py
+++ /dev/null
@@ -1,184 +0,0 @@
-from scipy.linalg import norm
-import numpy as np
-
-from .base import AppearanceLucasKanade
-
-
-class SFA(AppearanceLucasKanade):
-    r"""
-    Simultaneous Forward Additive algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Simultaneous-FA'
-
-    def _fit(self, fitting_result, max_iters=20, project=True):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = []
-        n_iters = 0
-
-        # Forward Additive Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute warp Jacobian
-            dW_dp = np.rollaxis(
-                self.transform.d_dp(self.template.indices()), -1)
-
-            # Compute steepest descent images, VI_dW_dp
-            J_aux = self.residual.steepest_descent_images(
-                image, dW_dp, forward=(self.template, self.transform))
-
-            # Project out appearance model from VT_dW_dp
-            self._J = self.appearance_model.project_out_vectors(J_aux.T).T
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            parameters = self.transform.as_vector() + delta_p
-            self.transform.from_vector_inplace(parameters)
-            fitting_result.parameters.append(parameters)
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
-
-
-class SFC(AppearanceLucasKanade):
-    r"""
-    Simultaneous Forward Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Simultaneous-FC'
-
-    def _set_up(self):
-        # Compute warp Jacobian
-        self._dW_dp = np.rollaxis(
-            self.transform.d_dp(self.template.indices()), -1)
-
-    def _fit(self, fitting_result, max_iters=20, project=True):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = []
-        n_iters = 0
-
-        # Forward Additive Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute steepest descent images, VI_dW_dp
-            J_aux = self.residual.steepest_descent_images(IWxp, self._dW_dp)
-
-            # Project out appearance model from VT_dW_dp
-            self._J = self.appearance_model.project_out_vectors(J_aux.T).T
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
-
-
-class SIC(AppearanceLucasKanade):
-    r"""
-    Simultaneous Inverse Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Simultaneous-IC'
-
-    def _set_up(self):
-        # Compute warp Jacobian
-        self._dW_dp = np.rollaxis(
-            self.transform.d_dp(self.template.indices()), -1)
-
-    def _fit(self, fitting_result, max_iters=20, project=True):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        fitting_result.weights = []
-        n_iters = 0
-
-        mean = self.appearance_model.mean()
-
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            if n_iters == 0:
-                # Project image onto the model bases
-                weights = self.appearance_model.project(IWxp)
-            else:
-                # Compute Gauss-Newton appearance parameters updates
-                diff = (self.template.as_vector() - mean.as_vector())
-                self.template.from_vector_inplace(IWxp.as_vector() - diff -
-                                                  np.dot(J_aux, delta_p))
-                delta_weights = self.appearance_model.project(self.template)
-                weights += delta_weights
-
-            # Reconstruct appearance
-            self.template = self.appearance_model.instance(weights)
-            fitting_result.weights.append(weights)
-
-            # Compute steepest descent images, VT_dW_dp
-            J_aux = self.residual.steepest_descent_images(self.template,
-                                                          self._dW_dp)
-
-            # Project out appearance model from VT_dW_dp
-            self._J = self.appearance_model.project_out_vectors(J_aux.T).T
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, IWxp, mean)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Request the pesudoinverse vector from the transform
-            inv_delta_p = self.transform.pseudoinverse_vector(delta_p)
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(inv_delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        return fitting_result
diff --git a/menpofit/lucaskanade/base.py b/menpofit/lucaskanade/base.py
deleted file mode 100644
index 9184805..0000000
--- a/menpofit/lucaskanade/base.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from __future__ import division
-import numpy as np
-
-from menpofit.fitter import Fitter
-from menpofit.fittingresult import ParametricFittingResult
-
-
-class LucasKanade(Fitter):
-    r"""
-    An abstract base class for implementations of Lucas-Kanade [1]_
-    type algorithms.
-
-    This is to abstract away optimisation specific functionality such as the
-    calculation of the Hessian (which could be derived using a number of
-    techniques, currently only Gauss-Newton).
-
-    Parameters
-    ----------
-    image : :map:`Image`
-        The image to perform the alignment upon.
-
-        .. note:: Only the image is expected within the base class because
-            different algorithms expect different kinds of template
-            (image/model)
-
-    residual : :map:`Residual`
-        The kind of residual to be calculated. This is used to quantify the
-        error between the input image and the reference object.
-
-    transform : :map:`Alignment`
-        The transformation type used to warp the image in to the appropriate
-        reference frame. This is used by the warping function to calculate
-        sub-pixel coordinates of the input image in the reference frame.
-
-    eps : float, optional
-        The convergence value. When calculating the level of convergence, if
-        the norm of the delta parameter updates is less than `eps`, the
-        algorithm is considered to have converged.
-
-        Default: 1**-10
-
-    Notes
-    -----
-    The type of optimisation technique chosen will determine properties such
-    as the convergence rate of the algorithm. The supported optimisation
-    techniques are detailed below:
-
-    ===== ==================== ===============================================
-    type  full name            hessian approximation
-    ===== ==================== ===============================================
-    'GN'  Gauss-Newton         :math:`\mathbf{J^T J}`
-    ===== ==================== ===============================================
-
-    Attributes
-    ----------
-    transform
-    weights
-    n_iters
-
-    References
-    ----------
-    .. [1] Lucas, Bruce D., and Takeo Kanade.
-       "An iterative image registration technique with an application to
-       stereo vision." IJCAI. Vol. 81. 1981.
-    """
-    def __init__(self, residual, transform, eps=10**-10):
-        # set basic state for all Lucas Kanade algorithms
-        self.transform = transform
-        self.residual = residual
-        self.eps = eps
-        # setup the optimisation approach
-        self._calculate_delta_p = self._gauss_newton_update
-
-    def _gauss_newton_update(self, sd_delta_p):
-        return np.linalg.solve(self._H, sd_delta_p)
-
-    def _set_up(self, **kwargs):
-        pass
-
-    def _create_fitting_result(self, image, parameters, gt_shape=None):
-        return ParametricFittingResult(image, self, parameters=[parameters],
-                                       gt_shape=gt_shape)
-
-    def fit(self, image, initial_parameters, gt_shape=None, **kwargs):
-        self.transform.from_vector_inplace(initial_parameters)
-        return Fitter.fit(self, image, initial_parameters, gt_shape=gt_shape,
-                          **kwargs)
-
-    def get_parameters(self, shape):
-        self.transform.set_target(shape)
-        return self.transform.as_vector()
diff --git a/menpofit/lucaskanade/image.py b/menpofit/lucaskanade/image.py
deleted file mode 100644
index 76d68e9..0000000
--- a/menpofit/lucaskanade/image.py
+++ /dev/null
@@ -1,184 +0,0 @@
-from scipy.linalg import norm
-import numpy as np
-
-from .base import LucasKanade
-
-
-class ImageLucasKanade(LucasKanade):
-
-    def __init__(self, template, residual, transform, eps=10 ** -6):
-        super(ImageLucasKanade, self).__init__(residual, transform, eps=eps)
-        # in image alignment, we align a template image to the target image
-        self.template = template
-        # pre-compute
-        self._set_up()
-
-
-class FA(ImageLucasKanade):
-    r"""
-    Forward Additive algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Image-FA'
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        n_iters = 0
-
-        # Forward Additive Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute the Jacobian of the warp
-            dW_dp = np.rollaxis(
-                self.transform.d_dp(self.template.indices()), -1)
-
-            # TODO: rename kwarg "forward" to "forward_additive"
-            # Compute steepest descent images, VI_dW_dp
-            self._J = self.residual.steepest_descent_images(
-                image, dW_dp, forward=(self.template, self.transform))
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            parameters = self.transform.as_vector() + delta_p
-            self.transform.from_vector_inplace(parameters)
-            fitting_result.parameters.append(parameters)
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        fitting_result.fitted = True
-        return fitting_result
-
-
-class FC(ImageLucasKanade):
-    r"""
-    Forward Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Image-FC'
-
-    def _set_up(self):
-        r"""
-        The forward compositional algorithm pre-computes the Jacobian of the
-        warp. This is set as an attribute on the class.
-        """
-        # Compute the Jacobian of the warp
-        self._dW_dp = np.rollaxis(
-            self.transform.d_dp(self.template.indices()), -1)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        n_iters = 0
-
-        # Forward Compositional Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # TODO: add "forward_compositional" kwarg with options
-            # In the forward compositional algorithm there are two different
-            # ways of computing the steepest descent images:
-            #   1. V[I(x)](W(x,p)) * dW/dx * dW/dp
-            #   2. V[I(W(x,p))] * dW/dp -> this is what is currently used
-            # Compute steepest descent images, VI_dW_dp
-            self._J = self.residual.steepest_descent_images(IWxp, self._dW_dp)
-
-            # Compute Hessian and inverse
-            self._H = self.residual.calculate_hessian(self._J)
-
-            # Compute steepest descent parameter updates
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, self.template, IWxp)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        fitting_result.fitted = True
-        return fitting_result
-
-
-class IC(ImageLucasKanade):
-    r"""
-    Inverse Compositional algorithm
-    """
-    @property
-    def algorithm(self):
-        return 'Image-IC'
-
-    def _set_up(self):
-        r"""
-        The Inverse Compositional algorithm pre-computes the Jacobian of the
-        warp, the steepest descent images and the Hessian. These are all
-        stored as attributes on the class.
-        """
-        # Compute the Jacobian of the warp
-        dW_dp = np.rollaxis(self.transform.d_dp(self.template.indices()), -1)
-
-        # Compute steepest descent images, VT_dW_dp
-        self._J = self.residual.steepest_descent_images(
-            self.template, dW_dp)
-
-        # TODO: Pre-compute the inverse
-        # Compute Hessian and inverse
-        self._H = self.residual.calculate_hessian(self._J)
-
-    def _fit(self, fitting_result, max_iters=20):
-        # Initial error > eps
-        error = self.eps + 1
-        image = fitting_result.image
-        n_iters = 0
-
-        # Baker-Matthews, Inverse Compositional Algorithm
-        while n_iters < max_iters and error > self.eps:
-            # Compute warped image with current weights
-            IWxp = image.warp_to_mask(self.template.mask, self.transform,
-                                      warp_landmarks=False)
-
-            # Compute steepest descent parameter updates.
-            sd_delta_p = self.residual.steepest_descent_update(
-                self._J, IWxp, self.template)
-
-            # Compute gradient descent parameter updates
-            delta_p = np.real(self._calculate_delta_p(sd_delta_p))
-
-            # Request the pesudoinverse vector from the transform
-            inv_delta_p = self.transform.pseudoinverse_vector(delta_p)
-
-            # Update warp weights
-            self.transform.compose_after_from_vector_inplace(inv_delta_p)
-            fitting_result.parameters.append(self.transform.as_vector())
-
-            # Test convergence
-            error = np.abs(norm(delta_p))
-            n_iters += 1
-
-        fitting_result.fitted = True
-        return fitting_result
diff --git a/menpofit/math/__init__.py b/menpofit/math/__init__.py
new file mode 100644
index 0000000..fa310c7
--- /dev/null
+++ b/menpofit/math/__init__.py
@@ -0,0 +1 @@
+from .regression import IRLRegression, IIRLRegression
diff --git a/menpofit/math/correlationfilter.py b/menpofit/math/correlationfilter.py
new file mode 100644
index 0000000..753a55e
--- /dev/null
+++ b/menpofit/math/correlationfilter.py
@@ -0,0 +1,373 @@
+import numpy as np
+from numpy.fft import fft2, ifft2, ifftshift
+from scipy.sparse import spdiags, eye as speye
+from scipy.sparse.linalg import spsolve
+from menpofit.math.fft_utils import pad, crop
+
+
+# TODO: Document me!
+def mosse(X, y, l=0.01, boundary='constant', crop_filter=True):
+    r"""
+    Minimum Output Sum of Squared Errors (MOSSE) filter.
+
+    Parameters
+    ----------
+    X : ``(n_images, n_channels, height, width)`` `ndarray`
+        Training images.
+    y : ``(1, height, width)`` `ndarray`
+        Desired response.
+    l: `float`, optional
+        Regularization parameter.
+    boundary: str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    crop_filter: `bool`, optional
+        If ``True``, the shape of the MOSSE filter is the same as the shape
+        of the desired response. If ``False``, the filter's shape is equal to:
+        ``X[0].shape + y.shape - 1``
+
+    Returns
+    -------
+    mosse: ``(1, height, width)`` `ndarray`
+        Minimum Output Sum of Squared Errors (MOSSE) filter associated to
+        the training images.
+
+    References
+    ----------
+    .. [1] David S. Bolme, J. Ross Beveridge,  Bruce A. Draper and Yui Man Lui.
+    "Visual Object Tracking using Adaptive Correlation Filters". CVPR, 2010.
+    """
+    # number of images, number of channels, height and width
+    n, k, hx, wx = X.shape
+
+    # height and width of desired responses
+    _, hy, wy = y.shape
+    y_shape = (hy, wy)
+
+    # extended shape
+    ext_h = hx + hy - 1
+    ext_w = wx + wy - 1
+    ext_shape = (ext_h, ext_w)
+
+    # extend desired response
+    ext_y = pad(y, ext_shape)
+    # fft of extended desired response
+    fft_ext_y = fft2(ext_y)
+
+    # auto and cross spectral energy matrices
+    sXX = 0
+    sXY = 0
+    # for each training image and desired response
+    for x in X:
+        # extend image
+        ext_x = pad(x, ext_shape, boundary=boundary)
+        # fft of extended image
+        fft_ext_x = fft2(ext_x)
+
+        # update auto and cross spectral energy matrices
+        sXX += fft_ext_x.conj() * fft_ext_x
+        sXY += fft_ext_x.conj() * fft_ext_y
+
+    # compute desired correlation filter
+    fft_ext_f = sXY / (sXX + l)
+    # reshape extended filter to extended image shape
+    fft_ext_f = fft_ext_f.reshape((k, ext_h, ext_w))
+
+    # compute extended filter inverse fft
+    f = np.real(ifftshift(ifft2(fft_ext_f), axes=(-2, -1)))
+
+    if crop_filter:
+        # crop extended filter to match desired response shape
+        f = crop(f, y_shape)
+
+    return f, sXY, sXX
+
+
+def imosse(A, B, n_ab, X, y, l=0.01, boundary='constant',
+           crop_filter=True, f=1.0):
+    r"""
+    Incremental Minimum Output Sum of Squared Errors (iMOSSE) filter
+
+    Parameters
+    ----------
+    A :
+    B :
+    n_ab : `int`
+        Total number of samples used to produce A and B.
+    X : ``(n_images, n_channels, height, width)`` `ndarray`
+        Training images.
+    y : ``(1, height, width)`` `ndarray`
+        Desired response.
+    l : `float`, optional
+        Regularization parameter.
+    boundary : str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    crop_filter : `bool`, optional
+    f : ``[0, 1]`` `float`, optional
+        Forgetting factor that weights the relative contribution of new
+        samples vs old samples. If 1.0, all samples are weighted equally.
+        If <1.0, more emphasis is put on the new samples.
+
+    Returns
+    -------
+    mosse : ``(1, height, width)`` `ndarray`
+        Incremental Minimum Output Sum of Squared Errors (iMOSSE) filter
+        associated to the training images.
+    sXY :
+    sXX :
+
+    References
+    ----------
+    .. [1] David S. Bolme, J. Ross Beveridge,  Bruce A. Draper and Yui Man Lui.
+    "Visual Object Tracking using Adaptive Correlation Filters". CVPR, 2010.
+    """
+    # number of images; number of channels, height and width
+    n_x, k, hz, wz = X.shape
+
+    # height and width of desired responses
+    _, hy, wy = y.shape
+    y_shape = (hy, wy)
+
+    # multiply the number of samples used to produce the auto and cross
+    # spectral energy matrices A and B by forgetting factor
+    n_ab *= f
+    # total number of samples
+    n = n_ab + n_x
+    # compute weighting factors
+    nu_ab = n_ab / n
+    nu_x = n_x / n
+
+    # extended shape
+    ext_h = hz + hy - 1
+    ext_w = wz + wy - 1
+    ext_shape = (ext_h, ext_w)
+
+    # extend desired response
+    ext_y = pad(y, ext_shape)
+    # fft of extended desired response
+    fft_ext_y = fft2(ext_y)
+
+    # extend images
+    ext_X = pad(X, ext_shape, boundary=boundary)
+
+    # auto and cross spectral energy matrices
+    sXX = 0
+    sXY = 0
+    # for each training image and desired response
+    for ext_x in ext_X:
+        # fft of extended image
+        fft_ext_x = fft2(ext_x)
+
+        # update auto and cross spectral energy matrices
+        sXX += fft_ext_x.conj() * fft_ext_x
+        sXY += fft_ext_x.conj() * fft_ext_y
+
+    # combine old and new auto and cross spectral energy matrices
+    sXY = nu_ab * A + nu_x * sXY
+    sXX = nu_ab * B + nu_x * sXX
+    # compute desired correlation filter
+    fft_ext_f = sXY / (sXX + l)
+    # reshape extended filter to extended image shape
+    fft_ext_f = fft_ext_f.reshape((k, ext_h, ext_w))
+
+    # compute filter inverse fft
+    f = np.real(ifftshift(ifft2(fft_ext_f), axes=(-2, -1)))
+
+    if crop_filter:
+        # crop extended filter to match desired response shape
+        f = crop(f, y_shape)
+
+    return f, sXY, sXX
+
+
+# TODO: Document me!
+def mccf(X, y, l=0.01, boundary='constant', crop_filter=True):
+    r"""
+    Multi-Channel Correlation Filter (MCCF).
+
+    Parameters
+    ----------
+    X : ``(n_images, n_channels, height, width)`` `ndarray`
+        Training images.
+    y : ``(1, height, width)`` `ndarray`
+        Desired response.
+    l : `float`, optional
+        Regularization parameter.
+    boundary : str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    crop_filter : `bool`, optional
+
+    Returns
+    -------
+    mccf: ``(1, height, width)`` `ndarray`
+        Multi-Channel Correlation Filter (MCCF) filter associated to the
+        training images.
+    sXY :
+    sXX :
+
+    References
+    ----------
+    .. [1] Hamed Kiani Galoogahi, Terence Sim,  Simon Lucey. "Multi-Channel
+    Correlation Filters". ICCV, 2013.
+    """
+    # number of images; number of channels, height and width
+    n, k, hx, wx = X.shape
+
+    # height and width of desired responses
+    _, hy, wy = y.shape
+    y_shape = (hy, wy)
+
+    # extended shape
+    ext_h = hx + hy - 1
+    ext_w = wx + wy - 1
+    ext_shape = (ext_h, ext_w)
+    # extended dimensionality
+    ext_d = ext_h * ext_w
+
+    # extend desired response
+    ext_y = pad(y, ext_shape)
+    # fft of extended desired response
+    fft_ext_y = fft2(ext_y)
+
+    # extend images
+    ext_X = pad(X, ext_shape, boundary=boundary)
+
+    # auto and cross spectral energy matrices
+    sXX = 0
+    sXY = 0
+    # for each training image and desired response
+    for ext_x in ext_X:
+        # fft of extended image
+        fft_ext_x = fft2(ext_x)
+
+        # store extended image fft as sparse diagonal matrix
+        diag_fft_x = spdiags(fft_ext_x.reshape((k, -1)),
+                             -np.arange(0, k) * ext_d, ext_d * k, ext_d).T
+        # vectorize extended desired response fft
+        diag_fft_y = fft_ext_y.ravel()
+
+        # update auto and cross spectral energy matrices
+        sXX += diag_fft_x.conj().T.dot(diag_fft_x)
+        sXY += diag_fft_x.conj().T.dot(diag_fft_y)
+
+    # solve ext_d independent k x k linear systems (with regularization)
+    # to obtain desired extended multi-channel correlation filter
+    fft_ext_f = spsolve(sXX + l * speye(sXX.shape[-1]), sXY)
+    # reshape extended filter to extended image shape
+    fft_ext_f = fft_ext_f.reshape((k, ext_h, ext_w))
+
+    # compute filter inverse fft
+    f = np.real(ifftshift(ifft2(fft_ext_f), axes=(-2, -1)))
+
+    if crop_filter:
+        # crop extended filter to match desired response shape
+        f = crop(f, y_shape)
+
+    return f, sXY, sXX
+
+
+# TODO: Document me!
+def imccf(A, B, n_ab, X, y, l=0.01, boundary='constant', crop_filter=True,
+          f=1.0):
+    r"""
+    Incremental Multi-Channel Correlation Filter (iMCCF)
+
+    Parameters
+    ----------
+    A :
+    B :
+    n_ab : `int`
+        Total number of samples used to produce A and B.
+    X : ``(n_images, n_channels, height, width)`` `ndarray`
+        Training images.
+    y : ``(1, height, width)`` `ndarray`
+        Desired response.
+    l : `float`, optional
+        Regularization parameter.
+    boundary : str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    crop_filter : `bool`, optional
+    f : ``[0, 1]`` `float`, optional
+        Forgetting factor that weights the relative contribution of new
+        samples vs old samples. If 1.0, all samples are weighted equally.
+        If <1.0, more emphasis is put on the new samples.
+
+    Returns
+    -------
+    mccf : ``(1, height, width)`` `ndarray`
+        Multi-Channel Correlation Filter (MCCF) filter associated to the
+        training images.
+    sXY :
+    sXX :
+
+    References
+    ----------
+    .. [1] David S. Bolme, J. Ross Beveridge,  Bruce A. Draper and Yui Man Lui.
+    "Visual Object Tracking using Adaptive Correlation Filters". CVPR, 2010.
+    .. [2] Hamed Kiani Galoogahi, Terence Sim,  Simon Lucey. "Multi-Channel
+    Correlation Filters". ICCV, 2013.
+    """
+    # number of images; number of channels, height and width
+    n_x, k, hz, wz = X.shape
+
+    # height and width of desired responses
+    _, hy, wy = y.shape
+    y_shape = (hy, wy)
+
+    # multiply the number of samples used to produce the auto and cross
+    # spectral energy matrices A and B by forgetting factor
+    n_ab *= f
+    # total number of samples
+    n = n_ab + n_x
+    # compute weighting factors
+    nu_ab = n_ab / n
+    nu_x = n_x / n
+
+    # extended shape
+    ext_h = hz + hy - 1
+    ext_w = wz + wy - 1
+    ext_shape = (ext_h, ext_w)
+    # extended dimensionality
+    ext_d = ext_h * ext_w
+
+    # extend desired response
+    ext_y = pad(y, ext_shape)
+    # fft of extended desired response
+    fft_ext_y = fft2(ext_y)
+
+    # extend images
+    ext_X = pad(X, ext_shape, boundary=boundary)
+
+    # auto and cross spectral energy matrices
+    sXX = 0
+    sXY = 0
+    # for each training image and desired response
+    for ext_x in ext_X:
+        # fft of extended image
+        fft_ext_x = fft2(ext_x)
+
+        # store extended image fft as sparse diagonal matrix
+        diag_fft_x = spdiags(fft_ext_x.reshape((k, -1)),
+                             -np.arange(0, k) * ext_d, ext_d * k, ext_d).T
+        # vectorize extended desired response fft
+        diag_fft_y = fft_ext_y.ravel()
+
+        # update auto and cross spectral energy matrices
+        sXX += diag_fft_x.conj().T.dot(diag_fft_x)
+        sXY += diag_fft_x.conj().T.dot(diag_fft_y)
+
+    # combine old and new auto and cross spectral energy matrices
+    sXY = nu_ab * A + nu_x * sXY
+    sXX = nu_ab * B + nu_x * sXX
+    # solve ext_d independent k x k linear systems (with regularization)
+    # to obtain desired extended multi-channel correlation filter
+    fft_ext_f = spsolve(sXX + l * speye(sXX.shape[-1]), sXY)
+    # reshape extended filter to extended image shape
+    fft_ext_f = fft_ext_f.reshape((k, ext_h, ext_w))
+
+    # compute filter inverse fft
+    f = np.real(ifftshift(ifft2(fft_ext_f), axes=(-2, -1)))
+    if crop_filter:
+        # crop extended filter to match desired response shape
+        f = crop(f, y_shape)
+
+    return f, sXY, sXX
diff --git a/menpofit/math/fft_utils.py b/menpofit/math/fft_utils.py
new file mode 100644
index 0000000..a3e9a85
--- /dev/null
+++ b/menpofit/math/fft_utils.py
@@ -0,0 +1,244 @@
+from __future__ import division
+import warnings
+import numpy as np
+from functools import wraps
+from menpo.feature.base import rebuild_feature_image
+try:
+    # try importing pyfftw
+    from pyfftw.interfaces.numpy_fft import fft2, ifft2, fftshift, ifftshift
+
+    try:
+        # try calling fft2 on a 4-dimensional array (this is known to be
+        # problematic on some Linux distributions)
+        fft2(np.zeros((1, 1, 1, 1)))
+    except RuntimeError:
+        warnings.warn("pyfftw is known to be buggy on your system, numpy.fft "
+                      "will be used instead. Consequently, all algorithms "
+                      "using ffts will be running at a slower speed.",
+                      RuntimeWarning)
+        from numpy.fft import fft2, ifft2, fftshift, ifftshift
+except ImportError:
+    warnings.warn("pyfftw is not installed on your system, numpy.fft will be "
+                  "used instead. Consequently, all algorithms using ffts "
+                  "will be running at a slower speed. Consider installing "
+                  "pyfftw (pip install pyfftw) to speed up your ffts.",
+                  ImportWarning)
+    from numpy.fft import fft2, ifft2, fftshift, ifftshift
+
+
+# TODO: Document me!
+def pad(pixels, ext_shape, boundary='constant'):
+    r"""Pad the last two axes of ``pixels`` up to ``ext_shape``; an odd
+    leftover row/column goes on the leading side (``boundary`` = pad mode)."""
+    h, w = pixels.shape[-2:]
+
+    h_margin = (ext_shape[0] - h) // 2
+    w_margin = (ext_shape[1] - w) // 2
+
+    h_margin2 = h_margin
+    if h + 2 * h_margin < ext_shape[0]:
+        h_margin += 1
+
+    w_margin2 = w_margin
+    if w + 2 * w_margin < ext_shape[1]:
+        w_margin += 1
+
+    pad_width = []
+    for _ in pixels.shape[:-2]:
+        pad_width.append((0, 0))
+    pad_width += [(h_margin, h_margin2), (w_margin, w_margin2)]
+    pad_width = tuple(pad_width)
+
+    return np.pad(pixels, pad_width, mode=boundary)
+
+
+# TODO: Document me!
+def crop(pixels, shape):
+    r"""Centrally crop the last two axes of ``pixels`` to ``shape``; an odd
+    leftover row/column is dropped from the leading side."""
+    h, w = pixels.shape[-2:]
+
+    h_margin = (h - shape[0]) // 2
+    w_margin = (w - shape[1]) // 2
+
+    h_corrector = 1 if np.remainder(h - shape[0], 2) != 0 else 0
+    w_corrector = 1 if np.remainder(w - shape[1], 2) != 0 else 0
+    # explicit end indices: a ``:-0`` stop would give an empty slice
+    return pixels[...,
+                  h_margin + h_corrector:h - h_margin,
+                  w_margin + w_corrector:w - w_margin]
+
+
+# TODO: Document me!
+def ndconvolution(wrapped):
+    r"""Decorator that lets ``wrapped(image, filter, ...)`` accept objects
+    exposing ``.pixels`` in place of `ndarray` arguments."""
+    @wraps(wrapped)
+    def wrapper(image, filter, *args, **kwargs):
+        if not isinstance(image, np.ndarray) and not isinstance(filter, np.ndarray):
+            # Both image and filter are menpo images
+            feature = wrapped(image.pixels, filter.pixels, *args, **kwargs)
+            return rebuild_feature_image(image, feature)
+        elif not isinstance(image, np.ndarray):
+            # Image is menpo image
+            feature = wrapped(image.pixels, filter, *args, **kwargs)
+            return rebuild_feature_image(image, feature)
+        elif not isinstance(filter, np.ndarray):
+            # filter is menpo image: unwrap it, the result stays an ndarray
+            return wrapped(image, filter.pixels, *args, **kwargs)
+        else:
+            return wrapped(image, filter, *args, **kwargs)
+    return wrapper
+
+
+# TODO: Document me!
+@ndconvolution
+def fft_convolve2d(x, f, mode='same', boundary='constant', fft_filter=False):
+    r"""
+    Performs fast 2d convolution in the frequency domain convolving each image
+    channel with its corresponding filter channel.
+
+    Parameters
+    ----------
+    x : ``(channels, height, width)`` `ndarray`
+        Image.
+    f : ``(channels, height, width)`` `ndarray`
+        Filter.
+    mode : str {`full`, `same`, `valid`}, optional
+        Determines the shape of the resulting convolution.
+    boundary: str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    fft_filter: `bool`, optional
+        If `True`, the filter is assumed to be defined on the frequency
+        domain. If `False` the filter is assumed to be defined on the
+        spatial domain.
+
+    Returns
+    -------
+    c: ``(channels, height, width)`` `ndarray`
+        Result of convolving each image channel with its corresponding
+        filter channel.
+    """
+    x_shape = np.asarray(x.shape[-2:])  # used by the crop in both branches
+    if fft_filter:
+        # extended shape is filter shape
+        ext_shape = np.asarray(f.shape[-2:])
+
+        # extend image
+        ext_x = pad(x, ext_shape, boundary=boundary)
+
+        # compute ffts of extended image
+        fft_ext_x = fft2(ext_x)
+        fft_ext_f = f
+    else:
+        # extended shape
+        f_shape = np.asarray(f.shape[-2:])
+        f_half_shape = (f_shape / 2).astype(int)
+        ext_shape = x_shape + f_half_shape - 1
+
+        # extend image and filter
+        ext_x = pad(x, ext_shape, boundary=boundary)
+        ext_f = pad(f, ext_shape)
+
+        # compute ffts of extended image and extended filter
+        fft_ext_x = fft2(ext_x)
+        fft_ext_f = fft2(ext_f)
+
+    # compute extended convolution in Fourier domain
+    fft_ext_c = fft_ext_f * fft_ext_x
+
+    # compute ifft of extended convolution
+    ext_c = np.real(ifftshift(ifft2(fft_ext_c), axes=(-2, -1)))
+    # NOTE(review): 'valid' still needs f_half_shape, unset if fft_filter=True
+    if mode == 'full':
+        return ext_c
+    elif mode == 'same':
+        return crop(ext_c, x_shape)
+    elif mode == 'valid':
+        return crop(ext_c, x_shape - f_half_shape + 1)
+    else:
+        raise ValueError(
+            "mode={}, is not supported. The only supported "
+            "modes are: 'full', 'same' and 'valid'.".format(mode))
+
+
+# TODO: Document me!
+@ndconvolution
+def fft_convolve2d_sum(x, f, mode='same', boundary='constant',
+                       fft_filter=False, axis=0, keepdims=True):
+    r"""
+    Performs fast 2d convolution in the frequency domain convolving each image
+    channel with its corresponding filter channel and summing across the
+    channel axis.
+
+    Parameters
+    ----------
+    x : ``(channels, height, width)`` `ndarray`
+        Image.
+    f : ``(channels, height, width)`` `ndarray`
+        Filter.
+    mode : str {`full`, `same`, `valid`}, optional
+        Determines the shape of the resulting convolution.
+    boundary: str {`constant`, `symmetric`}, optional
+        Determines how the image is padded.
+    fft_filter: `bool`, optional
+        If `True`, the filter is assumed to be defined on the frequency
+        domain. If `False` the filter is assumed to be defined on the
+        spatial domain.
+    axis : `int`, optional
+        The axis across which the summation is performed.
+    keepdims: `boolean`, optional
+        If `True`, the summed axis is kept with length one; if `False`, it is
+        removed from the result.
+
+    Returns
+    -------
+    c: ``(1, height, width)`` `ndarray`
+        Result of convolving each image channel with its corresponding
+        filter channel and summing across the channel axis.
+    """
+    if fft_filter:
+        fft_ext_f = f
+
+        # extended shape is fft_ext_filter shape
+        x_shape = np.asarray(x.shape[-2:])
+        f_shape = ((np.asarray(fft_ext_f.shape[-2:]) + 1) / 1.5).astype(int)
+        f_half_shape = (f_shape / 2).astype(int)
+        ext_shape = np.asarray(f.shape[-2:])
+
+        # extend image and filter
+        ext_x = pad(x, ext_shape, boundary=boundary)
+
+        # compute ffts of extended image
+        fft_ext_x = fft2(ext_x)
+    else:
+        # extended shape
+        x_shape = np.asarray(x.shape[-2:])
+        f_shape = np.asarray(f.shape[-2:])
+        f_half_shape = (f_shape / 2).astype(int)
+        ext_shape = x_shape + f_half_shape - 1
+
+        # extend image and filter
+        ext_x = pad(x, ext_shape, boundary=boundary)
+        ext_f = pad(f, ext_shape)
+
+        # compute ffts of extended image and extended filter
+        fft_ext_x = fft2(ext_x)
+        fft_ext_f = fft2(ext_f)
+
+    # compute extended convolution in Fourier domain
+    fft_ext_c = np.sum(fft_ext_f * fft_ext_x, axis=axis, keepdims=keepdims)
+
+    # compute ifft of extended convolution
+    ext_c = np.real(ifftshift(ifft2(fft_ext_c), axes=(-2, -1)))
+
+    if mode == 'full':
+        return ext_c
+    elif mode == 'same':
+        return crop(ext_c, x_shape)
+    elif mode == 'valid':
+        return crop(ext_c, x_shape - f_half_shape + 1)
+    else:
+        raise ValueError(
+            "mode={}, is not supported. The only supported "
+            "modes are: 'full', 'same' and 'valid'.".format(mode))
diff --git a/menpofit/math/regression.py b/menpofit/math/regression.py
new file mode 100644
index 0000000..0ec4ac6
--- /dev/null
+++ b/menpofit/math/regression.py
@@ -0,0 +1,81 @@
+import numpy as np
+
+
+# TODO: document me!
+class IRLRegression(object):
+    r"""
+    Incremental Regularized Linear Regression
+    """
+    def __init__(self, alpha=0, bias=True):
+        self.alpha = alpha
+        self.bias = bias
+        self.V = None
+        self.W = None
+
+    def train(self, X, Y):
+        if self.bias:
+            # add bias
+            X = np.hstack((X, np.ones((X.shape[0], 1))))
+
+        # regularized linear regression
+        XX = X.T.dot(X)
+        np.fill_diagonal(XX, self.alpha + np.diag(XX))
+        self.V = np.linalg.inv(XX)
+        self.W = self.V.dot(X.T.dot(Y))
+
+    def increment(self, X, Y):
+        if self.bias:
+            # add bias
+            X = np.hstack((X, np.ones((X.shape[0], 1))))
+
+        # incremental regularized linear regression
+        U = X.dot(self.V).dot(X.T)
+        np.fill_diagonal(U, 1 + np.diag(U))
+        U = np.linalg.inv(U)
+        Q = self.V.dot(X.T).dot(U).dot(X)
+        self.V = self.V - Q.dot(self.V)
+        self.W = self.W - Q.dot(self.W) + self.V.dot(X.T.dot(Y))
+
+    def predict(self, x):
+        if self.bias:
+            if len(x.shape) == 1:
+                x = np.hstack((x, np.ones(1)))
+            else:
+                x = np.hstack((x, np.ones((x.shape[0], 1))))
+        return np.dot(x, self.W)
+
+
+# TODO: document me!
+class IIRLRegression(IRLRegression):
+    r"""
+    Indirect Incremental Regularized Linear Regression
+    """
+    def __init__(self, alpha=0, bias=False, alpha2=0):
+        # TODO: Can we model the bias? May need to slice off of prediction?
+        super(IIRLRegression, self).__init__(alpha=alpha, bias=False)
+        self.alpha2 = alpha2
+
+    def train(self, X, Y):
+        # regularized linear regression exchanging the roles of X and Y
+        super(IIRLRegression, self).train(Y, X)
+        J = self.W
+        # solve the original problem by computing the pseudo-inverse of the
+        # previous solution
+        # Note that everything is transposed from the above exchanging of roles
+        H = J.dot(J.T)
+        np.fill_diagonal(H, self.alpha2 + np.diag(H))
+        self.W = np.linalg.solve(H, J)
+
+    def increment(self, X, Y):
+        # incremental least squares exchanging the roles of X and Y
+        super(IIRLRegression, self).increment(Y, X)
+        J = self.W
+        # solve the original problem by computing the pseudo-inverse of the
+        # previous solution
+        # Note that everything is transposed from the above exchanging of roles
+        H = J.dot(J.T)
+        np.fill_diagonal(H, self.alpha2 + np.diag(H))
+        self.W = np.linalg.solve(H, J)
+
+    def predict(self, x):
+        return self.W.dot(x.T).T
diff --git a/menpofit/modelinstance.py b/menpofit/modelinstance.py
index cf6924a..e18c540 100644
--- a/menpofit/modelinstance.py
+++ b/menpofit/modelinstance.py
@@ -1,5 +1,4 @@
 import numpy as np
-
 from menpo.base import Targetable, Vectorizable
 from menpo.model import MeanInstanceLinearModel
 from menpofit.differentiable import DP
@@ -185,7 +184,7 @@ def d_dp(self, points):
         return d_dp.swapaxes(0, 1)
 
 
-# TODO: document me
+# TODO: document me!
 class GlobalPDM(PDM):
     r"""
     """
@@ -247,7 +246,6 @@ def _weights_for_target(self, target):
             Weights of the statistical model that generate the closest
             PointCloud to the requested target
         """
-
         self._update_global_transform(target)
         projected_target = self.global_transform.pseudoinverse().apply(target)
         # now we have the target in model space, project it to recover the
@@ -319,11 +317,11 @@ def _global_transform_d_dp(self, points):
         return self.global_transform.d_dp(points)
 
 
-# TODO: document me
+# TODO: document me!
 class OrthoPDM(GlobalPDM):
     r"""
     """
-    def __init__(self, model, global_transform_cls):
+    def __init__(self, model):
         # 1. Construct similarity model from the mean of the model
         self.similarity_model = similarity_2d_instance_model(model.mean())
         # 2. Orthonormalize model and similarity model
@@ -331,7 +329,9 @@ def __init__(self, model, global_transform_cls):
         model_cpy.orthonormalize_against_inplace(self.similarity_model)
         self.similarity_weights = self.similarity_model.project(
             model_cpy.mean())
-        super(OrthoPDM, self).__init__(model_cpy, global_transform_cls)
+        from menpofit.transform import DifferentiableAlignmentSimilarity
+        super(OrthoPDM, self).__init__(model_cpy,
+                                       DifferentiableAlignmentSimilarity)
 
     @property
     def global_parameters(self):
@@ -354,3 +354,4 @@ def _update_global_weights(self, global_weights):
     def _global_transform_d_dp(self, points):
         return self.similarity_model.components.reshape(
             self.n_global_parameters, -1, self.n_dims).swapaxes(0, 1)
+
diff --git a/menpofit/regression/__init__.py b/menpofit/regression/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/menpofit/regression/base.py b/menpofit/regression/base.py
deleted file mode 100644
index edd2dcf..0000000
--- a/menpofit/regression/base.py
+++ /dev/null
@@ -1,295 +0,0 @@
-import abc
-
-from menpofit.fitter import Fitter
-from menpofit.fittingresult import (NonParametricFittingResult,
-                                    SemiParametricFittingResult,
-                                    ParametricFittingResult)
-
-
-class Regressor(Fitter):
-    r"""
-    An abstract base class for fitting Regressors.
-
-    Parameters
-    ----------
-    regressor : callable
-        The regressor to be used from
-        `menpo.fit.regression.regressioncallables`.
-    features : function
-        The feature function used to regress.
-    """
-    def __init__(self, regressor, features):
-        self.regressor = regressor
-        self.features = features
-
-    def _set_up(self):
-        r"""
-        Abstract method that sets up the fitter object.
-        """
-        pass
-
-    def _fit(self, fitting_result, max_iters=1):
-        r"""
-        Abstract method to fit an image.
-
-        Parameters
-        ----------
-        fitting_result : `menpo.fit.fittingresult`
-            The fitting result object.
-        max_iters : int
-            The maximum number of iterations.
-        """
-        image = fitting_result.image
-        initial_shape = fitting_result.initial_shape
-        n_iters = 0
-
-        while n_iters < max_iters:
-            features = self.features(image, initial_shape)
-            delta_p = self.regressor(features)
-
-            fitted_shape, parameters = self.update(delta_p, initial_shape)
-            fitting_result.parameters.append(parameters)
-            n_iters += 1
-
-        return fitting_result
-
-    @abc.abstractmethod
-    def update(self, delta_p, initial_shape):
-        r"""
-        Abstract method to update the parameters.
-        """
-        pass
-
-
-class NonParametricRegressor(Regressor):
-    r"""
-    Fitter of Non-Parametric Regressor.
-
-    Parameters
-    ----------
-    regressor : callable
-        The regressor to be used from
-        `menpo.fit.regression.regressioncallables`.
-    features : function
-        The feature function used to regress.
-    """
-    def __init__(self, regressor, features):
-        super(NonParametricRegressor, self).__init__(
-            regressor, features)
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the regression type.
-        """
-        return "Non-Parametric"
-
-    def _create_fitting_result(self, image, shapes, gt_shape=None):
-        r"""
-        Creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image..
-        shape : :map:`PointCloud`
-            The current shape.
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-        """
-        return NonParametricFittingResult(image, self, parameters=[shapes],
-                                          gt_shape=gt_shape)
-
-    def update(self, delta_shape, initial_shape):
-        r"""
-        Updates the shape.
-
-        Parameters
-        ----------
-        delta_shape : :map:`PointCloud`
-            The shape increment.
-        initial_shape : :map:`PointCloud`
-            The current shape.
-        """
-        fitted_shape = initial_shape.from_vector(
-            initial_shape.as_vector() + delta_shape)
-        return fitted_shape, fitted_shape
-
-    def get_parameters(self, shape):
-        r"""
-        Method that makes sure that the parameter passed to the fit method is
-        the shape.
-
-        Parameters
-        ----------
-        shape: :map:`PointCloud`
-            The current shape.
-        """
-        return shape
-
-
-class SemiParametricRegressor(Regressor):
-    r"""
-    Fitter of Semi-Parametric Regressor.
-
-    Parameters
-    ----------
-    regressor : callable
-        The regressor to be used from
-        `menpo.fit.regression.regressioncallables`.
-    features : function
-        The feature function used to regress.
-    """
-    def __init__(self, regressor, features, transform, update='composition'):
-        super(SemiParametricRegressor, self).__init__(
-            regressor, features)
-        self.transform = transform
-        self._update = self._select_update(update)
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the regression type.
-        """
-        return "Semi-Parametric"
-
-    def _create_fitting_result(self, image, parameters, gt_shape=None):
-        r"""
-        Creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image..
-        shape : :map:`PointCloud`
-            The current shape.
-        gt_shape : :map:`PointCloud`, optional
-            The ground truth shape.
-        """
-        self.transform.from_vector_inplace(parameters)
-        return SemiParametricFittingResult(
-            image, self, parameters=[self.transform.as_vector()],
-            gt_shape=gt_shape)
-
-    def fit(self, image, initial_parameters, gt_shape=None, **kwargs):
-        self.transform.from_vector_inplace(initial_parameters)
-        return Fitter.fit(self, image, initial_parameters, gt_shape=gt_shape,
-                          **kwargs)
-
-    def _select_update(self, update):
-        r"""
-        Select the way to update the parameters.
-
-        Parameters
-        ----------
-        update : {'compositional', 'additive'}
-            The update method.
-
-        Returns
-        -------
-        update : `function`
-            The correct function to apply the update chosen.
-        """
-        if update == 'additive':
-            return self._additive
-        elif update == 'compositional':
-            return self._compositional
-        else:
-            raise ValueError('Unknown update string selected. Valid'
-                             'options are: additive, compositional')
-
-    def _additive(self, delta_p):
-        r"""
-        Updates the parameters in the additive way.
-
-        Parameters
-        ----------
-        delta_p : `ndarray`
-            The parameters increment
-        """
-        parameters = self.transform.as_vector() + delta_p
-        self.transform.from_vector_inplace(parameters)
-
-    def _compositional(self, delta_p):
-        r"""
-        Updates the parameters in the compositional way.
-
-        Parameters
-        ----------
-        delta_p : `ndarray`
-            The parameters increment
-        """
-        self.transform.compose_after_from_vector_inplace(delta_p)
-
-    def update(self, delta_p, initial_shape):
-        r"""
-        Updates the parameters of the shape model.
-
-        Parameters
-        ----------
-        delta_p : `ndarray`
-            The parameters increment.
-
-        initial_shape : :map:`PointCloud`
-            The current shape.
-        """
-        self._update(delta_p)
-        return self.transform.target, self.transform.as_vector()
-
-    def get_parameters(self, shape):
-        r"""
-        Method that makes sure that the parameter passed to the fit method is
-        the model parameters.
-
-        Parameters
-        ----------
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        self.transform.set_target(shape)
-        return self.transform.as_vector()
-
-
-class ParametricRegressor(SemiParametricRegressor):
-    r"""
-    Fitter of Parametric Regressor.
-
-    Parameters
-    ----------
-    regressor : callable
-        The regressor to be used from
-        `menpo.fit.regression.regressioncallables`.
-    features : function
-        The feature function used to regress.
-    """
-    def __init__(self, regressor, features, appearance_model, transform,
-                 update='composition'):
-        super(ParametricRegressor, self).__init__(
-            regressor, features, transform, update=update)
-        self.appearance_model = appearance_model
-        self.template = appearance_model.mean()
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the regression type.
-        """
-        return "Parametric"
-
-    def _create_fitting_result(self, image, parameters, gt_shape=None):
-        r"""
-        Creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image..
-        shape : :map:`PointCloud`
-            The current shape.
-        gt_shape : :map:`PointCloud`, optional
-            The ground truth shape.
-        """
-        self.transform.from_vector_inplace(parameters)
-        return ParametricFittingResult(
-            image, self, parameters=[self.transform.as_vector()],
-            gt_shape=gt_shape)
diff --git a/menpofit/regression/parametricfeatures.py b/menpofit/regression/parametricfeatures.py
deleted file mode 100644
index eedbe89..0000000
--- a/menpofit/regression/parametricfeatures.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import numpy as np
-
-
-def extract_parametric_features(appearance_model, warped_image,
-                                rergession_features):
-    r"""
-    Extracts a particular parametric feature given an appearance model and
-    a warped image.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image : :map:`MaskedImage`
-        The warped image.
-    rergession_features : callable
-        Defines the function from which the parametric features will be
-        extracted.
-
-        Non-default regression feature options and new experimental features
-        can be used by defining a callable. In this case, the callable must
-        define a constructor that receives as an input an appearance model and
-        a warped masked image and on calling returns a particular parametric
-        feature representation.
-
-    Returns
-    -------
-    features : `ndarray`
-        The resulting parametric features.
-    """
-    if rergession_features is None:
-        features = weights(appearance_model, warped_image)
-    elif hasattr(rergession_features, '__call__'):
-        features = rergession_features(appearance_model, warped_image)
-    else:
-        raise ValueError("regression_features can only be: (1) None "
-                         "or (2) a callable defining a non-standard "
-                         "feature computation (see `menpo.fit.regression."
-                         "parametricfeatures`")
-    return features
-
-
-def weights(appearance_model, warped_image):
-    r"""
-    Returns the resulting weights after projecting the warped image to the
-    appearance PCA model.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image : :map:`MaskedImage`
-        The warped image.
-    """
-    return appearance_model.project(warped_image)
-
-
-def whitened_weights(appearance_model, warped_image):
-    r"""
-    Returns the sheared weights after projecting the warped image to the
-    appearance PCA model.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image : :map:`MaskedImage`
-        The warped image.
-    """
-    return appearance_model.project_whitened(warped_image)
-
-
-def appearance(appearance_model, warped_image):
-    r"""
-    Projects the warped image onto the appearance model and rebuilds from the
-    weights found.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image : :map:`MaskedImage`
-        The warped image.
-    """
-    return appearance_model.reconstruct(warped_image).as_vector()
-
-
-def difference(appearance_model, warped_image):
-    r"""
-    Returns the difference between the warped image and the image constructed
-    by projecting the warped image onto the appearance model and rebuilding it
-    from the weights found.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image : :map:`MaskedImage`
-        The warped image.
-    """
-    return (warped_image.as_vector() -
-            appearance(appearance_model, warped_image))
-
-
-def project_out(appearance_model, warped_image):
-    r"""
-    Returns a version of the whitened warped image where all the basis of the
-    model have been projected out and which has been scaled by the inverse of
-    the appearance model's noise_variance.
-
-    Parameters
-    ----------
-    appearance_model: :class:`menpo.model.pca`
-        The appearance model based on which the parametric features will be
-        computed.
-    warped_image: :class:`menpo.image.masked`
-        The warped image.
-    """
-    diff = warped_image.as_vector() - appearance_model.mean().as_vector()
-    return appearance_model.distance_to_subspace_vector(diff).ravel()
-
-
-class nonparametric_regression_features(object):
-
-    def __init__(self, patch_shape, feature_patch_length, regression_features):
-        self.patch_shape = patch_shape
-        self.feature_patch_length = feature_patch_length
-        self.regression_features = regression_features
-
-    def __call__(self, image, shape):
-        # extract patches
-        patches = image.extract_patches(shape, patch_size=self.patch_shape)
-
-        features = np.zeros((shape.n_points, self.feature_patch_length))
-        for j, patch in enumerate(patches):
-            # compute features
-            features[j, ...] = self.regression_features(patch).as_vector()
-
-        return np.hstack((features.ravel(), 1))
-
-
-class parametric_regression_features(object):
-
-    def __init__(self, transform, template, appearance_model,
-                 regression_features):
-        self.transform = transform
-        self.template = template
-        self.appearance_model = appearance_model
-        self.regression_features = regression_features
-
-    def __call__(self, image, shape):
-        self.transform.set_target(shape)
-        # TODO should the template be a mask or a shape? warp_to_shape here
-        warped_image = image.warp_to_mask(self.template.mask, self.transform,
-                                          warp_landmarks=False)
-        features = extract_parametric_features(
-            self.appearance_model, warped_image, self.regression_features)
-        return np.hstack((features, 1))
-
-
-class semiparametric_classifier_regression_features(object):
-
-    def __init__(self, patch_shape, classifiers):
-        self.patch_shape = patch_shape
-        self.classifiers = classifiers
-
-    def __call__(self, image, shape):
-        patches = image.extract_patches(shape, patch_size=self.patch_shape)
-        features = [clf(patch.as_vector(keep_channels=True))
-                    for (clf, patch) in zip(self.classifiers, patches)]
-        return np.hstack((np.asarray(features).ravel(), 1))
diff --git a/menpofit/regression/regressors.py b/menpofit/regression/regressors.py
deleted file mode 100644
index 5f1b534..0000000
--- a/menpofit/regression/regressors.py
+++ /dev/null
@@ -1,152 +0,0 @@
-from __future__ import division
-import numpy as np
-
-
-class mlr(object):
-    r"""
-    Multivariate Linear Regression
-
-    Parameters
-    ----------
-    X: numpy.array
-        The regression features used to create the coefficient matrix.
-    T: numpy.array
-        The shapes differential that denote the dependent variable.
-    """
-    def __init__(self, X, T, lmda=0):
-        XX = np.dot(X.T, X)
-        if lmda > 0:
-            np.fill_diagonal(XX, lmda + np.diag(XX))
-        XT = np.dot(X.T, T)
-        self.R = np.linalg.solve(XX, XT)
-
-    def __call__(self, x):
-        return np.dot(x, self.R)
-
-
-class mlr_svd(object):
-    r"""
-    Multivariate Linear Regression using SVD decomposition
-
-    Parameters
-    ----------
-    X: numpy.array
-        The regression features used to create the coefficient matrix.
-    T: numpy.array
-        The shapes differential that denote the dependent variable.
-    variance: float or None, Optional
-        The SVD variance.
-
-        Default: None
-
-    Raises
-    ------
-    ValueError
-        variance must be set to a number between 0 and 1
-    """
-    def __init__(self, X, T, variance=None):
-        self.R, _, _, _ = _svd_regression(X, T, variance=variance)
-
-    def __call__(self, x):
-        return np.dot(x, self.R)
-
-
-class mlr_pca(object):
-    r"""
-    Multivariate Linear Regression using PCA reconstructions
-
-    Parameters
-    ----------
-    X: numpy.array
-        The regression features used to create the coefficient matrix.
-    T: numpy.array
-        The shapes differential that denote the dependent variable.
-    variance: float or None, Optional
-        The SVD variance.
-
-        Default: None
-
-    Raises
-    ------
-    ValueError
-        variance must be set to a number between 0 and 1
-    """
-    def __init__(self, X, T, variance=None):
-        self.R, _, _, self.V = _svd_regression(X, T, variance=variance)
-
-    def _call__(self, x):
-        x = np.dot(np.dot(x, self.V.T), self.V)
-        return np.dot(x, self.R)
-
-
-class mlr_pca_weights(object):
-    r"""
-    Multivariate Linear Regression using PCA weights
-
-    Parameters
-    ----------
-    X: numpy.array
-        The regression features used to create the coefficient matrix.
-    T: numpy.array
-        The shapes differential that denote the dependent variable.
-    variance: float or None, Optional
-        The SVD variance.
-
-        Default: None
-
-    Raises
-    ------
-    ValueError
-        variance must be set to a number between 0 and 1
-    """
-    def __init__(self, X, T, variance=None):
-        _, _, _, self.V = _svd_regression(X, T, variance=variance)
-        W = np.dot(X, self.V.T)
-        self.R, _, _, _ = _svd_regression(W, T)
-
-    def __call__(self, x):
-        w = np.dot(x, self.V.T)
-        return np.dot(w, self.R)
-
-
-def _svd_regression(X, T, variance=None):
-    r"""
-    SVD decomposition for regression.
-
-    Parameters
-    ----------
-    X: numpy.array
-        The regression features used to create the coefficient matrix.
-    T: numpy.array
-        The shapes differential that denote the dependent variable.
-    variance: float or None, Optional
-        The SVD variance.
-
-        Default: None
-
-    Raises
-    ------
-    ValueError
-        variance must be set to a number between 0 and 1
-    """
-    if variance is not None and not (0 < variance <= 1):
-        raise ValueError("variance must be set to a number between 0 and 1.")
-
-    U, s, V = np.linalg.svd(X)
-    if variance:
-        total = sum(s)
-        acc = 0
-        for j, y in enumerate(s):
-            acc += y
-            if acc / total >= variance:
-                r = j+1
-                break
-    else:
-        tol = np.max(X.shape) * np.spacing(np.max(s))
-        r = np.sum(s > tol)
-    U = U[:, :r]
-    s = 1 / s[:r]
-    V = V[:r, :]
-    R = np.dot(np.dot(V.T * s, U.T), T)
-
-    return R, U, s, V
diff --git a/menpofit/regression/trainer.py b/menpofit/regression/trainer.py
deleted file mode 100644
index c1ea8cc..0000000
--- a/menpofit/regression/trainer.py
+++ /dev/null
@@ -1,649 +0,0 @@
-from __future__ import division, print_function
-import abc
-import numpy as np
-from menpo.image import Image
-from menpo.feature import sparse_hog
-from menpo.visualize import print_dynamic, progress_bar_str
-
-from menpofit.base import noisy_align, build_sampling_grid
-from menpofit.fittingresult import (NonParametricFittingResult,
-                                    SemiParametricFittingResult,
-                                    ParametricFittingResult)
-from .base import (NonParametricRegressor, SemiParametricRegressor,
-                   ParametricRegressor)
-from .parametricfeatures import extract_parametric_features, weights, \
-    nonparametric_regression_features, parametric_regression_features, \
-    semiparametric_classifier_regression_features
-from .regressors import mlr
-
-
-class RegressorTrainer(object):
-    r"""
-    An abstract base class for training regressors.
-
-    Parameters
-    ----------
-    reference_shape : :map:`PointCloud`
-        The reference shape that will be used.
-    regression_type : `callable`, optional
-        A `callable` that defines the regression technique to be used.
-        Examples of such callables can be found in
-        :ref:`regression_callables`
-    regression_features : ``None`` or `string` or `function`, optional
-        The features that are used during the regression.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : boolean, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int`, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-    """
-    __metaclass__ = abc.ABCMeta
-
-    def __init__(self, reference_shape, regression_type=mlr,
-                 regression_features=None, noise_std=0.04, rotation=False,
-                 n_perturbations=10):
-        self.reference_shape = reference_shape
-        self.regression_type = regression_type
-        self.regression_features = regression_features
-        self.rotation = rotation
-        self.noise_std = noise_std
-        self.n_perturbations = n_perturbations
-
-    def _regression_data(self, images, gt_shapes, perturbed_shapes,
-                         verbose=False):
-        r"""
-        Method that generates the regression data : features and delta_ps.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images.
-
-        gt_shapes : :map:`PointCloud` list
-            List of the ground truth shapes that correspond to the images.
-
-        perturbed_shapes : :map:`PointCloud` list
-            List of the perturbed shapes in order to regress.
-
-        verbose : `boolean`, optional
-            If ``True``, the progress is printed.
-        """
-        if verbose:
-            print_dynamic('- Generating regression data')
-
-        n_images = len(images)
-        features = []
-        delta_ps = []
-        for j, (i, s, p_shape) in enumerate(zip(images, gt_shapes,
-                                                perturbed_shapes)):
-            if verbose:
-                print_dynamic('- Generating regression data - {}'.format(
-                    progress_bar_str((j + 1.) / n_images, show_bar=False)))
-            for ps in p_shape:
-                features.append(self.features(i, ps))
-                delta_ps.append(self.delta_ps(s, ps))
-        return np.asarray(features), np.asarray(delta_ps)
-
-    @abc.abstractmethod
-    def features(self, image, shape):
-        r"""
-        Abstract method to generate the features for the regression.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image.
-
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        pass
-
-    @abc.abstractmethod
-    def get_features_function(self):
-        r"""
-        Abstract method to return the function that computes the features for
-        the regression.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image.
-
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        pass
-
-    @abc.abstractmethod
-    def delta_ps(self, gt_shape, perturbed_shape):
-        r"""
-        Abstract method to generate the delta_ps for the regression.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-
-        perturbed_shape : :map:`PointCloud`
-            The perturbed shape.
-        """
-        pass
-
-    def train(self, images, shapes, perturbed_shapes=None, verbose=False,
-              **kwargs):
-        r"""
-        Trains a Regressor given a list of landmarked images.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images from which to train the regressor.
-
-        shapes : :map:`PointCloud` list
-            List of the shapes that correspond to the images.
-
-        perturbed_shapes : :map:`PointCloud` list, optional
-            List of the perturbed shapes used for the regressor training.
-
-        verbose : `boolean`, optional
-            Flag that controls information and progress printing.
-
-        Returns
-        -------
-        regressor : :map:`Regressor`
-            A regressor object.
-
-        Raises
-        ------
-        ValueError
-            The number of shapes must be equal to the number of images.
-        ValueError
-            The number of perturbed shapes must be equal or multiple to
-            the number of images.
-        """
-        n_images = len(images)
-        n_shapes = len(shapes)
-
-        # generate regression data
-        if n_images != n_shapes:
-            raise ValueError("The number of shapes must be equal to "
-                             "the number of images.")
-        elif not perturbed_shapes:
-            perturbed_shapes = self.perturb_shapes(shapes)
-            features, delta_ps = self._regression_data(
-                images, shapes, perturbed_shapes, verbose=verbose)
-        elif n_images == len(perturbed_shapes):
-            features, delta_ps = self._regression_data(
-                images, shapes, perturbed_shapes, verbose=verbose)
-        else:
-            raise ValueError("The number of perturbed shapes must be "
-                             "equal or multiple to the number of images.")
-
-        # perform regression
-        if verbose:
-            print_dynamic('- Performing regression...')
-        # Expected to be a callable
-        regressor = self.regression_type(features, delta_ps, **kwargs)
-
-        # compute regressor RMSE
-        estimated_delta_ps = regressor(features)
-        error = np.sqrt(np.mean(np.sum((delta_ps - estimated_delta_ps) ** 2,
-                                       axis=1)))
-        if verbose:
-            print_dynamic('- Regression RMSE is {0:.5f}.\n'.format(error))
-        return self._build_regressor(regressor, self.get_features_function())
-
-    def perturb_shapes(self, gt_shape):
-        r"""
-        Perturbs the given shapes. The number of perturbations is defined by
-        ``n_perturbations``.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud` list
-            List of the shapes that correspond to the images.
-            will be perturbed.
-
-        Returns
-        -------
-        perturbed_shapes : :map:`PointCloud` list
-            List of the perturbed shapes.
-        """
-        return [[self._perturb_shape(s) for _ in range(self.n_perturbations)]
-                for s in gt_shape]
-
-    def _perturb_shape(self, gt_shape):
-        r"""
-        Method that performs noisy alignment between the given ground truth
-        shape and the reference shape.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-        """
-        return noisy_align(self.reference_shape, gt_shape,
-                           noise_std=self.noise_std
-                           ).apply(self.reference_shape)
-
-    @abc.abstractmethod
-    def _build_regressor(self, regressor, features):
-        r"""
-        Abstract method to build a regressor model.
-        """
-        pass
-
-
-class NonParametricRegressorTrainer(RegressorTrainer):
-    r"""
-    Class for training a Non-Parametric Regressor.
-
-    Parameters
-    ----------
-    reference_shape : :map:`PointCloud`
-        The reference shape that will be used.
-    regression_type : `callable`, optional
-        A `callable` that defines the regression technique to be used.
-        Examples of such callables can be found in
-        :ref:`regression_callables`
-    regression_features : `function`, optional
-        The features that are used during the regression.
-
-        See `menpo.features` for details more details on
-        Menpo's standard image features and feature options.
-        See :ref:`feature_functions` for non standard
-        features definitions.
-    patch_shape : tuple, optional
-        The shape of the patches that will be extracted.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int`, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-
-    """
-    def __init__(self, reference_shape, regression_type=mlr,
-                 regression_features=sparse_hog, patch_shape=(16, 16),
-                 noise_std=0.04, rotation=False, n_perturbations=10):
-        super(NonParametricRegressorTrainer, self).__init__(
-            reference_shape, regression_type=regression_type,
-            regression_features=regression_features, noise_std=noise_std,
-            rotation=rotation, n_perturbations=n_perturbations)
-        self.patch_shape = patch_shape
-        self._set_up()
-
-    def _set_up(self):
-        # work out feature length per patch
-        patch_img = Image.init_blank(self.patch_shape, fill=0)
-        self._feature_patch_length = self.regression_features(patch_img).n_parameters
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the algorithm name.
-        """
-        return "Non-Parametric"
-
-    def _create_fitting(self, image, shapes, gt_shape=None):
-        r"""
-        Method that creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The image object.
-
-        shapes : :map:`PointCloud` list
-            The shapes.
-
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-        """
-        return NonParametricFittingResult(image, self, parameters=[shapes],
-                                          gt_shape=gt_shape)
-
-    def get_features_function(self):
-        return nonparametric_regression_features(self.patch_shape,
-                                              self._feature_patch_length,
-                                              self.regression_features)
-
-    def features(self, image, shape):
-        r"""
-        Method that extracts the features for the regression, which in this
-        case are patch based.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image.
-
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        return self.get_features_function()(image, shape)
-
-    def delta_ps(self, gt_shape, perturbed_shape):
-        r"""
-        Method to generate the delta_ps for the regression.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-
-        perturbed_shape : :map:`PointCloud`
-            The perturbed shape.
-        """
-        return (gt_shape.as_vector() -
-                perturbed_shape.as_vector())
-
-    def _build_regressor(self, regressor, features):
-        r"""
-        Method to build the NonParametricRegressor regressor object.
-        """
-        return NonParametricRegressor(regressor, features)
-
-
-class SemiParametricRegressorTrainer(NonParametricRegressorTrainer):
-    r"""
-    Class for training a Semi-Parametric Regressor.
-
-    This means that a parametric shape model and a non-parametric appearance
-    representation are employed.
-
-    Parameters
-    ----------
-    reference_shape : PointCloud
-        The reference shape that will be used.
-    regression_type : `callable`, optional
-        A `callable` that defines the regression technique to be used.
-        Examples of such callables can be found in
-        :ref:`regression_callables`
-    regression_features : `function`, optional
-        The features that are used during the regression.
-
-        See :ref:`menpo.features` for details more details on
-        Menpos standard image features and feature options.
-    patch_shape : tuple, optional
-        The shape of the patches that will be extracted.
-    update : 'compositional' or 'additive'
-        Defines the way to update the warp.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int`, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-
-    """
-    def __init__(self, transform, reference_shape, regression_type=mlr,
-                 regression_features=sparse_hog, patch_shape=(16, 16),
-                 update='compositional', noise_std=0.04, rotation=False,
-                 n_perturbations=10):
-        super(SemiParametricRegressorTrainer, self).__init__(
-            reference_shape, regression_type=regression_type,
-            regression_features=regression_features, patch_shape=patch_shape,
-            noise_std=noise_std, rotation=rotation,
-            n_perturbations=n_perturbations)
-        self.transform = transform
-        self.update = update
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the algorithm name.
-        """
-        return "Semi-Parametric"
-
-    def _create_fitting(self, image, shapes, gt_shape=None):
-        r"""
-        Method that creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The image object.
-
-        shapes : :map:`PointCloud` list
-            The shapes.
-
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-        """
-        return SemiParametricFittingResult(image, self, parameters=[shapes],
-                                           gt_shape=gt_shape)
-
-    def delta_ps(self, gt_shape, perturbed_shape):
-        r"""
-        Method to generate the delta_ps for the regression.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-
-        perturbed_shape : :map:`PointCloud`
-            The perturbed shape.
-        """
-        self.transform.set_target(gt_shape)
-        gt_ps = self.transform.as_vector()
-        self.transform.set_target(perturbed_shape)
-        perturbed_ps = self.transform.as_vector()
-        return gt_ps - perturbed_ps
-
-    def _build_regressor(self, regressor, features):
-        r"""
-        Method to build the NonParametricRegressor regressor object.
-        """
-        return SemiParametricRegressor(regressor, features, self.transform,
-                                       self.update)
-
-
-class ParametricRegressorTrainer(RegressorTrainer):
-    r"""
-    Class for training a Parametric Regressor.
-
-    Parameters
-    ----------
-    appearance_model : :map:`PCAModel`
-        The appearance model to be used.
-    transform : :map:`Affine`
-        The transform used for warping.
-    reference_shape : :map:`PointCloud`
-        The reference shape that will be used.
-    regression_type : `callable`, optional
-        A `callable` that defines the regression technique to be used.
-        Examples of such callables can be found in
-        :ref:`regression_callables`
-    regression_features : ``None`` or `function`, optional
-        The parametric features that are used during the regression.
-
-        If ``None``, the reconstruction appearance weights will be used as
-        feature.
-
-        If `string` or `function`, the feature representation will be
-        computed using one of the function in:
-
-            If `string`, the feature representation will be extracted by
-            executing a parametric feature function.
-
-            Note that this feature type can only be one of the parametric
-            feature functions defined :ref:`parametric_features`.
-    patch_shape : tuple, optional
-        The shape of the patches that will be extracted.
-    update : 'compositional' or 'additive'
-        Defines the way to update the warp.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int`, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-
-    """
-    def __init__(self, appearance_model, transform, reference_shape,
-                 regression_type=mlr, regression_features=weights,
-                 update='compositional', noise_std=0.04, rotation=False,
-                 n_perturbations=10):
-        super(ParametricRegressorTrainer, self).__init__(
-            reference_shape, regression_type=regression_type,
-            regression_features=regression_features, noise_std=noise_std,
-            rotation=rotation, n_perturbations=n_perturbations)
-        self.appearance_model = appearance_model
-        self.template = appearance_model.mean()
-        self.regression_features = regression_features
-        self.transform = transform
-        self.update = update
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns the algorithm name.
-        """
-        return "Parametric"
-
-    def _create_fitting(self, image, shapes, gt_shape=None):
-        r"""
-        Method that creates the fitting result object.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The image object.
-
-        shapes : :map:`PointCloud` list
-            The shapes.
-
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-        """
-        return ParametricFittingResult(image, self, parameters=[shapes],
-                                       gt_shape=gt_shape)
-
-    def get_features_function(self):
-        return parametric_regression_features(self.transform, self.template,
-                                              self.appearance_model,
-                                              self.regression_features)
-
-    def features(self, image, shape):
-        r"""
-        Method that extracts the features for the regression, which in this
-        case are patch based.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image.
-
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        return self.get_features_function()(image, shape)
-
-    def delta_ps(self, gt_shape, perturbed_shape):
-        r"""
-        Method to generate the delta_ps for the regression.
-
-        Parameters
-        ----------
-        gt_shape : :map:`PointCloud`
-            The ground truth shape.
-
-        perturbed_shape : :map:`PointCloud`
-            The perturbed shape.
-        """
-        self.transform.set_target(gt_shape)
-        gt_ps = self.transform.as_vector()
-        self.transform.set_target(perturbed_shape)
-        perturbed_ps = self.transform.as_vector()
-        return gt_ps - perturbed_ps
-
-    def _build_regressor(self, regressor, features):
-        r"""
-        Method to build the NonParametricRegressor regressor object.
-        """
-        return ParametricRegressor(
-            regressor, features, self.appearance_model, self.transform,
-            self.update)
-
-
-class SemiParametricClassifierBasedRegressorTrainer(
-        SemiParametricRegressorTrainer):
-    r"""
-    Class for training a Semi-Parametric Classifier-Based Regressor. This means
-    that the classifiers are used instead of features.
-
-    Parameters
-    ----------
-    classifiers : list of :map:`classifiers`
-        List of classifiers.
-    transform : :map:`Affine`
-        The transform used for warping.
-    reference_shape : :map:`PointCloud`
-        The reference shape that will be used.
-    regression_type : `callable`, optional
-        A `callable` that defines the regression technique to be used.
-        Examples of such callables can be found in
-        :ref:`regression_callables`
-    patch_shape : tuple, optional
-        The shape of the patches that will be extracted.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int`, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-    """
-    def __init__(self, classifiers, transform, reference_shape,
-                 regression_type=mlr, patch_shape=(16, 16),
-                 update='compositional', noise_std=0.04, rotation=False,
-                 n_perturbations=10):
-        super(SemiParametricClassifierBasedRegressorTrainer, self).__init__(
-            transform, reference_shape, regression_type=regression_type,
-            patch_shape=patch_shape, update=update,
-            noise_std=noise_std,  rotation=rotation,
-            n_perturbations=n_perturbations)
-        self.classifiers = classifiers
-
-    def _set_up(self):
-        # TODO: CLMs should use slices instead of sampling grid, and the
-        # need of the _set_up method will probably disappear
-        # set up sampling grid
-        self.sampling_grid = build_sampling_grid(self.patch_shape)
-
-    def get_features_function(self):
-        return semiparametric_classifier_regression_features(self.patch_shape,
-                                                             self.classifiers)
-
-    def features(self, image, shape):
-        r"""
-        Method that extracts the features for the regression, which in this
-        case are patch based.
-
-        Parameters
-        ----------
-        image : :map:`MaskedImage`
-            The current image.
-
-        shape : :map:`PointCloud`
-            The current shape.
-        """
-        return self.get_features_function()(image, shape)
diff --git a/menpofit/fittingresult.py b/menpofit/result.py
similarity index 63%
rename from menpofit/fittingresult.py
rename to menpofit/result.py
index 7fdedef..20cd791 100644
--- a/menpofit/fittingresult.py
+++ b/menpofit/result.py
@@ -1,81 +1,16 @@
 from __future__ import division
-
 import abc
-from itertools import chain
+from functools import wraps
 import numpy as np
-
-from menpo.shape.pointcloud import PointCloud
-from menpo.image import Image
 from menpo.transform import Scale
-from menpofit.base import name_of_callable
+from menpo.shape import PointCloud
+from menpo.image import Image
 
 
-class FittingResult(object):
+# TODO: document me!
+class Result(object):
     r"""
-    Object that holds the state of a single fitting object, during and after it
-    has fitted a particular image.
-
-    Parameters
-    -----------
-    image : :map:`Image` or subclass
-        The fitted image.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
     """
-
-    def __init__(self, image, gt_shape=None):
-        self.image = image
-        self._gt_shape = gt_shape
-
-    @property
-    def n_iters(self):
-        return len(self.shapes) - 1
-
-    @abc.abstractproperty
-    def shapes(self):
-        r"""
-        A list containing the shapes obtained at each fitting iteration.
-
-        :type: `list` of :map:`PointCloud`
-        """
-
-    def displacements(self):
-        r"""
-        A list containing the displacement between the shape of each iteration
-        and the shape of the previous one.
-
-        :type: `list` of ndarray
-        """
-        return [np.linalg.norm(s1.points - s2.points, axis=1)
-                for s1, s2 in zip(self.shapes, self.shapes[1:])]
-
-    def displacements_stats(self, stat_type='mean'):
-        r"""
-        A list containing the a statistical metric on the displacement between
-        the shape of each iteration and the shape of the previous one.
-
-        Parameters
-        -----------
-        stat_type : `str` ``{'mean', 'median', 'min', 'max'}``, optional
-            Specifies a statistic metric to be extracted from the displacements.
-
-        Returns
-        -------
-        :type: `list` of `float`
-            The statistical metric on the points displacements for each
-            iteration.
-        """
-        if stat_type == 'mean':
-            return [np.mean(d) for d in self.displacements()]
-        elif stat_type == 'median':
-            return [np.median(d) for d in self.displacements()]
-        elif stat_type == 'max':
-            return [np.max(d) for d in self.displacements()]
-        elif stat_type == 'min':
-            return [np.min(d) for d in self.displacements()]
-        else:
-            raise ValueError("type must be 'mean', 'median', 'min' or 'max'")
-
     @abc.abstractproperty
     def final_shape(self):
         r"""
@@ -115,111 +50,128 @@ def fitted_image(self):
             image.landmarks['ground'] = self.gt_shape
         return image
 
-    @property
-    def iter_image(self):
-        r"""
-        Returns a copy of the fitted image with as many landmark groups as
-        iteration run by fitting procedure:
-            - ``iter_0``, containing the initial shape.
-            - ``iter_1``, containing the the fitted shape at the first
-            iteration.
-            - ``...``
-            - ``iter_n``, containing the final fitted shape.
-
-        :type: :map:`Image`
-        """
-        image = Image(self.image.pixels)
-        for j, s in enumerate(self.shapes):
-            key = 'iter_{}'.format(j)
-            image.landmarks[key] = s
-        return image
-
-    def errors(self, error_type='me_norm'):
-        r"""
-        Returns a list containing the error at each fitting iteration.
-
-        Parameters
-        -----------
-        error_type : `str` ``{'me_norm', 'me', 'rmse'}``, optional
-            Specifies the way in which the error between the fitted and
-            ground truth shapes is to be computed.
-
-        Returns
-        -------
-        errors : `list` of `float`
-            The errors at each iteration of the fitting process.
-        """
-        if self.gt_shape is not None:
-            return [compute_error(t, self.gt_shape, error_type)
-                    for t in self.shapes]
-        else:
-            raise ValueError('Ground truth has not been set, errors cannot '
-                             'be computed')
-
-    def final_error(self, error_type='me_norm'):
+    def final_error(self, compute_error=None):
         r"""
         Returns the final fitting error.
 
         Parameters
         -----------
-        error_type : `str` ``{'me_norm', 'me', 'rmse'}``, optional
-            Specifies the way in which the error between the fitted and
-            ground truth shapes is to be computed.
+        compute_error : `callable`, optional
+            Callable that computes the error between the fitted and
+            ground truth shapes.
 
         Returns
         -------
         final_error : `float`
             The final error at the end of the fitting procedure.
         """
+        if compute_error is None:
+            compute_error = compute_normalise_point_to_point_error
         if self.gt_shape is not None:
-            return compute_error(self.final_shape, self.gt_shape, error_type)
+            return compute_error(self.final_shape, self.gt_shape)
         else:
             raise ValueError('Ground truth has not been set, final error '
                              'cannot be computed')
 
-    def initial_error(self, error_type='me_norm'):
+    def initial_error(self, compute_error=None):
         r"""
         Returns the initial fitting error.
 
         Parameters
         -----------
-        error_type : `str` ``{'me_norm', 'me', 'rmse'}``, optional
-            Specifies the way in which the error between the fitted and
-            ground truth shapes is to be computed.
+        compute_error : `callable`, optional
+            Callable that computes the error between the fitted and
+            ground truth shapes.
 
         Returns
         -------
         initial_error : `float`
             The initial error at the start of the fitting procedure.
         """
+        if compute_error is None:
+            compute_error = compute_normalise_point_to_point_error
         if self.gt_shape is not None:
-            return compute_error(self.initial_shape, self.gt_shape, error_type)
+            return compute_error(self.initial_shape, self.gt_shape)
         else:
             raise ValueError('Ground truth has not been set, final error '
                              'cannot be computed')
 
-    def view_widget(self, browser_style='buttons', figure_size=(10, 8),
-                    style='coloured'):
+    def as_serializableresult(self):
+        return SerializableIterativeResult(
+            self.image, self.initial_shape, self.final_shape,
+            gt_shape=self.gt_shape)
+
+    def __str__(self):
+        out = "Initial error: {0:.4f}\nFinal error: {1:.4f}".format(
+            self.initial_error(), self.final_error())
+        return out
+
+
+# TODO: document me!
+class IterativeResult(Result):
+    r"""
+    """
+    @abc.abstractproperty
+    def n_iters(self):
+        r"""
+        Returns the number of iterations.
+        """
+
+    @abc.abstractproperty
+    def shapes(self):
+        r"""
+        Generates a list containing the shapes obtained at each fitting
+        iteration.
+
+        Returns
+        -------
+        shapes : `list` of :map:`PointCloud` or `list` of `ndarray`
+            A list containing the shapes obtained at each fitting iteration.
+        """
+
+    @property
+    def iter_image(self):
+        r"""
+        Returns a copy of the fitted image with as many landmark groups as
+        iterations run by the fitting procedure:
+            - ``iter_0``, containing the initial shape.
+            - ``iter_1``, containing the fitted shape at the first
+            iteration.
+            - ``...``
+            - ``iter_n``, containing the final fitted shape.
+
+        :type: :map:`Image`
+        """
+        image = Image(self.image.pixels)
+        for j, s in enumerate(self.shapes):
+            image.landmarks['iter_'+str(j)] = s
+        return image
+
+    def errors(self, compute_error=None):
         r"""
-        Visualizes the multilevel fitting result object using the
-        `menpo.visualize.widgets.visualize_fitting_result` widget.
+        Returns a list containing the error at each fitting iteration.
 
         Parameters
         -----------
-        browser_style : {``'buttons'``, ``'slider'``}, optional
-            It defines whether the selector of the fitting results will have the
-            form of plus/minus buttons or a slider.
-        figure_size : (`int`, `int`), optional
-            The initial size of the rendered figure.
-        style : {``'coloured'``, ``'minimal'``}, optional
-            If ``'coloured'``, then the style of the widget will be coloured. If
-            ``minimal``, then the style is simple using black and white colours.
+        compute_error : `callable`, optional
+            Callable that computes the error between the fitted and
+            ground truth shapes.
+
+        Returns
+        -------
+        errors : `list` of `float`
+            The errors at each iteration of the fitting process.
         """
-        from menpofit.visualize import visualize_fitting_result
-        visualize_fitting_result(self, figure_size=figure_size,
-                                 browser_style=browser_style, style=style)
+        if compute_error is None:
+            compute_error = compute_normalise_point_to_point_error
+        if self.gt_shape is not None:
+            return [compute_error(t, self.gt_shape)
+                    for t in self.shapes]
+        else:
+            raise ValueError('Ground truth has not been set, errors cannot '
+                             'be computed')
 
-    def plot_errors(self, error_type='me_norm', figure_id=None,
+    def plot_errors(self, error_type=None, figure_id=None,
                     new_figure=False, render_lines=True, line_colour='b',
                     line_style='-', line_width=2, render_markers=True,
                     marker_style='o', marker_size=4, marker_face_colour='b',
@@ -231,7 +183,6 @@ def plot_errors(self, error_type='me_norm', figure_id=None,
                     grid_line_width=0.5):
         r"""
         Plot of the error evolution at each fitting iteration.
-
         Parameters
         ----------
         error_type : {``me_norm``, ``me``, ``rmse``}, optional
@@ -288,14 +239,13 @@ def plot_errors(self, error_type='me_norm', figure_id=None,
             The style of the grid lines.
         grid_line_width : `float`, optional
             The width of the grid lines.
-
         Returns
         -------
         viewer : :map:`GraphPlotter`
             The viewer object.
         """
         from menpo.visualize import GraphPlotter
-        errors_list = self.errors(error_type=error_type)
+        errors_list = self.errors(compute_error=error_type)
         return GraphPlotter(figure_id=figure_id, new_figure=new_figure,
                             x_axis=range(len(errors_list)),
                             y_axis=[errors_list],
@@ -315,6 +265,40 @@ def plot_errors(self, error_type='me_norm', figure_id=None,
             grid_line_style=grid_line_style, grid_line_width=grid_line_width,
             figure_size=figure_size)
 
+    def displacements(self):
+        r"""
+        A list containing the displacement between the shape of each iteration
+        and the shape of the previous one.
+        :type: `list` of ndarray
+        """
+        return [np.linalg.norm(s1.points - s2.points, axis=1)
+                for s1, s2 in zip(self.shapes, self.shapes[1:])]
+
+    def displacements_stats(self, stat_type='mean'):
+        r"""
+        A list containing a statistical metric on the displacement between
+        the shape of each iteration and the shape of the previous one.
+        Parameters
+        -----------
+        stat_type : `str` ``{'mean', 'median', 'min', 'max'}``, optional
+            Specifies a statistic metric to be extracted from the displacements.
+        Returns
+        -------
+        :type: `list` of `float`
+            The statistical metric on the points displacements for each
+            iteration.
+        """
+        if stat_type == 'mean':
+            return [np.mean(d) for d in self.displacements()]
+        elif stat_type == 'median':
+            return [np.median(d) for d in self.displacements()]
+        elif stat_type == 'max':
+            return [np.max(d) for d in self.displacements()]
+        elif stat_type == 'min':
+            return [np.min(d) for d in self.displacements()]
+        else:
+            raise ValueError("type must be 'mean', 'median', 'min' or 'max'")
+
     def plot_displacements(self, stat_type='mean', figure_id=None,
                            new_figure=False, render_lines=True, line_colour='b',
                            line_style='-', line_width=2, render_markers=True,
@@ -328,7 +312,6 @@ def plot_displacements(self, stat_type='mean', figure_id=None,
         r"""
         Plot of a statistical metric of the displacement between the shape of
         each iteration and the shape of the previous one.
-
         Parameters
         ----------
         stat_type : {``mean``, ``median``, ``min``, ``max``}, optional
@@ -385,7 +368,6 @@ def plot_displacements(self, stat_type='mean', figure_id=None,
             The style of the grid lines.
         grid_line_width : `float`, optional
             The width of the grid lines.
-
         Returns
         -------
         viewer : :map:`GraphPlotter`
@@ -429,98 +411,24 @@ def plot_displacements(self, stat_type='mean', figure_id=None,
             grid_line_style=grid_line_style, grid_line_width=grid_line_width,
             figure_size=figure_size)
 
-    def as_serializable(self):
-        r""""
-        Returns a serializable version of the fitting result. This is a much
-        lighter weight object than the initial fitting result. For example,
-        it won't contain the original fitting object.
-
-        Returns
-        -------
-        serializable_fitting_result : :map:`SerializableFittingResult`
-            The lightweight serializable version of this fitting result.
-        """
-        if self.parameters is not None:
-            parameters = [p.copy() for p in self.parameters]
-        else:
-            parameters = []
-        gt_shape = self.gt_shape.copy() if self.gt_shape else None
-        return SerializableFittingResult(self.image.copy(),
-                                         parameters,
-                                         [s.copy() for s in self.shapes],
-                                         gt_shape)
+    def as_serializableresult(self):
+        return SerializableIterativeResult(
+            self.image, self.shapes, self.n_iters, gt_shape=self.gt_shape)
 
 
-class NonParametricFittingResult(FittingResult):
+# TODO: document me!
+class ParametricAlgorithmResult(IterativeResult):
     r"""
-    Object that holds the state of a Non Parametric :map:`Fitter` object
-    before, during and after it has fitted a particular image.
-
-    Parameters
-    -----------
-    image : :map:`Image`
-        The fitted image.
-    fitter : :map:`Fitter`
-        The Fitter object used to fitter the image.
-    shapes : `list` of :map:`PointCloud`
-        The list of fitted shapes per iteration of the fitting procedure.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
     """
-
-    def __init__(self, image, fitter, parameters=None, gt_shape=None):
-        super(NonParametricFittingResult, self).__init__(image,
-                                                         gt_shape=gt_shape)
-        self.fitter = fitter
-        # The parameters are the shapes for Non-Parametric algorithms
-        self.parameters = parameters
-
-    @property
-    def shapes(self):
-        return self.parameters
-
-    @property
-    def final_shape(self):
-        return self.parameters[-1].copy()
+    def __init__(self, image, algorithm, shape_parameters, gt_shape=None):
+        self.image = image
+        self.algorithm = algorithm
+        self.shape_parameters = shape_parameters
+        self._gt_shape = gt_shape
 
     @property
-    def initial_shape(self):
-        return self.parameters[0].copy()
-
-    @FittingResult.gt_shape.setter
-    def gt_shape(self, value):
-        r"""
-        Setter for the ground truth shape associated to the image.
-        """
-        if isinstance(value, PointCloud):
-            self._gt_shape = value
-        else:
-            raise ValueError("Accepted values for gt_shape setter are "
-                             "PointClouds.")
-
-
-class SemiParametricFittingResult(FittingResult):
-    r"""
-    Object that holds the state of a Semi Parametric :map:`Fitter` object
-    before, during and after it has fitted a particular image.
-
-    Parameters
-    -----------
-    image : :map:`Image`
-        The fitted image.
-    fitter : :map:`Fitter`
-        The Fitter object used to fitter the image.
-    parameters : `list` of `ndarray`
-        The list of optimal transform parameters per iteration of the fitting
-        procedure.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
-    """
-
-    def __init__(self, image, fitter, parameters=None, gt_shape=None):
-        FittingResult.__init__(self, image, gt_shape=gt_shape)
-        self.fitter = fitter
-        self.parameters = parameters
+    def n_iters(self):
+        return len(self.shapes) - 1
 
     @property
     def transforms(self):
@@ -528,26 +436,27 @@ def transforms(self):
         Generates a list containing the transforms obtained at each fitting
         iteration.
         """
-        return [self.fitter.transform.from_vector(p) for p in self.parameters]
+        return [self.algorithm.transform.from_vector(p)
+                for p in self.shape_parameters]
 
     @property
     def final_transform(self):
         r"""
         Returns the final transform.
         """
-        return self.fitter.transform.from_vector(self.parameters[-1])
+        return self.algorithm.transform.from_vector(self.shape_parameters[-1])
 
     @property
     def initial_transform(self):
         r"""
         Returns the initial transform from which the fitting started.
         """
-        return self.fitter.transform.from_vector(self.parameters[0])
+        return self.algorithm.transform.from_vector(self.shape_parameters[0])
 
     @property
     def shapes(self):
-        return [self.fitter.transform.from_vector(p).target
-                for p in self.parameters]
+        return [self.algorithm.transform.from_vector(p).target
+                for p in self.shape_parameters]
 
     @property
     def final_shape(self):
@@ -557,178 +466,58 @@ def final_shape(self):
     def initial_shape(self):
         return self.initial_transform.target
 
-    @FittingResult.gt_shape.setter
-    def gt_shape(self, value):
-        r"""
-        Setter for the ground truth shape associated to the image.
-        """
-        if type(value) is PointCloud:
-            self._gt_shape = value
-        elif type(value) is list and value[0] is float:
-            transform = self.fitter.transform.from_vector(value)
-            self._gt_shape = transform.target
-        else:
-            raise ValueError("Accepted values for gt_shape setter are "
-                             "PointClouds or float lists "
-                             "specifying transform shapes.")
 
-
-class ParametricFittingResult(SemiParametricFittingResult):
+# TODO: document me!
+class NonParametricAlgorithmResult(IterativeResult):
     r"""
-    Object that holds the state of a Fully Parametric :map:`Fitter` object
-    before, during and after it has fitted a particular image.
-
-    Parameters
-    -----------
-    image : :map:`Image`
-        The fitted image.
-    fitter : :map:`Fitter`
-        The Fitter object used to fitter the image.
-    parameters : `list` of `ndarray`
-        The list of optimal transform parameters per iteration of the fitting
-        procedure.
-    weights : `list` of `ndarray`
-        The list of optimal appearance parameters per iteration of the fitting
-        procedure.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
     """
-    def __init__(self, image, fitter, parameters=None, weights=None,
-                 gt_shape=None):
-        SemiParametricFittingResult.__init__(self, image, fitter, parameters,
-                                             gt_shape=gt_shape)
-        self.weights = weights
-
-    @property
-    def warped_images(self):
-        r"""
-        The list containing the warped images obtained at each fitting
-        iteration.
-
-        :type: `list` of :map:`Image` or subclass
-        """
-        mask = self.fitter.template.mask
-        transform = self.fitter.transform
-        return [self.image.warp_to_mask(mask, transform.from_vector(p))
-                for p in self.parameters]
-
-    @property
-    def appearance_reconstructions(self):
-        r"""
-        The list containing the appearance reconstruction obtained at
-        each fitting iteration.
-
-        :type: list` of :map:`Image` or subclass
-        """
-        if self.weights:
-            return [self.fitter.appearance_model.instance(w)
-                    for w in self.weights]
-        else:
-            return [self.fitter.template for _ in self.shapes]
+    def __init__(self, image, shapes, gt_shape=None):
+        self.image = image
+        self._shapes = shapes
+        self._gt_shape = gt_shape
 
     @property
-    def error_images(self):
-        r"""
-        The list containing the error images obtained at
-        each fitting iteration.
-
-        :type: list` of :map:`Image` or subclass
-        """
-        template = self.fitter.template
-        warped_images = self.warped_images
-        appearances = self.appearance_reconstructions
-
-        error_images = []
-        for a, i in zip(appearances, warped_images):
-            error = a.as_vector() - i.as_vector()
-            error_image = template.from_vector(error)
-            error_images.append(error_image)
-
-        return error_images
-
-
-class SerializableFittingResult(FittingResult):
-    r"""
-    Designed to allow the fitting results to be easily serializable. In
-    comparison to the other fitting result objects, the serializable fitting
-    results contain a much stricter set of data. For example, the major data
-    components of a serializable fitting result are the fitted shapes, the
-    parameters and the fitted image.
-
-    Parameters
-    -----------
-    image : :map:`Image`
-        The fitted image.
-    parameters : `list` of `ndarray`
-        The list of optimal transform parameters per iteration of the fitting
-        procedure.
-    shapes : `list` of :map:`PointCloud`
-        The list of fitted shapes per iteration of the fitting procedure.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
-    """
-    def __init__(self, image, parameters, shapes, gt_shape):
-        FittingResult.__init__(self, image, gt_shape=gt_shape)
-
-        self.parameters = parameters
-        self._shapes = shapes
+    def n_iters(self):
+        return len(self.shapes) - 1
 
     @property
     def shapes(self):
         return self._shapes
 
     @property
-    def initial_shape(self):
-        return self._shapes[0]
+    def final_shape(self):
+        return self.shapes[-1]
 
     @property
-    def final_shape(self):
-        return self._shapes[-1]
+    def initial_shape(self):
+        return self.shapes[0]
 
 
-class MultilevelFittingResult(FittingResult):
+# TODO: document me!
+class MultiFitterResult(IterativeResult):
     r"""
-    Class that holds the state of a :map:`MultilevelFitter` object before,
-    during and after it has fitted a particular image.
-
-    Parameters
-    -----------
-    image : :map:`Image` or subclass
-        The fitted image.
-    multilevel_fitter : :map:`MultilevelFitter`
-        The multilevel fitter object used to fit the image.
-    fitting_results : `list` of :map:`FittingResult`
-        The list of fitting results.
-    affine_correction : :map:`Affine`
-        The affine transform between the initial shape of the highest
-        pyramidal level and the initial shape of the original image
-    gt_shape : class:`PointCloud`, optional
-        The ground truth shape associated to the image.
     """
-    def __init__(self, image, multiple_fitter, fitting_results,
-                 affine_correction, gt_shape=None):
-        super(MultilevelFittingResult, self).__init__(image, gt_shape=gt_shape)
-        self.fitter = multiple_fitter
-        self.fitting_results = fitting_results
+    def __init__(self, image, fitter, algorithm_results, affine_correction,
+                 gt_shape=None):
+        super(MultiFitterResult, self).__init__()
+        self.image = image
+        self.fitter = fitter
+        self.algorithm_results = algorithm_results
         self._affine_correction = affine_correction
+        self._gt_shape = gt_shape
 
     @property
-    def n_levels(self):
+    def n_scales(self):
         r"""
         The number of levels of the fitter object.
 
         :type: `int`
         """
-        return self.fitter.n_levels
+        return self.fitter.n_scales
 
     @property
-    def downscale(self):
-        r"""
-        The downscale factor used by the multiple fitter.
-
-        :type: `float`
-        """
-        return self.fitter.downscale
+    def scales(self):
+        return self.fitter.scales
 
     @property
     def n_iters(self):
@@ -738,20 +527,30 @@ def n_iters(self):
         :type: `int`
         """
         n_iters = 0
-        for f in self.fitting_results:
+        for f in self.algorithm_results:
             n_iters += f.n_iters
         return n_iters
 
     @property
     def shapes(self):
         r"""
-        A list containing the shapes obtained at each fitting iteration.
+        Generates a list containing the shapes obtained at each fitting
+        iteration.
+
+        Parameters
+        -----------
+        as_points : `boolean`, optional
+            Whether the result is returned as a `list` of :map:`PointCloud` or
+            a `list` of `ndarrays`.
 
-        :type: `list` of :map:`PointCloud`
+        Returns
+        -------
+        shapes : `list` of :map:`PointCloud` or `list` of `ndarray`
+            A list containing the fitted shapes at each iteration of
+            the fitting procedure.
         """
-        return _rescale_shapes_to_reference(self.fitting_results, self.n_levels,
-                                            self.downscale,
-                                            self._affine_correction)
+        return _rescale_shapes_to_reference(
+            self.algorithm_results, self.scales, self._affine_correction)
 
     @property
     def final_shape(self):
@@ -760,234 +559,114 @@ def final_shape(self):
 
         :type: :map:`PointCloud`
         """
-        return self._affine_correction.apply(
-            self.fitting_results[-1].final_shape)
+        final_shape = self.algorithm_results[-1].final_shape
+        return self._affine_correction.apply(final_shape)
 
     @property
     def initial_shape(self):
-        r"""
-        The initial shape from which the fitting started.
-
-        :type: :map:`PointCloud`
-        """
-        n = self.n_levels - 1
-        initial_shape = self.fitting_results[0].initial_shape
-        Scale(self.downscale ** n, initial_shape.n_dims).apply_inplace(
-            initial_shape)
-
+        initial_shape = self.algorithm_results[0].initial_shape
+        initial_shape = Scale(self.scales[-1]/self.scales[0],
+                              initial_shape.n_dims).apply(initial_shape)
         return self._affine_correction.apply(initial_shape)
 
-    @FittingResult.gt_shape.setter
-    def gt_shape(self, value):
-        r"""
-        Setter for the ground truth shape associated to the image.
-
-        type: :map:`PointCloud`
-        """
-        self._gt_shape = value
-
-    def __str__(self):
-        if self.fitter.pyramid_on_features:
-            feat_str = name_of_callable(self.fitter.features)
-        else:
-            feat_str = []
-            for j in range(self.n_levels):
-                if isinstance(self.fitter.features[j], str):
-                    feat_str.append(self.fitter.features[j])
-                elif self.fitter.features[j] is None:
-                    feat_str.append("none")
-                else:
-                    feat_str.append(name_of_callable(self.fitter.features[j]))
-        out = "Fitting Result\n" \
-              " - Initial error: {0:.4f}\n" \
-              " - Final error: {1:.4f}\n" \
-              " - {2} method with {3} pyramid levels, {4} iterations " \
-              "and using {5} features.".format(
-              self.initial_error(), self.final_error(), self.fitter.algorithm,
-              self.n_levels, self.n_iters, feat_str)
-        return out
-
-    def as_serializable(self):
-        r""""
-        Returns a serializable version of the fitting result. This is a much
-        lighter weight object than the initial fitting result. For example,
-        it won't contain the original fitting object.
-
-        Returns
-        -------
-        serializable_fitting_result : :map:`SerializableFittingResult`
-            The lightweight serializable version of this fitting result.
-        """
-        gt_shape = self.gt_shape.copy() if self.gt_shape else None
-        fr_copies = [fr.as_serializable() for fr in self.fitting_results]
-
-        return SerializableMultilevelFittingResult(
-            self.image.copy(), fr_copies,
-            gt_shape, self.n_levels, self.downscale, self.n_iters,
-            self._affine_correction.copy())
-
-
-class AMMultilevelFittingResult(MultilevelFittingResult):
-    r"""
-    Class that holds the state of an Active Model (either AAM or ATM).
-    """
-    @property
-    def costs(self):
-        r"""
-        Returns a list containing the cost at each fitting iteration.
-
-        :type: `list` of `float`
-        """
-        raise ValueError('costs not implemented yet.')
-
-    @property
-    def final_cost(self):
-        r"""
-        Returns the final fitting cost.
-
-        :type: `float`
-        """
-        raise ValueError('costs not implemented yet.')
-
-    @property
-    def initial_cost(self):
-        r"""
-        Returns the initial fitting cost.
-
-        :type: `float`
-        """
-        raise ValueError('costs not implemented yet.')
-
-    @property
-    def warped_images(self):
-        r"""
-        The list containing the warped images obtained at each fitting
-        iteration.
-
-        :type: `list` of :map:`Image` or subclass
-        """
-        mask = self.fitting_results[-1].fitter.template.mask
-        transform = self.fitting_results[-1].fitter.transform
-        warped_images = []
-        for s in self.shapes():
-            transform.set_target(s)
-            image = self.image.warp_to_mask(mask, transform)
-            warped_images.append(image)
-
-        return warped_images
-
-    @property
-    def error_images(self):
-        r"""
-        The list containing the error images obtained at each fitting
-        iteration.
-
-        :type: `list` of :map:`Image` or subclass
-        """
-        return list(chain(
-            *[f.error_images for f in self.fitting_results]))
-
 
-class SerializableMultilevelFittingResult(FittingResult):
+# TODO: document me!
+class SerializableIterativeResult(IterativeResult):
     r"""
-    Designed to allow the fitting results to be easily serializable. In
-    comparison to the other fitting result objects, the serializable fitting
-    results contain a much stricter set of data. For example, the major data
-    components of a serializable fitting result are the fitted shapes, the
-    parameters and the fitted image.
-
-    Parameters
-    -----------
-    image : :map:`Image`
-        The fitted image.
-    shapes : `list` of :map:`PointCloud`
-        The list of fitted shapes per iteration of the fitting procedure.
-    gt_shape : :map:`PointCloud`
-        The ground truth shape associated to the image.
-    n_levels : `int`
-        Number of levels within the multilevel fitter.
-    downscale : `int`
-        Scale of downscaling applied to the image.
-    n_iters : `int`
-        Number of iterations the fitter performed.
     """
-    def __init__(self, image, fitting_results, gt_shape, n_levels,
-                 downscale, n_iters, affine_correction):
-        FittingResult.__init__(self, image, gt_shape=gt_shape)
-        self.fitting_results = fitting_results
-        self.n_levels = n_levels
+    def __init__(self, image, shapes, n_iters, gt_shape=None):
+        self.image = image
+        self._gt_shape = gt_shape
+        self._shapes = shapes
         self._n_iters = n_iters
-        self.downscale = downscale
-        self.affine_correction = affine_correction
 
     @property
     def n_iters(self):
         return self._n_iters
 
     @property
-    def final_shape(self):
-        return self.shapes[-1]
+    def shapes(self):
+        return self._shapes
 
     @property
     def initial_shape(self):
-        return self.shapes[0]
+        return self._shapes[0]
 
     @property
-    def shapes(self):
-        return _rescale_shapes_to_reference(self.fitting_results, self.n_levels,
-                                            self.downscale,
-                                            self.affine_correction)
+    def final_shape(self):
+        return self._shapes[-1]
 
 
-def _rescale_shapes_to_reference(fitting_results, n_levels, downscale,
-                                 affine_correction):
-    n = n_levels - 1
+# TODO: Document me!
+def _rescale_shapes_to_reference(algorithm_results, scales, affine_correction):
+    r"""
+    """
     shapes = []
-    for j, f in enumerate(fitting_results):
-        transform = Scale(downscale ** (n - j), f.final_shape.n_dims)
-        for t in f.shapes:
-            t = transform.apply(t)
-            shapes.append(affine_correction.apply(t))
+    for j, (alg, scale) in enumerate(zip(algorithm_results, scales)):
+        transform = Scale(scales[-1]/scale, alg.final_shape.n_dims)
+        for shape in alg.shapes:
+            shape = transform.apply(shape)
+            shapes.append(affine_correction.apply(shape))
     return shapes
 
 
-def compute_error(target, ground_truth, error_type='me_norm'):
+# TODO: Document me!
+def pointcloud_to_points(wrapped):
+
+    @wraps(wrapped)
+    def wrapper(*args, **kwargs):
+        args = list(args)
+        for index, arg in enumerate(args):
+            if isinstance(arg, PointCloud):
+                args[index] = arg.points
+        for key in kwargs:
+            if isinstance(kwargs[key], PointCloud):
+                kwargs[key] = kwargs[key].points
+        return wrapped(*args, **kwargs)
+    return wrapper
+
+
+# TODO: Document me!
+@pointcloud_to_points
+def compute_root_mean_square_error(shape, gt_shape):
     r"""
     """
-    gt_points = ground_truth.points
-    target_points = target.points
+    return np.sqrt(np.mean((shape.flatten() - gt_shape.flatten()) ** 2))
 
-    if error_type == 'me_norm':
-        return _compute_me_norm(target_points, gt_points)
-    elif error_type == 'me':
-        return _compute_me(target_points, gt_points)
-    elif error_type == 'rmse':
-        return _compute_rmse(target_points, gt_points)
-    else:
-        raise ValueError("Unknown error_type string selected. Valid options "
-                         "are: me_norm, me, rmse'")
 
-
-def _compute_me(target, ground_truth):
+# TODO: Document me!
+@pointcloud_to_points
+def compute_point_to_point_error(shape, gt_shape):
     r"""
     """
-    return np.mean(np.sqrt(np.sum((target - ground_truth) ** 2, axis=-1)))
+    return np.mean(np.sqrt(np.sum((shape - gt_shape) ** 2, axis=-1)))
 
 
-def _compute_rmse(target, ground_truth):
+# TODO: Document me!
+@pointcloud_to_points
+def compute_normalise_root_mean_square_error(shape, gt_shape, norm_shape=None):
     r"""
     """
-    return np.sqrt(np.mean((target.flatten() - ground_truth.flatten()) ** 2))
+    if norm_shape is None:
+        norm_shape = gt_shape
+    normalizer = np.mean(np.max(norm_shape, axis=0) -
+                         np.min(norm_shape, axis=0))
+    return compute_root_mean_square_error(shape, gt_shape) / normalizer
 
 
-def _compute_me_norm(target, ground_truth):
+# TODO: Document me!
+@pointcloud_to_points
+def compute_normalise_point_to_point_error(shape, gt_shape, norm_shape=None):
     r"""
     """
-    normalizer = np.mean(np.max(ground_truth, axis=0) -
-                         np.min(ground_truth, axis=0))
-    return _compute_me(target, ground_truth) / normalizer
+    if norm_shape is None:
+        norm_shape = gt_shape
+    normalizer = np.mean(np.max(norm_shape, axis=0) -
+                         np.min(norm_shape, axis=0))
+    return compute_point_to_point_error(shape, gt_shape) / normalizer
 
 
+# TODO: Document me!
 def compute_cumulative_error(errors, x_axis):
     r"""
     """
@@ -995,8 +674,8 @@ def compute_cumulative_error(errors, x_axis):
     return [np.count_nonzero([errors <= x]) / n_errors for x in x_axis]
 
 
-def plot_cumulative_error_distribution(errors, error_range=None, figure_id=None,
-                                       new_figure=False,
+def plot_cumulative_error_distribution(errors, error_range=None,
+                                       figure_id=None, new_figure=False,
                                        title='Cumulative Error Distribution',
                                        x_label='Normalized Point-to-Point Error',
                                        y_label='Images Proportion',
@@ -1255,4 +934,3 @@ def plot_cumulative_error_distribution(errors, error_range=None, figure_id=None,
         axes_font_style=axes_font_style, axes_font_weight=axes_font_weight,
         figure_size=figure_size, render_grid=render_grid,
         grid_line_style=grid_line_style, grid_line_width=grid_line_width)
-
diff --git a/menpofit/sdm/__init__.py b/menpofit/sdm/__init__.py
index 9661b9d..8a616c7 100644
--- a/menpofit/sdm/__init__.py
+++ b/menpofit/sdm/__init__.py
@@ -1,2 +1,2 @@
-from .trainer import SDMTrainer, SDAAMTrainer, SDCLMTrainer
-from .fitter import SDMFitter, SDAAMFitter, SDCLMFitter
+from .algorithm import Newton, GaussNewton
+from .fitter import SupervisedDescentFitter, SDM
diff --git a/menpofit/sdm/algorithm.py b/menpofit/sdm/algorithm.py
new file mode 100644
index 0000000..37cab04
--- /dev/null
+++ b/menpofit/sdm/algorithm.py
@@ -0,0 +1,250 @@
+from __future__ import division
+from functools import partial
+import numpy as np
+from menpo.feature import no_op
+from menpo.visualize import print_dynamic
+from menpofit.visualize import print_progress
+from menpofit.result import (
+    NonParametricAlgorithmResult, compute_normalise_point_to_point_error)
+from menpofit.math import IRLRegression, IIRLRegression
+
+
+# TODO: document me!
+class SupervisedDescentAlgorithm(object):
+    r"""
+    """
+
+    def __init__(self):
+        self.regressors = []
+
+    def train(self, images, gt_shapes, current_shapes, prefix='',
+              verbose=False):
+        return self._train(images, gt_shapes, current_shapes, increment=False,
+                           prefix=prefix, verbose=verbose)
+
+    def increment(self, images, gt_shapes, current_shapes, prefix='',
+                  verbose=False):
+        return self._train(images, gt_shapes, current_shapes, increment=True,
+                           prefix=prefix, verbose=verbose)
+
+    def _train(self, images, gt_shapes, current_shapes, increment=False,
+               prefix='', verbose=False):
+
+        if not increment:
+            # Reset the regressors
+            self.regressors = []
+
+        n_perturbations = len(current_shapes[0])
+        template_shape = gt_shapes[0]
+
+        # obtain delta_x and gt_x
+        delta_x, gt_x = obtain_delta_x(gt_shapes, current_shapes)
+
+        # Cascaded Regression loop
+        for k in range(self.n_iterations):
+            # generate regression data
+            features = features_per_image(
+                images, current_shapes, self.patch_size, self.patch_features,
+                prefix='{}(Iteration {}) - '.format(prefix, k),
+                verbose=verbose)
+
+            if verbose:
+                print_dynamic('{}(Iteration {}) - Performing regression'.format(
+                    prefix, k))
+
+            if not increment:
+                r = self._regressor_cls()
+                r.train(features, delta_x)
+                self.regressors.append(r)
+            else:
+                self.regressors[k].increment(features, delta_x)
+
+            # Estimate delta_points
+            estimated_delta_x = self.regressors[k].predict(features)
+            if verbose:
+                self._print_regression_info(template_shape, gt_shapes,
+                                            n_perturbations, delta_x,
+                                            estimated_delta_x, k,
+                                            prefix=prefix)
+
+            j = 0
+            for shapes in current_shapes:
+                for s in shapes:
+                    # update current x
+                    current_x = s.as_vector() + estimated_delta_x[j]
+                    # update current shape inplace
+                    s.from_vector_inplace(current_x)
+                    # update delta_x
+                    delta_x[j] = gt_x[j] - current_x
+                    # increase index
+                    j += 1
+
+        return current_shapes
+
+    def run(self, image, initial_shape, gt_shape=None, **kwargs):
+        # set current shape and initialize list of shapes
+        current_shape = initial_shape
+        shapes = [initial_shape]
+
+        # Cascaded Regression loop
+        for r in self.regressors:
+            # compute regression features
+            features = features_per_patch(image, current_shape, self.patch_size,
+                                          self.patch_features)
+
+            # solve for increments on the shape vector
+            dx = r.predict(features)
+
+            # update current shape
+            current_shape = current_shape.from_vector(
+                current_shape.as_vector() + dx)
+            shapes.append(current_shape)
+
+        # return algorithm result
+        return NonParametricAlgorithmResult(image, shapes,
+                                            gt_shape=gt_shape)
+
+    def _print_regression_info(self, template_shape, gt_shapes, n_perturbations,
+                               delta_x, estimated_delta_x, level_index,
+                               prefix=''):
+        print_dynamic('{}(Iteration {}) - Calculating errors'.format(
+            prefix, level_index))
+        errors = []
+        for j, (dx, edx) in enumerate(zip(delta_x, estimated_delta_x)):
+            s1 = template_shape.from_vector(dx)
+            s2 = template_shape.from_vector(edx)
+            gt_s = gt_shapes[np.floor_divide(j, n_perturbations)]
+            errors.append(self._compute_error(s1, s2, gt_s))
+        mean = np.mean(errors)
+        std = np.std(errors)
+        median = np.median(errors)
+        print_dynamic('{}(Iteration {}) - Training error -> '
+                      'mean: {:.4f}, std: {:.4f}, median: {:.4f}.\n'.
+                      format(prefix, level_index, mean, std, median))
+
+
+# TODO: document me!
+class Newton(SupervisedDescentAlgorithm):
+    r"""
+    """
+    def __init__(self, patch_features=no_op, patch_size=(17, 17),
+                 n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, bias=True):
+        super(Newton, self).__init__()
+
+        self._regressor_cls = partial(IRLRegression, alpha=alpha, bias=bias)
+        self.patch_size = patch_size
+        self.patch_features = patch_features
+        self.n_iterations = n_iterations
+        self._compute_error = compute_error
+        self.eps = eps
+
+
+# TODO: document me!
+class GaussNewton(SupervisedDescentAlgorithm):
+    r"""
+    """
+    def __init__(self, patch_features=no_op, patch_size=(17, 17), n_iterations=3,
+                 compute_error=compute_normalise_point_to_point_error,
+                 eps=10**-5, alpha=0, bias=True, alpha2=0):
+        super(GaussNewton, self).__init__()
+
+        self._regressor_cls = partial(IIRLRegression, alpha=alpha, bias=bias,
+                                      alpha2=alpha2)
+        self.patch_size = patch_size
+        self.patch_features = patch_features
+        self.n_iterations = n_iterations
+        self._compute_error = compute_error
+        self.eps = eps
+
+
+# TODO: document me!
+def features_per_patch(image, shape, patch_size, features_callable):
+    r"""
+    """
+    patches = image.extract_patches(shape, patch_size=patch_size,
+                                    as_single_array=True)
+
+    patch_features = [features_callable(p[0]).ravel() for p in patches]
+    return np.hstack(patch_features)
+
+
+# TODO: document me!
+def features_per_shape(image, shapes, patch_size, features_callable):
+    r"""
+    """
+    patch_features = [features_per_patch(image, s, patch_size,
+                                         features_callable)
+                      for s in shapes]
+
+    return np.vstack(patch_features)
+
+
+# TODO: document me!
+def features_per_image(images, shapes, patch_size, features_callable,
+                       prefix='', verbose=False):
+    r"""
+    """
+    wrap = partial(print_progress,
+                   prefix='{}Extracting patches'.format(prefix),
+                   end_with_newline=not prefix, verbose=verbose)
+
+    patch_features = [features_per_shape(i, shapes[j], patch_size,
+                                         features_callable)
+                      for j, i in enumerate(wrap(images))]
+    return np.vstack(patch_features)
+
+
+def compute_delta_x(gt_shape, current_shapes):
+    r"""
+    """
+    n_x = gt_shape.n_parameters
+    n_current_shapes = len(current_shapes)
+
+    # initialize ground truth and delta shape vectors
+    gt_x = np.empty((n_current_shapes, n_x))
+    delta_x = np.empty((n_current_shapes, n_x))
+
+    for j, s in enumerate(current_shapes):
+        # compute ground truth shape vector
+        gt_x[j] = gt_shape.as_vector()
+        # compute delta shape vector
+        delta_x[j] = gt_x[j] - s.as_vector()
+
+    return delta_x, gt_x
+
+
+def obtain_delta_x(gt_shapes, current_shapes):
+    r"""
+    """
+    n_x = gt_shapes[0].n_parameters
+    n_gt_shapes = len(gt_shapes)
+    n_current_shapes = len(current_shapes[0])
+
+    # initialize ground truth and delta shape vectors
+    gt_x = np.empty((n_gt_shapes, n_current_shapes, n_x))
+    delta_x = np.empty((n_gt_shapes, n_current_shapes, n_x))
+
+    # obtain ground truth points and compute delta points
+    for j, (gt_s, shapes) in enumerate(zip(gt_shapes, current_shapes)):
+        # compute delta and ground truth shape vectors for this image
+        delta_x[j], gt_x[j] = compute_delta_x(gt_s, shapes)
+
+    return delta_x.reshape((-1, n_x)), gt_x.reshape((-1, n_x))
+
+
+def compute_features_info(image, shape, features_callable,
+                          patch_size=(17, 17)):
+    # TODO: include offsets support?
+    patches = image.extract_patches(shape, patch_size=patch_size,
+                                    as_single_array=True)
+
+    # TODO: include offsets support?
+    features_patch_size = features_callable(patches[0, 0]).shape
+    features_patch_length = np.prod(features_patch_size)
+    features_shape = patches.shape[:1] + features_patch_size
+    features_length = np.prod(features_shape)
+
+    return (features_patch_size, features_patch_length,
+            features_shape, features_length)
diff --git a/menpofit/sdm/fitter.py b/menpofit/sdm/fitter.py
index 3993c75..cd81ca7 100644
--- a/menpofit/sdm/fitter.py
+++ b/menpofit/sdm/fitter.py
@@ -1,304 +1,320 @@
+from __future__ import division
 import numpy as np
-from menpo.image import Image
-
-from menpofit.base import name_of_callable
-from menpofit.aam.fitter import AAMFitter
-from menpofit.clm.fitter import CLMFitter
-from menpofit.fitter import MultilevelFitter
-
-
-class SDFitter(MultilevelFitter):
+from functools import partial
+import warnings
+from menpo.transform import Scale
+from menpo.feature import no_op
+from menpofit.visualize import print_progress
+from menpofit.base import batch, name_of_callable
+from menpofit.builder import (scale_images, rescale_images_to_reference_shape,
+                              compute_reference_shape, MenpoFitBuilderWarning,
+                              compute_features)
+from menpofit.fitter import (MultiFitter, noisy_shape_from_bounding_box,
+                             align_shape_with_bounding_box)
+from menpofit.result import MultiFitterResult
+import menpofit.checks as checks
+from .algorithm import Newton
+
+
+# TODO: document me!
+class SupervisedDescentFitter(MultiFitter):
     r"""
-    Abstract Supervised Descent Fitter.
     """
-    def _set_up(self):
-        r"""
-        Sets up the SD fitter object.
-        """
-
-    def fit(self, image, initial_shape, max_iters=None, gt_shape=None,
-            **kwargs):
-        r"""
-        Fits a single image.
-
-        Parameters
-        -----------
-        image : :map:`MaskedImage`
-            The image to be fitted.
-        initial_shape : :map:`PointCloud`
-            The initial shape estimate from which the fitting procedure
-            will start.
-        max_iters :  int  or `list`, optional
-            The maximum number of iterations.
-
-            If `int`, then this will be the overall maximum number of iterations
-            for all the pyramidal levels.
-
-            If `list`, then a maximum number of iterations is specified for each
-            pyramidal level.
-
-        gt_shape : :map:`PointCloud`
-            The ground truth shape of the image.
-
-        **kwargs : `dict`
-            optional arguments to be passed through.
-
-        Returns
-        -------
-        fitting_list : :map:`FittingResultList`
-            A fitting result object.
-        """
-        if max_iters is None:
-            max_iters = self.n_levels
-        return MultilevelFitter.fit(self, image, initial_shape,
-                                    max_iters=max_iters, gt_shape=gt_shape,
-                                    **kwargs)
-
-
-class SDMFitter(SDFitter):
-    r"""
-    Supervised Descent Method.
-
-    Parameters
-    -----------
-    regressors : :map:`RegressorTrainer`
-        The trained regressors.
-
-    n_training_images : `int`
-        The number of images that were used to train the SDM fitter. It is
-        only used for informational reasons.
-
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-    reference_shape : :map:`PointCloud`
-        The reference shape that was used to resize all training images to a
-        consistent object size.
-
-    downscale : `float`
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(n_levels)
-
-    References
-    ----------
-    .. [XiongD13] Supervised Descent Method and its Applications to
-       Face Alignment
-       Xuehan Xiong and Fernando De la Torre Fernando
-       IEEE International Conference on Computer Vision and Pattern Recognition
-       May, 2013
-    """
-    def __init__(self, regressors, n_training_images, features,
-                 reference_shape, downscale):
-        self._fitters = regressors
-        self._features = features
-        self._reference_shape = reference_shape
-        self._downscale = downscale
-        self._n_training_images = n_training_images
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the algorithm used from the SDM family.
-
-        : str
-        """
-        return 'SDM-' + self._fitters[0].algorithm
-
-    @property
-    def reference_shape(self):
-        r"""
-        The reference shape used during training.
-
-        :type: :map:`PointCloud`
-        """
-        return self._reference_shape
-
-    @property
-    def features(self):
+    def __init__(self, images, group=None, bounding_box_group=None,
+                 reference_shape=None, sd_algorithm_cls=Newton,
+                 holistic_features=no_op, patch_features=no_op,
+                 patch_size=(17, 17), diagonal=None, scales=(0.5, 1.0),
+                 n_iterations=6, n_perturbations=30,
+                 perturb_from_bounding_box=noisy_shape_from_bounding_box,
+                 batch_size=None, verbose=False):
+        # check parameters
+        checks.check_diagonal(diagonal)
+        scales = checks.check_scales(scales)
+        n_scales = len(scales)
+        patch_features = checks.check_features(patch_features, n_scales)
+        holistic_features = checks.check_features(holistic_features, n_scales)
+        patch_size = checks.check_patch_size(patch_size, n_scales)
+        # set parameters
+        self.algorithms = []
+        self.reference_shape = reference_shape
+        self._sd_algorithm_cls = sd_algorithm_cls
+        self.holistic_features = holistic_features
+        self.patch_features = patch_features
+        self.patch_size = patch_size
+        self.diagonal = diagonal
+        self.scales = scales
+        self.n_perturbations = n_perturbations
+        self.n_iterations = checks.check_max_iters(n_iterations, n_scales)
+        self._perturb_from_bounding_box = perturb_from_bounding_box
+        # set up algorithms
+        self._setup_algorithms()
+
+        # Now, train the model!
+        self._train(images,increment=False,  group=group,
+                    bounding_box_group=bounding_box_group, verbose=verbose,
+                    batch_size=batch_size)
+
+    def _setup_algorithms(self):
+        for j in range(self.n_scales):
+            self.algorithms.append(self._sd_algorithm_cls(
+                patch_features=self.patch_features[j],
+                patch_size=self.patch_size[j],
+                n_iterations=self.n_iterations[j]))
+
+    def _train(self, images, increment=False, group=None,
+               bounding_box_group=None, verbose=False, batch_size=None):
         r"""
-        The feature type per pyramid level. Note that they are stored from
-        lowest to highest level resolution.
-
-        :type: `list`
         """
-        return self._features
-
-    @property
-    def n_levels(self):
-        r"""
-        The number of pyramidal levels used during training.
-
-        : int
-        """
-        return len(self._fitters)
-
-    @property
-    def downscale(self):
-        r"""
-        The downscale per pyramidal level used during building the AAM.
-        The scale factor is: (downscale ** k) for k in range(n_levels)
-
-        :type: `float`
-        """
-        return self._downscale
-
-    def __str__(self):
-        out = "Supervised Descent Method\n" \
-              " - Non-Parametric '{}' Regressor\n" \
-              " - {} training images.\n".format(
-            name_of_callable(self._fitters[0].regressor),
-            self._n_training_images)
-        # small strings about number of channels, channels string and downscale
-        down_str = []
-        for j in range(self.n_levels):
-            if j == self.n_levels - 1:
-                down_str.append('(no downscale)')
-            else:
-                down_str.append('(downscale by {})'.format(
-                    self.downscale**(self.n_levels - j - 1)))
-        temp_img = Image(image_data=np.random.rand(40, 40))
-        if self.pyramid_on_features:
-            temp = self.features(temp_img)
-            n_channels = [temp.n_channels] * self.n_levels
+        # If batch_size is not None, then we may have a generator, else we
+        # assume we have a list.
+        # Without a batch_size, every image is gathered into one single
+        # batch.
+        if batch_size is not None:
+            # Create a generator of fixed sized batches. Will still work even
+            # on an infinite list.
+            image_batches = batch(images, batch_size)
         else:
-            n_channels = []
-            for j in range(self.n_levels):
-                temp = self.features[j](temp_img)
-                n_channels.append(temp.n_channels)
-        # string about features and channels
-        if self.pyramid_on_features:
-            feat_str = "- Feature is {} with ".format(
-                name_of_callable(self.features))
-            if n_channels[0] == 1:
-                ch_str = ["channel"]
-            else:
-                ch_str = ["channels"]
+            image_batches = [list(images)]
+
+        for k, image_batch in enumerate(image_batches):
+            if k == 0:
+                if self.reference_shape is None:
+                    # If no reference shape was given, use the mean of the first
+                    # batch
+                    if batch_size is not None:
+                        warnings.warn('No reference shape was provided. The '
+                                      'mean of the first batch will be the '
+                                      'reference shape. If the batch mean is '
+                                      'not representative of the true mean, '
+                                      'this may cause issues.',
+                                      MenpoFitBuilderWarning)
+                    self.reference_shape = compute_reference_shape(
+                        [i.landmarks[group].lms for i in image_batch],
+                        self.diagonal, verbose=verbose)
+            # We set landmarks on the images to archive the perturbations, so
+            # when the default 'None' is used, we need to grab the actual
+            # label to sort out the ambiguity
+            if group is None:
+                group = image_batch[0].landmarks.group_labels[0]
+
+            # After the first batch, we are incrementing the model
+            if k > 0:
+                increment = True
+
+            if verbose:
+                print('Computing batch {}'.format(k))
+
+            # Train each batch
+            self._train_batch(
+                image_batch, increment=increment, group=group,
+                bounding_box_group=bounding_box_group,
+                verbose=verbose)
+
+    def _train_batch(self, image_batch, increment=False, group=None,
+                     bounding_box_group=None, verbose=False):
+        r"""
+        Train, or increment when ``increment`` is True, the algorithm of
+        every scale on one batch of images. Initial shapes are aligned to
+        bounding boxes stored as landmark groups on the rescaled batch.
+        """
+        # Rescale to existing reference shape
+        image_batch = rescale_images_to_reference_shape(
+            image_batch, group, self.reference_shape, verbose=verbose)
+
+        # No bounding box is given, so we will use the ground truth box
+        if bounding_box_group is None:
+            # It's important to use bb_group for batching, so that we
+            # generate ground truth bounding boxes for each batch, every
+            # time
+            bb_group = '__gt_bb_'
+            for i in image_batch:
+                gt_s = i.landmarks[group].lms
+                perturb_bbox_group = bb_group + '0'
+                i.landmarks[perturb_bbox_group] = gt_s.bounding_box()
         else:
-            feat_str = []
-            ch_str = []
-            for j in range(self.n_levels):
-                if isinstance(self.features[j], str):
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j]))
-                elif self.features[j] is None:
-                    feat_str.append("- No features extracted. ")
-                else:
-                    feat_str.append("- Feature is {} with ".format(
-                        self.features[j].__name__))
-                if n_channels[j] == 1:
-                    ch_str.append("channel")
+            bb_group = bounding_box_group
+
+        # Find all bounding boxes on the images with the given bounding
+        # box key
+        bb_pattern = '*{}*'.format(bb_group)
+        all_bb_keys = list(image_batch[0].landmarks.keys_matching(bb_pattern))
+        n_perturbations = len(all_bb_keys)
+
+        # If there is only one example bounding box, then we will generate
+        # more perturbations based on the bounding box.
+        if n_perturbations == 1:
+            msg = '- Generating {} new initial bounding boxes ' \
+                  'per image'.format(self.n_perturbations)
+            wrap = partial(print_progress, prefix=msg, verbose=verbose)
+
+            for i in wrap(image_batch):
+                # We assume that the first bounding box is a valid
+                # perturbation thus create n_perturbations - 1 new bounding
+                # boxes
+                for j in range(1, self.n_perturbations):
+                    gt_s = i.landmarks[group].lms.bounding_box()
+                    bb = i.landmarks[all_bb_keys[0]].lms
+
+                    # This is customizable by passing in the correct method
+                    p_s = self._perturb_from_bounding_box(gt_s, bb)
+                    perturb_bbox_group = '{}_{}'.format(bb_group, j)
+                    i.landmarks[perturb_bbox_group] = p_s
+        elif n_perturbations != self.n_perturbations:
+            warnings.warn('The original value of n_perturbations {} '
+                          'will be reset to {} in order to agree with '
+                          'the provided bounding_box_group.'.
+                          format(self.n_perturbations, n_perturbations),
+                          MenpoFitBuilderWarning)
+            self.n_perturbations = n_perturbations
+
+        # Re-grab all the bounding box keys for iterating over when
+        # calculating perturbations
+        all_bb_keys = list(image_batch[0].landmarks.keys_matching(bb_pattern))
+
+        # for each scale (low --> high)
+        current_shapes = []
+        for j in range(self.n_scales):
+            if verbose:
+                if len(self.scales) > 1:
+                    scale_prefix = '  - Scale {}: '.format(j)
                 else:
-                    ch_str.append("channels")
-        if self.n_levels > 1:
-            out = "{} - Gaussian pyramid with {} levels and downscale " \
-                  "factor of {}.\n".format(out, self.n_levels,
-                                           self.downscale)
-            if self.pyramid_on_features:
-                out = "{}   - Pyramid was applied on feature space.\n   " \
-                      "{}{} {} per image.\n".format(out, feat_str,
-                                                    n_channels[0], ch_str[0])
+                    scale_prefix = '  - '
             else:
-                out = "{}   - Features were extracted at each pyramid " \
-                      "level.\n".format(out)
-                for i in range(self.n_levels - 1, -1, -1):
-                    out = "{}   - Level {} {}: \n     {}{} {} per " \
-                          "image.\n".format(
-                        out, self.n_levels - i, down_str[i], feat_str[i],
-                        n_channels[i], ch_str[i])
-        else:
-            if self.pyramid_on_features:
-                feat_str = [feat_str]
-            out = "{0} - No pyramid used:\n   {1}{2} {3} per image.\n".format(
-                out, feat_str[0], n_channels[0], ch_str[0])
-        return out
-
-
-class SDAAMFitter(AAMFitter, SDFitter):
-    r"""
-    Supervised Descent Fitter for AAMs.
-
-    Parameters
-    -----------
-    aam : :map:`AAM`
-        The Active Appearance Model to be used.
-
-    regressors : :map:``RegressorTrainer`
-        The trained regressors.
-
-    n_training_images : `int`
-        The number of training images used to train the SDM fitter.
-    """
-    def __init__(self, aam, regressors, n_training_images):
-        super(SDAAMFitter, self).__init__(aam)
-        self._fitters = regressors
-        self._n_training_images = n_training_images
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the algorithm used from the SDM family.
-
-        :type: `string`
-        """
-        return 'SD-AAM-' + self._fitters[0].algorithm
-
-    def __str__(self):
-        return "{}Supervised Descent Method for AAMs:\n" \
-               " - Parametric '{}' Regressor\n" \
-               " - {} training images.\n".format(
-            self.aam.__str__(), name_of_callable(self._fitters[0].regressor),
-            self._n_training_images)
-
-
-class SDCLMFitter(CLMFitter, SDFitter):
-    r"""
-    Supervised Descent Fitter for CLMs.
-
-    Parameters
-    -----------
-    clm : :map:`CLM`
-        The Constrained Local Model to be used.
-
-    regressors : :map:`RegressorTrainer`
-        The trained regressors.
-
-    n_training_images : `int`
-        The number of training images used to train the SDM fitter.
-
-    References
-    ----------
-    .. [Asthana13] Robust Discriminative Response Map Fitting with Constrained
-       Local Models
-       A. Asthana, S. Zafeiriou, S. Cheng, M. Pantic.
-       IEEE Conference onComputer Vision and Pattern Recognition.
-       Portland, Oregon, USA, June 2013.
-    """
-    def __init__(self, clm, regressors, n_training_images):
-        super(SDCLMFitter, self).__init__(clm)
-        self._fitters = regressors
-        self._n_training_images = n_training_images
-
-    @property
-    def algorithm(self):
-        r"""
-        Returns a string containing the algorithm used from the SDM family.
-
-        :type: `string`
-        """
-        return 'SD-CLM-' + self._fitters[0].algorithm
+                scale_prefix = None
+
+            # Handle holistic features
+            if j == 0 and self.holistic_features[j] == no_op:
+                # Saves a lot of memory
+                feature_images = image_batch
+            elif (j == 0 or self.holistic_features[j] is not
+                  self.holistic_features[j - 1]):
+                # Compute features only on the first pass through the loop
+                # or when this scale's features differ from the previous one
+                feature_images = compute_features(image_batch,
+                                                  self.holistic_features[j],
+                                                  prefix=scale_prefix,
+                                                  verbose=verbose)
+            # handle scales
+            if self.scales[j] != 1:
+                # Scale feature images only if scale is different than 1
+                scaled_images = scale_images(feature_images, self.scales[j],
+                                             prefix=scale_prefix,
+                                             verbose=verbose)
+            else:
+                scaled_images = feature_images
+
+            # Extract scaled ground truth shapes for current scale
+            scaled_shapes = [i.landmarks[group].lms for i in scaled_images]
+
+            if j == 0:
+                # scale_prefix is None when not verbose, so fall back to ''
+                msg = '{}Generating {} perturbations per image'.format(
+                    scale_prefix or '', self.n_perturbations)
+                wrap = partial(print_progress, prefix=msg,
+                               end_with_newline=False, verbose=verbose)
+
+                # Extract perturbations at the very bottom level
+                for i in wrap(scaled_images):
+                    c_shapes = []
+                    for perturb_bbox_group in all_bb_keys:
+                        bbox = i.landmarks[perturb_bbox_group].lms
+                        c_s = align_shape_with_bounding_box(
+                            self.reference_shape, bbox)
+                        c_shapes.append(c_s)
+                    current_shapes.append(c_shapes)
+
+            # train supervised descent algorithm
+            algorithm = self.algorithms[j]
+            fit = algorithm.increment if increment else algorithm.train
+            current_shapes = fit(scaled_images, scaled_shapes, current_shapes,
+                                 prefix=scale_prefix, verbose=verbose)
+
+            # Scale current shapes to next resolution, don't bother
+            # scaling final level
+            if j != (self.n_scales - 1):
+                transform = Scale(self.scales[j + 1] / self.scales[j],
+                                  n_dims=2)
+                for image_shapes in current_shapes:
+                    for shape in image_shapes:
+                        transform.apply_inplace(shape)
+
+    def increment(self, images, group=None, bounding_box_group=None,
+                  verbose=False, batch_size=None):
+        # Delegate to _train with increment=True and hand back its result
+        return self._train(
+            images, group=group, bounding_box_group=bounding_box_group,
+            verbose=verbose, increment=True, batch_size=batch_size)
+
+    def perturb_from_bounding_box(self, bounding_box):
+        perturb = self._perturb_from_bounding_box
+        return perturb(self.reference_shape, bounding_box)
+
+    def _fitter_result(self, image, algorithm_results, affine_correction,
+                       gt_shape=None):
+        positional = (image, self, algorithm_results, affine_correction)
+        return MultiFitterResult(*positional, gt_shape=gt_shape)
 
     def __str__(self):
-        return "{}Supervised Descent Method for CLMs:\n" \
-               " - Parametric '{}' Regressor\n" \
-               " - {} training images.\n".format(
-            self.clm.__str__(), name_of_callable(self._fitters[0].regressor),
-            self._n_training_images)
+        # Fall back to the reference shape's extent when no diagonal is set
+        diagonal = self.diagonal
+        if diagonal is None:
+            y, x = self.reference_shape.range()
+            diagonal = np.sqrt(x ** 2 + y ** 2)
+        custom_perturb = (self._perturb_from_bounding_box !=
+                          noisy_shape_from_bounding_box)
+        regressor_cls = self.algorithms[0]._regressor_cls
+
+        # One description block per scale, in the order of self.scales
+        scale_tmplt = r"""  - Scale {}
+   - {} iterations
+   - Patch size: {}
+   - Holistic feature: {}
+   - Patch feature: {}"""
+        scales_info = '\n'.join(
+            scale_tmplt.format(
+                s, self.n_iterations[k], self.patch_size[k],
+                name_of_callable(self.holistic_features[k]),
+                name_of_callable(self.patch_features[k]))
+            for k, s in enumerate(self.scales))
+
+        # Fill the overall summary and append the per-scale blocks
+        summary_tmplt = r"""Supervised Descent Method
+ - Regression performed using the {reg_alg} algorithm
+   - Regression class: {reg_cls}
+ - Perturbations generated per shape: {n_perturbations}
+ - Images scaled to diagonal: {diagonal:.2f}
+ - Custom perturbation scheme used: {is_custom_perturb_func}
+ - Scales: {scales}
+{scales_info}
+"""
+        return summary_tmplt.format(
+            reg_alg=name_of_callable(self._sd_algorithm_cls),
+            reg_cls=name_of_callable(regressor_cls),
+            n_perturbations=self.n_perturbations,
+            diagonal=diagonal,
+            is_custom_perturb_func=custom_perturb,
+            scales=self.scales,
+            scales_info=scales_info)
+
+
+# Convenience aliases: SDM is SupervisedDescentFitter pre-bound to Newton
+SDM = partial(SupervisedDescentFitter, sd_algorithm_cls=Newton)
+
+class RegularizedSDM(SupervisedDescentFitter):
+    r"""SupervisedDescentFitter using ``Newton`` with ``alpha`` pre-bound."""
+    def __init__(self, images, group=None, bounding_box_group=None,
+                 alpha=1.0, reference_shape=None,
+                 holistic_features=no_op, patch_features=no_op,
+                 patch_size=(17, 17), diagonal=None, scales=(0.5, 1.0),
+                 n_iterations=6, n_perturbations=30,
+                 perturb_from_bounding_box=noisy_shape_from_bounding_box,
+                 batch_size=None, verbose=False):
+        newton_cls = partial(Newton, alpha=alpha)
+        super(RegularizedSDM, self).__init__(
+            images, group=group, bounding_box_group=bounding_box_group,
+            reference_shape=reference_shape, sd_algorithm_cls=newton_cls,
+            holistic_features=holistic_features, patch_features=patch_features,
+            patch_size=patch_size, diagonal=diagonal, scales=scales,
+            n_iterations=n_iterations, n_perturbations=n_perturbations,
+            perturb_from_bounding_box=perturb_from_bounding_box,
+            batch_size=batch_size, verbose=verbose)
diff --git a/menpofit/sdm/trainer.py b/menpofit/sdm/trainer.py
deleted file mode 100644
index 83a9ec2..0000000
--- a/menpofit/sdm/trainer.py
+++ /dev/null
@@ -1,981 +0,0 @@
-from __future__ import division, print_function
-import abc
-import numpy as np
-from menpo.transform import Scale
-from menpo.shape import mean_pointcloud
-from menpo.feature import sparse_hog, no_op
-from menpofit.modelinstance import PDM, OrthoPDM
-from menpo.visualize import print_dynamic, progress_bar_str
-
-from menpofit import checks
-from menpofit.transform import (ModelDrivenTransform, OrthoMDTransform,
-                                DifferentiableAlignmentSimilarity)
-from menpofit.regression.trainer import (
-    NonParametricRegressorTrainer, ParametricRegressorTrainer,
-    SemiParametricClassifierBasedRegressorTrainer)
-from menpofit.regression.regressors import mlr
-from menpofit.regression.parametricfeatures import weights
-from menpofit.base import DeformableModel, create_pyramid
-from .fitter import SDMFitter, SDAAMFitter, SDCLMFitter
-
-
-def check_regression_features(regression_features, n_levels):
-    try:
-        return checks.check_list_callables(regression_features, n_levels)
-    except ValueError:
-        raise ValueError("regression_features must be a callable or a list of "
-                         "{} callables".format(n_levels))
-
-
-def check_regression_type(regression_type, n_levels):
-    r"""
-    Checks the regression type (method) per level.
-
-    It must be a callable or a list of those from the family of
-    functions defined in :ref:`regression_functions`
-
-    Parameters
-    ----------
-    regression_type : `function` or list of those
-        The regression type to check.
-
-    n_levels : `int`
-        The number of pyramid levels.
-
-    Returns
-    -------
-    regression_type_list : `list`
-        A list of regression types that has length ``n_levels``.
-    """
-    try:
-        return checks.check_list_callables(regression_type, n_levels)
-    except ValueError:
-        raise ValueError("regression_type must be a callable or a list of "
-                         "{} callables".format(n_levels))
-
-
-def check_n_permutations(n_permutations):
-    if n_permutations < 1:
-        raise ValueError("n_permutations must be > 0")
-
-
-def apply_pyramid_on_images(generators, n_levels, verbose=False):
-    r"""
-    Exhausts the pyramid generators verbosely
-    """
-    all_images = []
-    for j in range(n_levels):
-
-        if verbose:
-            level_str = '- Apply pyramid: '
-            if n_levels > 1:
-                level_str = '- Apply pyramid: [Level {} - '.format(j + 1)
-
-        level_images = []
-        for c, g in enumerate(generators):
-            if verbose:
-                print_dynamic(
-                    '{}Computing feature space/rescaling - {}'.format(
-                        level_str,
-                        progress_bar_str((c + 1.) / len(generators),
-                                         show_bar=False)))
-            level_images.append(next(g))
-        all_images.append(level_images)
-    if verbose:
-        print_dynamic('- Apply pyramid: Done\n')
-    return all_images
-
-
-class SDTrainer(DeformableModel):
-    r"""
-    Mixin for Supervised Descent Trainers.
-
-    Parameters
-    ----------
-    regression_type : `callable`, or list of those, optional
-        If list of length ``n_levels``, then a regression type is defined per
-        level.
-
-        If not a list or a list with length ``1``, then the specified regression
-        type will be applied to all pyramid levels.
-
-        Examples of such callables can be found in :ref:`regression_callables`.
-    regression_features :`` None`` or `callable` or `[callable]`, optional
-        The features that are used during the regression.
-
-        If `list`, a regression feature is defined per level.
-
-        If not list or list with length ``1``, the specified regression feature
-        will be used for all levels.
-
-        Depending on the :map:`SDTrainer` object, this parameter can take
-        different types.
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-    n_levels : `int` > ``0``, optional
-        The number of multi-resolution pyramidal levels to be used.
-    downscale : `float` >= ``1``, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(n_levels)
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int` > ``0``, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-
-    Returns
-    -------
-    fitter : :map:`MultilevelFitter`
-        The fitter object.
-
-    Raises
-    ------
-    ValueError
-        ``regression_type`` must be a `function` or a list of those
-        containing ``1`` or ``n_levels`` elements
-    ValueError
-        n_levels must be `int` > ``0``
-    ValueError
-        ``downscale`` must be >= ``1``
-    ValueError
-        ``n_perturbations`` must be > 0
-    ValueError
-        ``features`` must be a `string` or a `function` or a list of those
-        containing ``1`` or ``n_levels`` elements
-    """
-    __metaclass__ = abc.ABCMeta
-
-    def __init__(self, regression_type=mlr, regression_features=None,
-                 features=no_op, n_levels=3, downscale=1.2, noise_std=0.04,
-                 rotation=False, n_perturbations=10):
-        features = checks.check_features(features, n_levels)
-        DeformableModel.__init__(self, features)
-
-        # general deformable model checks
-        checks.check_n_levels(n_levels)
-        checks.check_downscale(downscale)
-
-        # SDM specific checks
-        regression_type_list = check_regression_type(regression_type,
-                                                     n_levels)
-        regression_features = check_regression_features(regression_features,
-                                                        n_levels)
-        check_n_permutations(n_perturbations)
-
-        # store parameters
-        self.regression_type = regression_type_list
-        self.regression_features = regression_features
-        self.n_levels = n_levels
-        self.downscale = downscale
-        self.noise_std = noise_std
-        self.rotation = rotation
-        self.n_perturbations = n_perturbations
-
-    def train(self, images, group=None, label=None, verbose=False, **kwargs):
-        r"""
-        Trains a Supervised Descent Regressor given a list of landmarked
-        images.
-
-        Parameters
-        ----------
-        images: list of :map:`MaskedImage`
-            The set of landmarked images from which to build the SD.
-        group : `string`, optional
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-        label: `string`, optional
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-        verbose: `boolean`, optional
-            Flag that controls information and progress printing.
-        """
-        if verbose:
-            print_dynamic('- Computing reference shape')
-        self.reference_shape = self._compute_reference_shape(images, group,
-                                                             label)
-        # store number of training images
-        self.n_training_images = len(images)
-
-        # normalize the scaling of all images wrt the reference_shape size
-        self._rescale_reference_shape()
-        normalized_images = self._normalization_wrt_reference_shape(
-            images, group, label, self.reference_shape, verbose=verbose)
-
-        # create pyramid
-        generators = create_pyramid(normalized_images, self.n_levels,
-                                    self.downscale, self.features,
-                                    verbose=verbose)
-
-        # get feature images of all levels
-        images = apply_pyramid_on_images(generators, self.n_levels,
-                                         verbose=verbose)
-
-        # this .reverse sets the lowest resolution as the first level
-        images.reverse()
-
-        # extract the ground truth shapes
-        gt_shapes = [[i.landmarks[group][label] for i in img]
-                     for img in images]
-
-        # build the regressors
-        if verbose:
-            if self.n_levels > 1:
-                print_dynamic('- Building regressors for each of the {} '
-                              'pyramid levels\n'.format(self.n_levels))
-            else:
-                print_dynamic('- Building regressors\n')
-
-        regressors = []
-        # for each pyramid level (low --> high)
-        for j, (level_images, level_gt_shapes) in enumerate(zip(images,
-                                                                gt_shapes)):
-            if verbose:
-                if self.n_levels == 1:
-                    print_dynamic('\n')
-                elif self.n_levels > 1:
-                    print_dynamic('\nLevel {}:\n'.format(j + 1))
-
-            # build regressor
-            trainer = self._set_regressor_trainer(j)
-            if j == 0:
-                regressor = trainer.train(level_images, level_gt_shapes,
-                                          verbose=verbose, **kwargs)
-            else:
-                regressor = trainer.train(level_images, level_gt_shapes,
-                                          level_shapes, verbose=verbose,
-                                          **kwargs)
-
-            if verbose:
-                print_dynamic('- Perturbing shapes...')
-            level_shapes = trainer.perturb_shapes(gt_shapes[0])
-
-            regressors.append(regressor)
-            count = 0
-            total = len(regressors) * len(images[0]) * len(level_shapes[0])
-            for k, r in enumerate(regressors):
-
-                test_images = images[k]
-                test_gt_shapes = gt_shapes[k]
-
-                fitting_results = []
-                for (i, gt_s, level_s) in zip(test_images, test_gt_shapes,
-                                              level_shapes):
-                    fr_list = []
-                    for ls in level_s:
-                        parameters = r.get_parameters(ls)
-                        fr = r.fit(i, parameters)
-                        fr.gt_shape = gt_s
-                        fr_list.append(fr)
-                        count += 1
-
-                    fitting_results.append(fr_list)
-                    if verbose:
-                        print_dynamic('- Fitting shapes: {}'.format(
-                            progress_bar_str((count + 1.) / total,
-                                             show_bar=False)))
-
-                level_shapes = [[Scale(self.downscale,
-                                       n_dims=self.reference_shape.n_dims
-                                       ).apply(fr.final_shape)
-                                 for fr in fr_list]
-                                for fr_list in fitting_results]
-
-            if verbose:
-                print_dynamic('- Fitting shapes: computing mean error...')
-            mean_error = np.mean(np.array([fr.final_error()
-                                           for fr_list in fitting_results
-                                           for fr in fr_list]))
-            if verbose:
-                print_dynamic("- Fitting shapes: mean error "
-                              "is {0:.6f}.\n".format(mean_error))
-
-        return self._build_supervised_descent_fitter(regressors)
-
-    @classmethod
-    def _normalization_wrt_reference_shape(cls, images, group, label,
-                                           reference_shape, verbose=False):
-        r"""
-        Normalizes the images sizes with respect to the reference
-        shape (mean shape) scaling. This step is essential before building a
-        deformable model.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images from which to build the model.
-
-        group : `string`
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        reference_shape : :map:`PointCloud`
-            The reference shape that is used to resize all training images to
-            a consistent object size.
-
-        verbose: bool, optional
-            Flag that controls information and progress printing.
-
-        Returns
-        -------
-        normalized_images : :map:`MaskedImage` list
-            A list with the normalized images.
-        """
-        normalized_images = []
-        for c, i in enumerate(images):
-            if verbose:
-                print_dynamic('- Normalizing images size: {}'.format(
-                    progress_bar_str((c + 1.) / len(images),
-                                     show_bar=False)))
-            normalized_images.append(i.rescale_to_reference_shape(
-                reference_shape, group=group, label=label))
-
-        if verbose:
-            print_dynamic('- Normalizing images size: Done\n')
-        return normalized_images
-
-    @abc.abstractmethod
-    def _compute_reference_shape(self, images, group, label):
-        r"""
-        Function that computes the reference shape, given a set of images.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images.
-
-        group : `string`
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        Returns
-        -------
-        reference_shape : :map:`PointCloud`
-            The reference shape computed based on the given images shapes.
-        """
-        pass
-
-    def _rescale_reference_shape(self):
-        r"""
-        Function that rescales the reference shape w.r.t. to
-        ``normalization_diagonal`` parameter.
-        """
-        pass
-
-    @abc.abstractmethod
-    def _set_regressor_trainer(self, **kwargs):
-        r"""
-        Function that sets the regression object to be one from
-        :map:`RegressorTrainer`,
-        """
-        pass
-
-    @abc.abstractmethod
-    def _build_supervised_descent_fitter(self, regressors):
-        r"""
-        Builds an SDM fitter object.
-
-        Parameters
-        ----------
-        regressors : list of :map:`RegressorTrainer`
-            The list of regressors.
-
-        Returns
-        -------
-        fitter : :map:`SDMFitter`
-            The SDM fitter object.
-        """
-        pass
-
-
-class SDMTrainer(SDTrainer):
-    r"""
-    Class that trains Supervised Descent Method using Non-Parametric
-    Regression.
-
-    Parameters
-    ----------
-    regression_type : `callable` or list of those, optional
-        If list of length ``n_levels``, then a regression type is defined per
-        level.
-
-        If not a list or a list with length ``1``, then the specified regression
-        type will be applied to all pyramid levels.
-
-        The callable should be one of the methods defined in
-        :ref:`regression_callables`
-
-    regression_features: ``None`` or  `callable` or `[callable]`, optional
-        If list of length ``n_levels``, then a feature is defined per level.
-
-        If not a list, then the specified feature will be applied to all
-        pyramid levels.
-
-        Per level:
-            If ``None``, no features are extracted, thus specified
-            ``features`` is used in the regressor.
-
-            It is recommended to set the desired features using this option,
-            leaving ``features`` equal to :map:`no_op`. This means that the
-            images will remain in the intensities space and the features will
-            be extracted by the regressor.
-
-    patch_shape: tuple of `int`
-        The shape of the patches used by the SDM.
-
-    features : `callable` or ``[callable]``, optional
-        If list of length ``n_levels``, feature extraction is performed at
-        each level after downscaling of the image.
-        The first element of the list specifies the features to be extracted at
-        the lowest pyramidal level and so on.
-
-        If ``callable`` the specified feature will be applied to the original
-        image and pyramid generation will be performed on top of the feature
-        image. Also see the `pyramid_on_features` property.
-
-    n_levels : `int` > ``0``, optional
-        The number of multi-resolution pyramidal levels to be used.
-
-    downscale : `float` >= ``1``, optional
-        The downscale factor that will be used to create the different
-        pyramidal levels. The scale factor will be::
-
-            (downscale ** k) for k in range(n_levels)
-
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        initial shape.
-
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the initial shape.
-
-    n_perturbations : `int` > ``0``, optional
-        Defines the number of perturbations that will be applied to the shapes.
-
-    normalization_diagonal : `int` >= ``20``, optional
-        During training, all images are rescaled to ensure that the scale of
-        their landmarks matches the scale of the mean shape.
-
-        If `int`, it ensures that the mean shape is scaled so that the diagonal
-        of the bounding box containing it matches the normalization_diagonal
-        value.
-
-        If ``None``, the mean shape is not rescaled.
-
-        Note that, because the reference frame is computed from the mean
-        landmarks, this kwarg also specifies the diagonal length of the
-        reference frame (provided that features computation does not change
-        the image size).
-
-    Raises
-    ------
-    ValueError
-        ``regression_features`` must be ``None`` or a `string` or a `function`
-        or a list of those containing 1 or ``n_level`` elements
-    """
-    def __init__(self, regression_type=mlr, regression_features=sparse_hog,
-                 patch_shape=(16, 16), features=no_op, n_levels=3,
-                 downscale=1.5, noise_std=0.04,
-                 rotation=False, n_perturbations=10,
-                 normalization_diagonal=None):
-        super(SDMTrainer, self).__init__(
-            regression_type=regression_type,
-            regression_features=regression_features,
-            features=features, n_levels=n_levels, downscale=downscale,
-            noise_std=noise_std, rotation=rotation,
-            n_perturbations=n_perturbations)
-        self.patch_shape = patch_shape
-        self.normalization_diagonal = normalization_diagonal
-
-    def _compute_reference_shape(self, images, group, label):
-        r"""
-        Function that computes the reference shape, given a set of images.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images.
-
-        group : `string`
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        Returns
-        -------
-        reference_shape : :map:`PointCloud`
-            The reference shape computed based on the given images.
-        """
-        shapes = [i.landmarks[group][label] for i in images]
-        return mean_pointcloud(shapes)
-
-    def _rescale_reference_shape(self):
-        r"""
-        Function that rescales the reference shape w.r.t. to
-        ``normalization_diagonal`` parameter.
-        """
-        if self.normalization_diagonal:
-            x, y = self.reference_shape.range()
-            scale = self.normalization_diagonal / np.sqrt(x**2 + y**2)
-            Scale(scale, self.reference_shape.n_dims).apply_inplace(
-                self.reference_shape)
-
-    def _set_regressor_trainer(self, level):
-        r"""
-        Function that sets the regression class to be the
-        :map:`NonParametricRegressorTrainer`.
-
-        Parameters
-        ----------
-        level : `int`
-            The scale level.
-
-        Returns
-        -------
-        trainer : :map:`NonParametricRegressorTrainer`
-            The regressor object.
-        """
-        return NonParametricRegressorTrainer(
-            self.reference_shape, regression_type=self.regression_type[level],
-            regression_features=self.regression_features[level],
-            patch_shape=self.patch_shape, noise_std=self.noise_std,
-            rotation=self.rotation, n_perturbations=self.n_perturbations)
-
-    def _build_supervised_descent_fitter(self, regressors):
-        r"""
-        Builds an SDM fitter object.
-
-        Parameters
-        ----------
-        regressors : list of :map:`RegressorTrainer`
-            The list of regressors.
-
-        Returns
-        -------
-        fitter : :map:`SDMFitter`
-            The SDM fitter object.
-        """
-        return SDMFitter(regressors, self.n_training_images, self.features,
-                         self.reference_shape, self.downscale)
-
-
-class SDAAMTrainer(SDTrainer):
-    r"""
-    Class that trains Supervised Descent Regressor for a given Active
-    Appearance Model, thus uses Parametric Regression.
-
-    Parameters
-    ----------
-    aam : :map:`AAM`
-        The trained AAM object.
-    regression_type : `callable`, or list of those, optional
-        If list of length ``n_levels``, then a regression type is defined per
-        level.
-
-        If not a list or a list with length ``1``, then the specified regression
-        type will be applied to all pyramid levels.
-
-        Examples of such callables can be found in :ref:`regression_callables`.
-    regression_features: `function` or list of those, optional
-        If list of length ``n_levels``, then a feature is defined per level.
-
-        If not a list or a list with length ``1``, then the specified feature
-        will be applied to all pyramid levels.
-
-        The callable should be one of the methods defined in
-        :ref:`parametricfeatures`.
-    noise_std : `float`, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int` > ``0``, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-    update : {'additive', 'compositional'}
-        Defines the way that the warp will be updated.
-    md_transform: :map:`ModelDrivenTransform`, optional
-        The model driven transform class to be used.
-    n_shape : `int` > ``1`` or ``0`` <= `float` <= ``1`` or ``None``, or a list of those, optional
-        The number of shape components to be used per fitting level.
-
-        If list of length ``n_levels``, then a number of components is defined
-        per level. The first element of the list corresponds to the lowest
-        pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        components will be used for all levels.
-
-        Per level:
-            If ``None``, all the available shape components
-            (``n_active_components``)will be used.
-
-            If `int` > ``1``, a specific number of shape components is
-            specified.
-
-            If ``0`` <= `float` <= ``1``, it specifies the variance percentage
-            that is captured by the components.
-    n_appearance : `int` > ``1`` or ``0`` <= `float` <= ``1`` or ``None``, or a list of those, optional
-        The number of appearance components to be used per fitting level.
-
-        If list of length ``n_levels``, then a number of components is defined
-        per level. The first element of the list corresponds to the lowest
-        pyramidal level and so on.
-
-        If not a list or a list with length 1, then the specified number of
-        components will be used for all levels.
-
-        Per level:
-            If ``None``, all the available appearance components
-            (``n_active_components``) will be used.
-            
-            If `int > ``1``, a specific number of appearance components is
-            specified.
-            
-            If ``0`` <= `float` <= ``1``, it specifies the variance percentage
-            that is captured by the components.
-
-    Raises
-    -------
-    ValueError
-        n_shape can be an integer or a float or None or a list containing 1
-        or ``n_levels`` of those
-    ValueError
-        n_appearance can be an integer or a float or None or a list containing
-        1 or ``n_levels`` of those
-    ValueError
-        ``regression_features`` must be a `function` or a list of those
-        containing ``1`` or ``n_levels`` elements
-    """
-    def __init__(self, aam, regression_type=mlr, regression_features=weights,
-                 noise_std=0.04, rotation=False, n_perturbations=10,
-                 update='compositional', md_transform=OrthoMDTransform,
-                 n_shape=None, n_appearance=None):
-        super(SDAAMTrainer, self).__init__(
-            regression_type=regression_type,
-            regression_features=regression_features,
-            features=aam.features, n_levels=aam.n_levels,
-            downscale=aam.downscale, noise_std=noise_std,
-            rotation=rotation, n_perturbations=n_perturbations)
-        self.aam = aam
-        self.update = update
-        self.md_transform = md_transform
-        # hard coded for now as this is the only supported configuration.
-        self.global_transform = DifferentiableAlignmentSimilarity
-
-        # check n_shape parameter
-        if n_shape is not None:
-            if type(n_shape) is int or type(n_shape) is float:
-                for sm in self.aam.shape_models:
-                    sm.n_active_components = n_shape
-            elif len(n_shape) == 1 and self.aam.n_levels > 1:
-                for sm in self.aam.shape_models:
-                    sm.n_active_components = n_shape[0]
-            elif len(n_shape) == self.aam.n_levels:
-                for sm, n in zip(self.aam.shape_models, n_shape):
-                    sm.n_active_components = n
-            else:
-                raise ValueError('n_shape can be an integer or a float, '
-                                 'an integer or float list containing 1 '
-                                 'or {} elements or else '
-                                 'None'.format(self.aam.n_levels))
-
-        # check n_appearance parameter
-        if n_appearance is not None:
-            if type(n_appearance) is int or type(n_appearance) is float:
-                for am in self.aam.appearance_models:
-                    am.n_active_components = n_appearance
-            elif len(n_appearance) == 1 and self.aam.n_levels > 1:
-                for am in self.aam.appearance_models:
-                    am.n_active_components = n_appearance[0]
-            elif len(n_appearance) == self.aam.n_levels:
-                for am, n in zip(self.aam.appearance_models, n_appearance):
-                    am.n_active_components = n
-            else:
-                raise ValueError('n_appearance can be an integer or a float, '
-                                 'an integer or float list containing 1 '
-                                 'or {} elements or else '
-                                 'None'.format(self.aam.n_levels))
-
-    def _compute_reference_shape(self, images, group, label):
-        r"""
-        Function that returns the reference shape computed during AAM building.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images.
-
-        group : `string`
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        Returns
-        -------
-        reference_shape : :map:`PointCloud`
-            The reference shape computed based on.
-        """
-        return self.aam.reference_shape
-
-    def _normalize_object_size(self, images, group, label):
-        r"""
-        Function that normalizes the images sizes with respect to the reference
-        shape (mean shape) scaling.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images from which to build the model.
-
-        group : `string`
-            The key of the landmark set that should be used. If ```None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        Returns
-        -------
-        normalized_images : :map:`MaskedImage` list
-            A list with the normalized images.
-        """
-        return [i.rescale_to_reference_shape(self.reference_shape,
-                                             group=group, label=label)
-                for i in images]
-
-    def _set_regressor_trainer(self, level):
-        r"""
-        Function that sets the regression class to be the
-        :map:`ParametricRegressorTrainer`.
-
-        Parameters
-        ----------
-        level : `int`
-            The scale level.
-
-        Returns
-        -------
-        trainer: :map:`ParametricRegressorTrainer`
-            The regressor object.
-        """
-        am = self.aam.appearance_models[level]
-        sm = self.aam.shape_models[level]
-
-        if self.md_transform is not ModelDrivenTransform:
-            md_transform = self.md_transform(
-                sm, self.aam.transform, self.global_transform,
-                source=am.mean().landmarks['source'].lms)
-        else:
-            md_transform = self.md_transform(
-                sm, self.aam.transform,
-                source=am.mean().landmarks['source'].lms)
-
-        return ParametricRegressorTrainer(
-            am, md_transform, self.reference_shape,
-            regression_type=self.regression_type[level],
-            regression_features=self.regression_features[level],
-            update=self.update, noise_std=self.noise_std,
-            rotation=self.rotation, n_perturbations=self.n_perturbations)
-
-    def _build_supervised_descent_fitter(self, regressors):
-        r"""
-        Builds an SDM fitter object for AAMs.
-
-        Parameters
-        ----------
-        regressors : :map:`RegressorTrainer`
-            The regressor to build with.
-
-        Returns
-        -------
-        fitter : :map:`SDAAMFitter`
-            The SDM fitter object.
-        """
-        return SDAAMFitter(self.aam, regressors, self.n_training_images)
-
-
-class SDCLMTrainer(SDTrainer):
-    r"""
-    Class that trains Supervised Descent Regressor for a given Constrained
-    Local Model, thus uses Semi Parametric Classifier-Based Regression.
-
-    Parameters
-    ----------
-    clm : :map:`CLM`
-        The trained CLM object.
-    regression_type : `callable`, or list of those, optional
-        If list of length ``n_levels``, then a regression type is defined per
-        level.
-
-        If not a list or a list with length ``1``, then the specified regression
-        type will be applied to all pyramid levels.
-
-        Examples of such callables can be found in :ref:`regression_callables`.
-    noise_std: float, optional
-        The standard deviation of the gaussian noise used to produce the
-        training shapes.
-    rotation : `boolean`, optional
-        Specifies whether ground truth in-plane rotation is to be used
-        to produce the training shapes.
-    n_perturbations : `int` > ``0``, optional
-        Defines the number of perturbations that will be applied to the
-        training shapes.
-    pdm_transform : :map:`ModelDrivenTransform`, optional
-        The point distribution transform class to be used.
-    n_shape : `int` > ``1`` or ``0`` <= `float` <= ``1`` or ``None``, or a list of those, optional
-        The number of shape components to be used per fitting level.
-
-        If list of length ``n_levels``, then a number of components is defined
-        per level. The first element of the list corresponds to the lowest
-        pyramidal level and so on.
-
-        If not a list or a list with length ``1``, then the specified number of
-        components will be used for all levels.
-
-        Per level:
-            If ``None``, all the available shape components
-            (``n_active_components``) will be used.
-
-            If `int` > ``1``, a specific number of shape components is
-            specified.
-            
-            If ``0`` <= `float` <= ``1``, it specifies the variance percentage
-            that is captured by the components.
-
-    Raises
-    -------
-    ValueError
-        ``n_shape`` can be an integer or a `float` or ``None`` or a list
-        containing ``1`` or ``n_levels`` of those.
-    """
-    def __init__(self, clm, regression_type=mlr, noise_std=0.04,
-                 rotation=False, n_perturbations=10, pdm_transform=OrthoPDM,
-                 n_shape=None):
-        super(SDCLMTrainer, self).__init__(
-            regression_type=regression_type,
-            regression_features=[None] * clm.n_levels,
-            features=clm.features, n_levels=clm.n_levels,
-            downscale=clm.downscale, noise_std=noise_std,
-            rotation=rotation, n_perturbations=n_perturbations)
-        self.clm = clm
-        self.patch_shape = clm.patch_shape
-        self.pdm_transform = pdm_transform
-        # hard coded for now as this is the only supported configuration.
-        self.global_transform = DifferentiableAlignmentSimilarity
-
-        # check n_shape parameter
-        if n_shape is not None:
-            if type(n_shape) is int or type(n_shape) is float:
-                for sm in self.clm.shape_models:
-                    sm.n_active_components = n_shape
-            elif len(n_shape) == 1 and self.clm.n_levels > 1:
-                for sm in self.clm.shape_models:
-                    sm.n_active_components = n_shape[0]
-            elif len(n_shape) == self.clm.n_levels:
-                for sm, n in zip(self.clm.shape_models, n_shape):
-                    sm.n_active_components = n
-            else:
-                raise ValueError('n_shape can be an integer or a float or None'
-                                 'or a list containing 1 or {} of '
-                                 'those'.format(self.clm.n_levels))
-
-    def _compute_reference_shape(self, images, group, label):
-        r"""
-        Function that returns the reference shape computed during CLM building.
-
-        Parameters
-        ----------
-        images : list of :map:`MaskedImage`
-            The set of landmarked images.
-
-        group : `string`
-            The key of the landmark set that should be used. If ``None``,
-            and if there is only one set of landmarks, this set will be used.
-
-        label : `string`
-            The label of the landmark manager that you wish to use. If no
-            label is passed, the convex hull of all landmarks is used.
-
-        Returns
-        -------
-        reference_shape : :map:`PointCloud`
-            The reference shape.
-        """
-        return self.clm.reference_shape
-
-    def _set_regressor_trainer(self, level):
-        r"""
-        Function that sets the regression class to be the
-        :map:`SemiParametricClassifierBasedRegressorTrainer`
-
-        Parameters
-        ----------
-        level : `int`
-            The scale level.
-
-        Returns
-        -------
-        trainer: :map:`SemiParametricClassifierBasedRegressorTrainer`
-            The regressor object.
-        """
-        clfs = self.clm.classifiers[level]
-        sm = self.clm.shape_models[level]
-
-        if self.pdm_transform is not PDM:
-            pdm_transform = self.pdm_transform(sm, self.global_transform)
-        else:
-            pdm_transform = self.pdm_transform(sm)
-
-        return SemiParametricClassifierBasedRegressorTrainer(
-            clfs, pdm_transform, self.reference_shape,
-            regression_type=self.regression_type[level],
-            patch_shape=self.patch_shape, update='additive',
-            noise_std=self.noise_std, rotation=self.rotation,
-            n_perturbations=self.n_perturbations)
-
-    def _build_supervised_descent_fitter(self, regressors):
-        r"""
-        Builds an SDM fitter object for CLMs.
-
-        Parameters
-        ----------
-        regressors : :map:`RegressorTrainer`
-            Regressor to train with.
-
-        Returns
-        -------
-        fitter : :map:`SDCLMFitter`
-            The SDM fitter object.
-        """
-        return SDCLMFitter(self.clm, regressors, self.n_training_images)
diff --git a/menpofit/test/__init__.py b/menpofit/test/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/menpofit/test/aam_builder_test.py b/menpofit/test/aam_builder_test.py
deleted file mode 100644
index 6bcaf3a..0000000
--- a/menpofit/test/aam_builder_test.py
+++ /dev/null
@@ -1,199 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-import platform
-
-from mock import patch
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.transform import PiecewiseAffine, ThinPlateSplines
-from menpo.feature import sparse_hog, igo, lbp, no_op
-
-import menpo.io as mio
-from menpo.landmark import ibug_face_68_trimesh
-from menpofit.aam import AAMBuilder, PatchBasedAAMBuilder
-
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training.append(im)
-
-# build aams
-template_trilist_image = training[0].landmarks[None]
-trilist = ibug_face_68_trimesh(template_trilist_image)[1].lms.trilist
-aam1 = AAMBuilder(features=[igo, sparse_hog, no_op],
-                  transform=PiecewiseAffine,
-                  trilist=trilist,
-                  normalization_diagonal=150,
-                  n_levels=3,
-                  downscale=2,
-                  scaled_shape_models=False,
-                  max_shape_components=[1, 2, 3],
-                  max_appearance_components=[3, 3, 3],
-                  boundary=3).build(training)
-
-aam2 = AAMBuilder(features=[no_op, no_op],
-                  transform=ThinPlateSplines,
-                  trilist=None,
-                  normalization_diagonal=None,
-                  n_levels=2,
-                  downscale=1.2,
-                  scaled_shape_models=True,
-                  max_shape_components=None,
-                  max_appearance_components=1,
-                  boundary=0).build(training)
-
-aam3 = AAMBuilder(features=igo,
-                  transform=ThinPlateSplines,
-                  trilist=None,
-                  normalization_diagonal=None,
-                  n_levels=1,
-                  downscale=3,
-                  scaled_shape_models=True,
-                  max_shape_components=[2],
-                  max_appearance_components=10,
-                  boundary=2).build(training)
-
-aam4 = PatchBasedAAMBuilder(features=lbp,
-                            patch_shape=(10, 13),
-                            normalization_diagonal=200,
-                            n_levels=2,
-                            downscale=1.2,
-                            scaled_shape_models=True,
-                            max_shape_components=1,
-                            max_appearance_components=None,
-                            boundary=2).build(training)
-
-
-@raises(ValueError)
-def test_features_exception():
-    AAMBuilder(features=[igo, sparse_hog]).build(training)
-
-
-@raises(ValueError)
-def test_n_levels_exception():
-    AAMBuilder(n_levels=0).build(training)
-
-
-@raises(ValueError)
-def test_downscale_exception():
-    aam = AAMBuilder(downscale=1).build(training)
-    assert (aam.downscale == 1)
-    AAMBuilder(downscale=0).build(training)
-
-
-@raises(ValueError)
-def test_normalization_diagonal_exception():
-    aam = AAMBuilder(normalization_diagonal=100).build(training)
-    assert (aam.appearance_models[0].n_features == 382)
-    AAMBuilder(normalization_diagonal=10).build(training)
-
-
-@raises(ValueError)
-def test_max_shape_components_exception():
-    AAMBuilder(max_shape_components=[1, 0.2, 'a']).build(training)
-
-
-@raises(ValueError)
-def test_max_appearance_components_exception():
-    AAMBuilder(max_appearance_components=[1, 2]).build(training)
-
-
-@raises(ValueError)
-def test_boundary_exception():
-    AAMBuilder(boundary=-1).build(training)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_verbose_mock(mock_stdout):
-    AAMBuilder().build(training, verbose=True)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(aam1)
-    print(aam2)
-    print(aam3)
-    print(aam4)
-
-
-def test_aam_1():
-    assert(aam1.n_training_images == 4)
-    assert(aam1.n_levels == 3)
-    assert(aam1.downscale == 2)
-    #assert(aam1.features[0] == igo and aam1.features[2] == no_op)
-    assert_allclose(np.around(aam1.reference_shape.range()), (109., 103.))
-    assert(not aam1.scaled_shape_models)
-    assert(not aam1.pyramid_on_features)
-    assert_allclose([aam1.shape_models[j].n_components
-                     for j in range(aam1.n_levels)], (1, 2, 3))
-    assert (np.all([aam1.appearance_models[j].n_components == 3
-                    for j in range(aam1.n_levels)]))
-    assert_allclose([aam1.appearance_models[j].template_instance.n_channels
-                     for j in range(aam1.n_levels)], (2, 36, 1))
-    assert_allclose([aam1.appearance_models[j].components.shape[1]
-                     for j in range(aam1.n_levels)], (14892, 268056, 7446))
-
-
-def test_aam_2():
-    assert (aam2.n_training_images == 4)
-    assert (aam2.n_levels == 2)
-    assert (aam2.downscale == 1.2)
-    #assert (aam2.features[0] == no_op and aam2.features[1] == no_op)
-    assert_allclose(np.around(aam2.reference_shape.range()), (169., 161.))
-    assert aam2.scaled_shape_models
-    assert (not aam2.pyramid_on_features)
-    assert (np.all([aam2.shape_models[j].n_components == 3
-                    for j in range(aam2.n_levels)]))
-    assert (np.all([aam2.appearance_models[j].n_components == 1
-                    for j in range(aam2.n_levels)]))
-    assert (np.all([aam2.appearance_models[j].template_instance.n_channels == 1
-                    for j in range(aam2.n_levels)]))
-    assert_allclose([aam2.appearance_models[j].components.shape[1]
-                     for j in range(aam2.n_levels)], (12827, 18518))
-
-
-def test_aam_3():
-    assert (aam3.n_training_images == 4)
-    assert (aam3.n_levels == 1)
-    assert (aam3.downscale == 3)
-    #assert (aam3.features[0] == igo and len(aam3.features) == 1)
-    assert_allclose(np.around(aam3.reference_shape.range()), (169., 161.))
-    assert aam3.scaled_shape_models
-    assert aam3.pyramid_on_features
-    assert (np.all([aam3.shape_models[j].n_components == 2
-                    for j in range(aam3.n_levels)]))
-    assert (np.all([aam3.appearance_models[j].n_components == 3
-                    for j in range(aam3.n_levels)]))
-    assert (np.all([aam3.appearance_models[j].template_instance.n_channels == 2
-                    for j in range(aam3.n_levels)]))
-    assert_allclose([aam3.appearance_models[j].components.shape[1]
-                     for j in range(aam3.n_levels)], 37036)
-
-
-def test_aam_4():
-    assert (aam4.n_training_images == 4)
-    assert (aam4.n_levels == 2)
-    assert (aam4.downscale == 1.2)
-    #assert (aam4.features[0] == lbp)
-    assert_allclose(np.around(aam4.reference_shape.range()), (145., 138.))
-    assert aam4.scaled_shape_models
-    assert aam4.pyramid_on_features
-    assert (np.all([aam4.shape_models[j].n_components == 1
-                    for j in range(aam4.n_levels)]))
-    assert (np.all([aam4.appearance_models[j].n_components == 3
-                    for j in range(aam4.n_levels)]))
-    assert (np.all([aam4.appearance_models[j].template_instance.n_channels == 4
-                    for j in range(aam4.n_levels)]))
-    if platform.system() != 'Windows':
-        # https://github.com/menpo/menpo/issues/450
-        assert_allclose([aam4.appearance_models[j].components.shape[1]
-                         for j in range(aam4.n_levels)], (23656, 25988))
diff --git a/menpofit/test/aam_fitter_test.py b/menpofit/test/aam_fitter_test.py
deleted file mode 100644
index 39f8fc9..0000000
--- a/menpofit/test/aam_fitter_test.py
+++ /dev/null
@@ -1,451 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-from mock import patch
-from nose.plugins.attrib import attr
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.feature import igo
-from menpofit.transform import DifferentiablePiecewiseAffine
-
-
-import menpo.io as mio
-from menpo.shape.pointcloud import PointCloud
-from menpo.landmark import ibug_face_68_trimesh
-from menpofit.aam import AAMBuilder, LucasKanadeAAMFitter
-from menpofit.lucaskanade.appearance import (AFA, AFC, AIC,
-                                             SFA, SFC, SIC,
-                                             PIC)
-
-
-initial_shape = []
-initial_shape.append(PointCloud(np.array([[150.9737801, 1.85331141],
-                                          [191.20452708, 1.86714624],
-                                          [237.5088486, 7.16836457],
-                                          [280.68439528, 19.1356864],
-                                          [319.00988383, 36.18921029],
-                                          [351.31395982, 61.11002727],
-                                          [375.83681819, 86.68264647],
-                                          [401.50706656, 117.12858347],
-                                          [408.46977018, 156.72258055],
-                                          [398.49810436, 197.95690492],
-                                          [375.44584527, 234.437902],
-                                          [342.35427495, 267.96920594],
-                                          [299.04149064, 309.66693535],
-                                          [250.84207113, 331.07734674],
-                                          [198.46150259, 339.47188196],
-                                          [144.62222804, 337.84178783],
-                                          [89.92321435, 327.81734317],
-                                          [101.22474793, 26.90269773],
-                                          [89.23456877, 44.52571118],
-                                          [84.04683242, 66.6369272],
-                                          [86.36993557, 88.61559027],
-                                          [94.88123162, 108.04971327],
-                                          [88.08448274, 152.88439191],
-                                          [68.71150917, 176.94681489],
-                                          [55.7165906, 204.86028035],
-                                          [53.9169657, 232.87050281],
-                                          [69.08534014, 259.8486207],
-                                          [121.82883888, 130.79001073],
-                                          [152.30894887, 128.91266055],
-                                          [183.36381228, 128.04534764],
-                                          [216.59234031, 125.86784329],
-                                          [235.18182671, 93.18819461],
-                                          [242.46006172, 117.24575711],
-                                          [246.52987701, 142.46262589],
-                                          [240.51603561, 160.38006297],
-                                          [232.61083444, 175.36132625],
-                                          [137.35714406, 56.53012228],
-                                          [124.42060774, 67.0342585],
-                                          [121.98869265, 87.71006061],
-                                          [130.4421354, 105.16741493],
-                                          [139.32511836, 89.65144616],
-                                          [144.17935107, 69.97931719],
-                                          [125.04221953, 174.72789706],
-                                          [103.0127825, 188.96555839],
-                                          [97.38196408, 210.70911033],
-                                          [107.31622619, 232.4487582],
-                                          [119.12835959, 215.57040617],
-                                          [124.80355957, 193.64317941],
-                                          [304.3174261, 101.83559243],
-                                          [293.08249678, 116.76961123],
-                                          [287.11523488, 132.55435452],
-                                          [289.39839945, 148.49971074],
-                                          [283.59574087, 162.33458018],
-                                          [286.76478391, 187.30470094],
-                                          [292.65033117, 211.98694428],
-                                          [310.75841097, 187.33036207],
-                                          [319.06250309, 165.27131484],
-                                          [321.3339324, 148.86793045],
-                                          [321.82844973, 133.03866904],
-                                          [316.60228316, 115.15885333],
-                                          [303.45716953, 109.59946563],
-                                          [301.58563675, 135.32572565],
-                                          [298.16531481, 148.240518],
-                                          [295.39615418, 162.35992687],
-                                          [293.63384823, 201.35617245],
-                                          [301.95207707, 163.05299135],
-                                          [305.27555828, 148.48478086],
-                                          [306.41382116, 133.02994058]])))
-
-initial_shape.append(PointCloud(np.array([[33.08569962, 26.2373455],
-                                          [43.88613611, 26.24105964],
-                                          [56.31709803, 27.66423659],
-                                          [67.90810205, 30.87701063],
-                                          [78.19704859, 35.45523787],
-                                          [86.86947323, 42.14553624],
-                                          [93.45293474, 49.0108189],
-                                          [100.34442715, 57.18440338],
-                                          [102.21365016, 67.81389656],
-                                          [99.53663441, 78.88375569],
-                                          [93.34797327, 88.67752592],
-                                          [84.46413615, 97.67941492],
-                                          [72.83628901, 108.8736808],
-                                          [59.89656483, 114.62156782],
-                                          [45.83436002, 116.87518356],
-                                          [31.38054772, 116.43756484],
-                                          [16.69592792, 113.74637996],
-                                          [19.72996295, 32.96215989],
-                                          [16.51105259, 37.69327358],
-                                          [15.11834126, 43.62930018],
-                                          [15.74200674, 49.52974132],
-                                          [18.02696835, 54.74706954],
-                                          [16.20229791, 66.78348784],
-                                          [11.00138601, 73.24333984],
-                                          [7.51274105, 80.73705133],
-                                          [7.02960972, 88.25673842],
-                                          [11.10174551, 95.4993444],
-                                          [25.26138338, 60.85198075],
-                                          [33.44414202, 60.34798312],
-                                          [41.78120024, 60.11514235],
-                                          [50.70180534, 59.53056465],
-                                          [55.69238052, 50.75731293],
-                                          [57.6463118, 57.21586007],
-                                          [58.73890353, 63.98563718],
-                                          [57.12441419, 68.79579249],
-                                          [55.00216617, 72.817696],
-                                          [29.43014699, 40.91600468],
-                                          [25.95717546, 43.73596863],
-                                          [25.30429808, 49.2866408],
-                                          [27.57372827, 53.97328126],
-                                          [29.95847378, 49.80782952],
-                                          [31.26165197, 44.52660569],
-                                          [26.12405475, 72.64764418],
-                                          [20.20998272, 76.46991865],
-                                          [18.69832059, 82.30724133],
-                                          [21.36529486, 88.14351591],
-                                          [24.53640666, 83.6123157],
-                                          [26.05998356, 77.72568327],
-                                          [74.25267847, 53.07881273],
-                                          [71.23652416, 57.08803288],
-                                          [69.63453966, 61.32564044],
-                                          [70.24748314, 65.6063665],
-                                          [68.68968841, 69.32050656],
-                                          [69.54045681, 76.02404113],
-                                          [71.12050401, 82.6502915],
-                                          [75.9818397, 76.03093018],
-                                          [78.21117488, 70.10890893],
-                                          [78.82096788, 65.70521959],
-                                          [78.95372711, 61.4556606],
-                                          [77.55069872, 56.65560521],
-                                          [74.02173206, 55.16311953],
-                                          [73.51929617, 62.06964895],
-                                          [72.60106888, 65.53678304],
-                                          [71.85765381, 69.32731119],
-                                          [71.38454121, 79.79633067],
-                                          [73.61767156, 69.51337283],
-                                          [74.50990078, 65.60235839],
-                                          [74.81548138, 61.45331734]])))
-
-initial_shape.append(PointCloud(np.array([[46.63369884, 44.08764686],
-                                          [65.31491309, 44.09407109],
-                                          [86.81640178, 46.55570064],
-                                          [106.86503868, 52.11274643],
-                                          [124.66154301, 60.0315786],
-                                          [139.66199441, 71.6036014],
-                                          [151.04922447, 83.47828965],
-                                          [162.96924699, 97.61591112],
-                                          [166.20238999, 116.0014495],
-                                          [161.57203038, 135.14867658],
-                                          [150.86767554, 152.08868824],
-                                          [135.50154984, 167.65900498],
-                                          [115.38918643, 187.02141497],
-                                          [93.00770583, 196.9633751],
-                                          [68.68470174, 200.86139148],
-                                          [43.68434508, 200.10445456],
-                                          [18.28476712, 195.44958702],
-                                          [23.53265303, 55.71937105],
-                                          [17.9649934, 63.90264665],
-                                          [15.55605939, 74.17002657],
-                                          [16.63479621, 84.37585532],
-                                          [20.58703068, 93.40012265],
-                                          [17.43094904, 114.21918023],
-                                          [8.43507654, 125.39260635],
-                                          [2.4008645, 138.35427044],
-                                          [1.56520568, 151.36086382],
-                                          [8.60866558, 163.88819772],
-                                          [33.10019692, 103.95961759],
-                                          [47.25368667, 103.08786691],
-                                          [61.67406413, 102.68512872],
-                                          [77.10378638, 101.67400095],
-                                          [85.7358453, 86.49915174],
-                                          [89.11550583, 97.67032089],
-                                          [91.00533132, 109.37981584],
-                                          [88.21279407, 117.69980754],
-                                          [84.54200076, 124.65638206],
-                                          [40.31079125, 69.47691491],
-                                          [34.3036891, 74.35452803],
-                                          [33.17442528, 83.95537112],
-                                          [37.09979548, 92.06172262],
-                                          [41.22462339, 84.85685672],
-                                          [43.47869442, 75.72207092],
-                                          [34.59233557, 124.36224816],
-                                          [24.36292985, 130.97352987],
-                                          [21.74824996, 141.07018437],
-                                          [26.36124109, 151.16502601],
-                                          [31.84622487, 143.32753518],
-                                          [34.48151342, 133.14559097],
-                                          [117.83907583, 90.5145853],
-                                          [112.62211772, 97.44922176],
-                                          [109.85120974, 104.77889356],
-                                          [110.911401, 112.18314623],
-                                          [108.21692684, 118.60739086],
-                                          [109.68847724, 130.20230795],
-                                          [112.4214409, 141.66354869],
-                                          [120.82995787, 130.21422374],
-                                          [124.68597685, 119.97106848],
-                                          [125.74071883, 112.35412967],
-                                          [125.97034877, 105.00378581],
-                                          [123.54356964, 96.70126365],
-                                          [117.43961426, 94.11975273],
-                                          [116.5705649, 106.06578435],
-                                          [114.98233273, 112.06278965],
-                                          [113.69646838, 118.61916064],
-                                          [112.87813868, 136.72713211],
-                                          [116.74072208, 118.94098628],
-                                          [118.2839861, 112.17621352],
-                                          [118.81254036, 104.99973274]])))
-
-initial_shape.append(PointCloud(np.array([[29.30459178, 27.24534074],
-                                          [39.47004743, 24.38292299],
-                                          [51.54667438, 22.42372272],
-                                          [63.30767547, 22.37162616],
-                                          [74.20561385, 23.95008332],
-                                          [84.14265809, 27.94519239],
-                                          [92.16017681, 32.65929179],
-                                          [100.81474852, 38.52291926],
-                                          [105.39445843, 48.03051044],
-                                          [105.81247938, 59.1588891],
-                                          [102.5870203, 70.01814005],
-                                          [96.6149594, 80.84730771],
-                                          [88.64221584, 94.46788512],
-                                          [77.98963764, 103.31089364],
-                                          [65.35346377, 109.16323748],
-                                          [51.63461821, 112.58672956],
-                                          [37.10056847, 113.95059826],
-                                          [18.51972657, 37.11814141],
-                                          [16.7457652 , 42.42481409],
-                                          [17.01019564, 48.38086547],
-                                          [19.16282912, 53.76837796],
-                                          [22.69767086, 58.07217393],
-                                          [24.17432616, 69.88402627],
-                                          [20.99379373, 77.34357057],
-                                          [19.69904043, 85.32174442],
-                                          [21.23971857, 92.52684647],
-                                          [26.99391031, 98.26243543],
-                                          [31.12604697, 61.89794357],
-                                          [38.69324039, 59.25231487],
-                                          [46.47759964, 56.82093276],
-                                          [54.71781058, 53.90368008],
-                                          [57.08652729, 44.32277008],
-                                          [60.63919033, 49.88253722],
-                                          [63.46381778, 55.96376588],
-                                          [63.2207775 , 60.91909025],
-                                          [62.29071322, 65.26731234],
-                                          [29.75929632, 42.02967737],
-                                          [27.23910711, 45.60515084],
-                                          [28.09755316, 51.00222264],
-                                          [31.47695917, 54.81070084],
-                                          [32.61597345, 50.25772899],
-                                          [32.44103485, 44.94168113],
-                                          [35.06791957, 72.77012704],
-                                          [30.51633486, 77.93664152],
-                                          [30.64262749, 83.83136479],
-                                          [34.70122609, 88.61629379],
-                                          [36.4832508 , 83.51044643],
-                                          [36.35508694, 77.56615533],
-                                          [75.16994555, 41.58256719],
-                                          [73.39524567, 46.15605223],
-                                          [73.01204743, 50.56922423],
-                                          [74.72479626, 54.43524106],
-                                          [74.24428281, 58.34404327],
-                                          [76.82374875, 64.42709819],
-                                          [80.0690436 , 70.24390436],
-                                          [82.88766915, 62.72435028],
-                                          [83.41431565, 56.55948008],
-                                          [82.81967592, 52.25328539],
-                                          [81.81699053, 48.21872699],
-                                          [79.2228748 , 44.073611],
-                                          [75.50567221, 43.60542492],
-                                          [76.86548014, 50.2385966],
-                                          [76.9213308 , 53.74522715],
-                                          [77.22751327, 57.5098225],
-                                          [79.56023029, 67.48793174],
-                                          [78.93326695, 57.21790467],
-                                          [78.73516471, 53.30042959],
-                                          [77.92179698, 49.31461186]])))
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training_images = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training_images.append(im)
-
-# build aam
-template_trilist_image = training_images[0].landmarks[None]
-trilist = ibug_face_68_trimesh(template_trilist_image)[1].lms.trilist
-aam = AAMBuilder(features=igo,
-                 transform=DifferentiablePiecewiseAffine,
-                 trilist=trilist,
-                 normalization_diagonal=150,
-                 n_levels=3,
-                 downscale=2,
-                 scaled_shape_models=True,
-                 max_shape_components=[1, 2, 3],
-                 max_appearance_components=[3, 2, 1],
-                 boundary=3).build(training_images)
-
-aam2 = AAMBuilder(features=igo,
-                  transform=DifferentiablePiecewiseAffine,
-                  trilist=trilist,
-                  normalization_diagonal=150,
-                  n_levels=1,
-                  downscale=2,
-                  scaled_shape_models=True,
-                  max_shape_components=[1],
-                  max_appearance_components=[1],
-                  boundary=3).build(training_images)
-
-
-def test_aam():
-    assert (aam.n_training_images == 4)
-    assert (aam.n_levels == 3)
-    assert (aam.downscale == 2)
-    #assert (aam.features[0] == igo and len(aam.features) == 1)
-    assert_allclose(np.around(aam.reference_shape.range()), (109., 103.))
-    assert aam.scaled_shape_models
-    assert aam.pyramid_on_features
-    assert_allclose([aam.shape_models[j].n_components
-                     for j in range(aam.n_levels)], (1, 2, 3))
-    assert_allclose([aam.appearance_models[j].n_components
-                     for j in range(aam.n_levels)], (3, 2, 1))
-    assert_allclose([aam.appearance_models[j].template_instance.n_channels
-                     for j in range(aam.n_levels)], (2, 2, 2))
-    assert_allclose([aam.appearance_models[j].components.shape[1]
-                     for j in range(aam.n_levels)], (884, 3652, 14892))
-
-
-@raises(TypeError, ValueError)
-def test_n_shape_exception():
-    fitter = LucasKanadeAAMFitter(aam, n_shape=[3, 6, 'a'])
-
-
-@raises(ValueError)
-def test_n_appearance_exception():
-    fitter = LucasKanadeAAMFitter(aam, n_appearance=[10, 20])
-
-
-def test_pertrurb_shape():
-    fitter = LucasKanadeAAMFitter(aam)
-    s = fitter.perturb_shape(training_images[0].landmarks[None].lms,
-                             noise_std=0.08, rotation=False)
-    assert (s.n_dims == 2)
-    assert (s.n_landmark_groups == 0)
-    assert (s.n_points == 68)
-
-
-def test_obtain_shape_from_bb():
-    fitter = LucasKanadeAAMFitter(aam)
-    s = fitter.obtain_shape_from_bb(np.array([[53.916, 1.853],
-                                              [408.469, 339.471]]))
-    assert ((np.around(s.points) == np.around(initial_shape[0].points)).all())
-    assert (s.n_dims == 2)
-    assert (s.n_landmark_groups == 0)
-    assert (s.n_points == 68)
-
-
-@raises(ValueError)
-def test_max_iters_exception():
-    fitter = LucasKanadeAAMFitter(aam,
-                                  algorithm=AIC)
-    fitter.fit(training_images[0], initial_shape[0],
-               max_iters=[10, 20, 30, 40])
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(aam)
-    fitter = LucasKanadeAAMFitter(aam,
-                                  algorithm=AIC)
-    print(fitter)
-    print(aam2)
-    fitter = LucasKanadeAAMFitter(aam2,
-                                  algorithm=SFA)
-    print(fitter)
-
-
-def aam_helper(aam, algorithm, im_number, max_iters, initial_error,
-               final_error, error_type):
-    fitter = LucasKanadeAAMFitter(aam, algorithm=algorithm)
-    fitting_result = fitter.fit(
-        training_images[im_number], initial_shape[im_number],
-        gt_shape=training_images[im_number].landmarks[None].lms,
-        max_iters=max_iters)
-    assert_allclose(
-        np.around(fitting_result.initial_error(error_type=error_type), 5),
-        initial_error)
-    assert_allclose(
-        np.around(fitting_result.final_error(error_type=error_type), 5),
-        final_error)
-
-
-@attr('fuzzy')
-def test_alternating_ic():
-    aam_helper(aam, AIC, 0, 6, 0.09062, 0.05607, 'me_norm')
-
-
-@attr('fuzzy')
-def test_simultaneous_ic():
-    aam_helper(aam, SIC, 2, 7, 0.12616, 0.11152, 'me_norm')
-
-
-@attr('fuzzy')
-def test_projectout_ic():
-    aam_helper(aam, PIC, 3, 6, 0.10796, 0.07346, 'me_norm')
-
-
-@attr('fuzzy')
-def test_alternating_fa():
-    aam_helper(aam, AFA, 0, 8, 0.09062, 0.07225, 'me_norm')
-
-
-@attr('fuzzy')
-def test_simultaneous_fa():
-    aam_helper(aam, SFA, 2, 5, 0.12616, 0.11151, 'me_norm')
-
-
-@attr('fuzzy')
-def test_alternating_fc():
-    aam_helper(aam, AFC, 0, 6, 0.09062, 0.07129, 'me_norm')
-
-
-@attr('fuzzy')
-def test_simultaneous_fc():
-    aam_helper(aam, SFC, 2, 5, 0.12616, 0.11738, 'me_norm')
diff --git a/menpofit/test/atm_builder_test.py b/menpofit/test/atm_builder_test.py
deleted file mode 100644
index 9b50b3a..0000000
--- a/menpofit/test/atm_builder_test.py
+++ /dev/null
@@ -1,178 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-from mock import patch
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.transform import PiecewiseAffine, ThinPlateSplines
-from menpo.feature import sparse_hog, igo, lbp, no_op
-
-import menpo.io as mio
-from menpofit.atm import ATMBuilder, PatchBasedATMBuilder
-
-
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training = []
-templates = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training.append(im.landmarks[None].lms)
-    templates.append(im)
-
-# build atms
-atm1 = ATMBuilder(features=[igo, sparse_hog, no_op],
-                  transform=PiecewiseAffine,
-                  normalization_diagonal=150,
-                  n_levels=3,
-                  downscale=2,
-                  scaled_shape_models=False,
-                  max_shape_components=[1, 2, 3],
-                  boundary=3).build(training, templates[0])
-
-atm2 = ATMBuilder(features=[no_op, no_op],
-                  transform=ThinPlateSplines,
-                  trilist=None,
-                  normalization_diagonal=None,
-                  n_levels=2,
-                  downscale=1.2,
-                  scaled_shape_models=True,
-                  max_shape_components=None,
-                  boundary=0).build(training, templates[1])
-
-atm3 = ATMBuilder(features=igo,
-                  transform=ThinPlateSplines,
-                  trilist=None,
-                  normalization_diagonal=None,
-                  n_levels=1,
-                  downscale=3,
-                  scaled_shape_models=True,
-                  max_shape_components=[2],
-                  boundary=2).build(training, templates[2])
-
-atm4 = PatchBasedATMBuilder(features=lbp,
-                            patch_shape=(10, 13),
-                            normalization_diagonal=200,
-                            n_levels=2,
-                            downscale=1.2,
-                            scaled_shape_models=True,
-                            max_shape_components=1,
-                            boundary=2).build(training, templates[3])
-
-
-@raises(ValueError)
-def test_features_exception():
-    ATMBuilder(features=[igo, sparse_hog]).build(training, templates[0])
-
-
-@raises(ValueError)
-def test_n_levels_exception():
-    ATMBuilder(n_levels=0).build(training, templates[1])
-
-
-@raises(ValueError)
-def test_downscale_exception():
-    atm = ATMBuilder(downscale=1).build(training, templates[2])
-    assert (atm.downscale == 1)
-    ATMBuilder(downscale=0).build(training, templates[2])
-
-
-@raises(ValueError)
-def test_normalization_diagonal_exception():
-    atm = ATMBuilder(normalization_diagonal=100).build(training, templates[3])
-    assert (atm.warped_templates[0].n_true_pixels() == 1246)
-    ATMBuilder(normalization_diagonal=10).build(training, templates[3])
-
-
-@raises(ValueError)
-def test_max_shape_components_exception():
-    ATMBuilder(max_shape_components=[1, 0.2, 'a']).build(training, templates[0])
-
-
-@raises(ValueError)
-def test_max_shape_components_exception_2():
-    ATMBuilder(max_shape_components=[1, 2]).build(training, templates[0])
-
-
-@raises(ValueError)
-def test_boundary_exception():
-    ATMBuilder(boundary=-1).build(training, templates[1])
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_verbose_mock(mock_stdout):
-    ATMBuilder().build(training, templates[2], verbose=True)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(atm1)
-    print(atm2)
-    print(atm3)
-    print(atm4)
-
-
-def test_atm_1():
-    assert(atm1.n_training_shapes == 4)
-    assert(atm1.n_levels == 3)
-    assert(atm1.downscale == 2)
-    assert_allclose(np.around(atm1.reference_shape.range()), (109., 103.))
-    assert(not atm1.scaled_shape_models)
-    assert(not atm1.pyramid_on_features)
-    assert_allclose([atm1.shape_models[j].n_components
-                     for j in range(atm1.n_levels)], (1, 2, 3))
-    assert_allclose([atm1.warped_templates[j].n_channels
-                     for j in range(atm1.n_levels)], (2, 36, 1))
-    assert_allclose([atm1.warped_templates[j].shape[1]
-                     for j in range(atm1.n_levels)], (164, 164, 164))
-
-
-def test_atm_2():
-    assert (atm2.n_training_shapes == 4)
-    assert (atm2.n_levels == 2)
-    assert (atm2.downscale == 1.2)
-    assert_allclose(np.around(atm2.reference_shape.range()), (169., 161.))
-    assert atm2.scaled_shape_models
-    assert (not atm2.pyramid_on_features)
-    assert (np.all([atm2.shape_models[j].n_components == 3
-                    for j in range(atm2.n_levels)]))
-    assert (np.all([atm2.warped_templates[j].n_channels == 1
-                    for j in range(atm2.n_levels)]))
-    assert_allclose([atm2.warped_templates[j].shape[1]
-                     for j in range(atm2.n_levels)], (132, 158))
-
-
-def test_atm_3():
-    assert (atm3.n_training_shapes == 4)
-    assert (atm3.n_levels == 1)
-    assert (atm3.downscale == 3)
-    assert_allclose(np.around(atm3.reference_shape.range()), (169., 161.))
-    assert atm3.scaled_shape_models
-    assert atm3.pyramid_on_features
-    assert (np.all([atm3.shape_models[j].n_components == 2
-                    for j in range(atm3.n_levels)]))
-    assert (np.all([atm3.warped_templates[j].n_channels == 2
-                    for j in range(atm3.n_levels)]))
-    assert_allclose([atm3.warped_templates[j].shape[1]
-                     for j in range(atm3.n_levels)], 162)
-
-
-def test_atm_4():
-    assert (atm4.n_training_shapes == 4)
-    assert (atm4.n_levels == 2)
-    assert (atm4.downscale == 1.2)
-    assert_allclose(np.around(atm4.reference_shape.range()), (145., 138.))
-    assert atm4.scaled_shape_models
-    assert atm4.pyramid_on_features
-    assert (np.all([atm4.shape_models[j].n_components == 1
-                    for j in range(atm4.n_levels)]))
-    assert (np.all([atm4.warped_templates[j].n_channels == 4
-                    for j in range(atm4.n_levels)]))
-    assert_allclose([atm4.warped_templates[j].shape[1]
-                     for j in range(atm4.n_levels)], (162, 188))
diff --git a/menpofit/test/atm_fitter_test.py b/menpofit/test/atm_fitter_test.py
deleted file mode 100644
index 53f5975..0000000
--- a/menpofit/test/atm_fitter_test.py
+++ /dev/null
@@ -1,441 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-from mock import patch
-from nose.plugins.attrib import attr
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.feature import igo
-from menpofit.transform import DifferentiablePiecewiseAffine
-
-import menpo.io as mio
-from menpo.shape.pointcloud import PointCloud
-from menpofit.atm import ATMBuilder, LucasKanadeATMFitter
-from menpofit.lucaskanade.image import FA, FC, IC
-
-
-initial_shape = []
-initial_shape.append(PointCloud(np.array([[150.9737801, 1.85331141],
-                                          [191.20452708, 1.86714624],
-                                          [237.5088486, 7.16836457],
-                                          [280.68439528, 19.1356864],
-                                          [319.00988383, 36.18921029],
-                                          [351.31395982, 61.11002727],
-                                          [375.83681819, 86.68264647],
-                                          [401.50706656, 117.12858347],
-                                          [408.46977018, 156.72258055],
-                                          [398.49810436, 197.95690492],
-                                          [375.44584527, 234.437902],
-                                          [342.35427495, 267.96920594],
-                                          [299.04149064, 309.66693535],
-                                          [250.84207113, 331.07734674],
-                                          [198.46150259, 339.47188196],
-                                          [144.62222804, 337.84178783],
-                                          [89.92321435, 327.81734317],
-                                          [101.22474793, 26.90269773],
-                                          [89.23456877, 44.52571118],
-                                          [84.04683242, 66.6369272],
-                                          [86.36993557, 88.61559027],
-                                          [94.88123162, 108.04971327],
-                                          [88.08448274, 152.88439191],
-                                          [68.71150917, 176.94681489],
-                                          [55.7165906, 204.86028035],
-                                          [53.9169657, 232.87050281],
-                                          [69.08534014, 259.8486207],
-                                          [121.82883888, 130.79001073],
-                                          [152.30894887, 128.91266055],
-                                          [183.36381228, 128.04534764],
-                                          [216.59234031, 125.86784329],
-                                          [235.18182671, 93.18819461],
-                                          [242.46006172, 117.24575711],
-                                          [246.52987701, 142.46262589],
-                                          [240.51603561, 160.38006297],
-                                          [232.61083444, 175.36132625],
-                                          [137.35714406, 56.53012228],
-                                          [124.42060774, 67.0342585],
-                                          [121.98869265, 87.71006061],
-                                          [130.4421354, 105.16741493],
-                                          [139.32511836, 89.65144616],
-                                          [144.17935107, 69.97931719],
-                                          [125.04221953, 174.72789706],
-                                          [103.0127825, 188.96555839],
-                                          [97.38196408, 210.70911033],
-                                          [107.31622619, 232.4487582],
-                                          [119.12835959, 215.57040617],
-                                          [124.80355957, 193.64317941],
-                                          [304.3174261, 101.83559243],
-                                          [293.08249678, 116.76961123],
-                                          [287.11523488, 132.55435452],
-                                          [289.39839945, 148.49971074],
-                                          [283.59574087, 162.33458018],
-                                          [286.76478391, 187.30470094],
-                                          [292.65033117, 211.98694428],
-                                          [310.75841097, 187.33036207],
-                                          [319.06250309, 165.27131484],
-                                          [321.3339324, 148.86793045],
-                                          [321.82844973, 133.03866904],
-                                          [316.60228316, 115.15885333],
-                                          [303.45716953, 109.59946563],
-                                          [301.58563675, 135.32572565],
-                                          [298.16531481, 148.240518],
-                                          [295.39615418, 162.35992687],
-                                          [293.63384823, 201.35617245],
-                                          [301.95207707, 163.05299135],
-                                          [305.27555828, 148.48478086],
-                                          [306.41382116, 133.02994058]])))
-
-initial_shape.append(PointCloud(np.array([[33.08569962, 26.2373455],
-                                          [43.88613611, 26.24105964],
-                                          [56.31709803, 27.66423659],
-                                          [67.90810205, 30.87701063],
-                                          [78.19704859, 35.45523787],
-                                          [86.86947323, 42.14553624],
-                                          [93.45293474, 49.0108189],
-                                          [100.34442715, 57.18440338],
-                                          [102.21365016, 67.81389656],
-                                          [99.53663441, 78.88375569],
-                                          [93.34797327, 88.67752592],
-                                          [84.46413615, 97.67941492],
-                                          [72.83628901, 108.8736808],
-                                          [59.89656483, 114.62156782],
-                                          [45.83436002, 116.87518356],
-                                          [31.38054772, 116.43756484],
-                                          [16.69592792, 113.74637996],
-                                          [19.72996295, 32.96215989],
-                                          [16.51105259, 37.69327358],
-                                          [15.11834126, 43.62930018],
-                                          [15.74200674, 49.52974132],
-                                          [18.02696835, 54.74706954],
-                                          [16.20229791, 66.78348784],
-                                          [11.00138601, 73.24333984],
-                                          [7.51274105, 80.73705133],
-                                          [7.02960972, 88.25673842],
-                                          [11.10174551, 95.4993444],
-                                          [25.26138338, 60.85198075],
-                                          [33.44414202, 60.34798312],
-                                          [41.78120024, 60.11514235],
-                                          [50.70180534, 59.53056465],
-                                          [55.69238052, 50.75731293],
-                                          [57.6463118, 57.21586007],
-                                          [58.73890353, 63.98563718],
-                                          [57.12441419, 68.79579249],
-                                          [55.00216617, 72.817696],
-                                          [29.43014699, 40.91600468],
-                                          [25.95717546, 43.73596863],
-                                          [25.30429808, 49.2866408],
-                                          [27.57372827, 53.97328126],
-                                          [29.95847378, 49.80782952],
-                                          [31.26165197, 44.52660569],
-                                          [26.12405475, 72.64764418],
-                                          [20.20998272, 76.46991865],
-                                          [18.69832059, 82.30724133],
-                                          [21.36529486, 88.14351591],
-                                          [24.53640666, 83.6123157],
-                                          [26.05998356, 77.72568327],
-                                          [74.25267847, 53.07881273],
-                                          [71.23652416, 57.08803288],
-                                          [69.63453966, 61.32564044],
-                                          [70.24748314, 65.6063665],
-                                          [68.68968841, 69.32050656],
-                                          [69.54045681, 76.02404113],
-                                          [71.12050401, 82.6502915],
-                                          [75.9818397, 76.03093018],
-                                          [78.21117488, 70.10890893],
-                                          [78.82096788, 65.70521959],
-                                          [78.95372711, 61.4556606],
-                                          [77.55069872, 56.65560521],
-                                          [74.02173206, 55.16311953],
-                                          [73.51929617, 62.06964895],
-                                          [72.60106888, 65.53678304],
-                                          [71.85765381, 69.32731119],
-                                          [71.38454121, 79.79633067],
-                                          [73.61767156, 69.51337283],
-                                          [74.50990078, 65.60235839],
-                                          [74.81548138, 61.45331734]])))
-
-initial_shape.append(PointCloud(np.array([[46.63369884, 44.08764686],
-                                          [65.31491309, 44.09407109],
-                                          [86.81640178, 46.55570064],
-                                          [106.86503868, 52.11274643],
-                                          [124.66154301, 60.0315786],
-                                          [139.66199441, 71.6036014],
-                                          [151.04922447, 83.47828965],
-                                          [162.96924699, 97.61591112],
-                                          [166.20238999, 116.0014495],
-                                          [161.57203038, 135.14867658],
-                                          [150.86767554, 152.08868824],
-                                          [135.50154984, 167.65900498],
-                                          [115.38918643, 187.02141497],
-                                          [93.00770583, 196.9633751],
-                                          [68.68470174, 200.86139148],
-                                          [43.68434508, 200.10445456],
-                                          [18.28476712, 195.44958702],
-                                          [23.53265303, 55.71937105],
-                                          [17.9649934, 63.90264665],
-                                          [15.55605939, 74.17002657],
-                                          [16.63479621, 84.37585532],
-                                          [20.58703068, 93.40012265],
-                                          [17.43094904, 114.21918023],
-                                          [8.43507654, 125.39260635],
-                                          [2.4008645, 138.35427044],
-                                          [1.56520568, 151.36086382],
-                                          [8.60866558, 163.88819772],
-                                          [33.10019692, 103.95961759],
-                                          [47.25368667, 103.08786691],
-                                          [61.67406413, 102.68512872],
-                                          [77.10378638, 101.67400095],
-                                          [85.7358453, 86.49915174],
-                                          [89.11550583, 97.67032089],
-                                          [91.00533132, 109.37981584],
-                                          [88.21279407, 117.69980754],
-                                          [84.54200076, 124.65638206],
-                                          [40.31079125, 69.47691491],
-                                          [34.3036891, 74.35452803],
-                                          [33.17442528, 83.95537112],
-                                          [37.09979548, 92.06172262],
-                                          [41.22462339, 84.85685672],
-                                          [43.47869442, 75.72207092],
-                                          [34.59233557, 124.36224816],
-                                          [24.36292985, 130.97352987],
-                                          [21.74824996, 141.07018437],
-                                          [26.36124109, 151.16502601],
-                                          [31.84622487, 143.32753518],
-                                          [34.48151342, 133.14559097],
-                                          [117.83907583, 90.5145853],
-                                          [112.62211772, 97.44922176],
-                                          [109.85120974, 104.77889356],
-                                          [110.911401, 112.18314623],
-                                          [108.21692684, 118.60739086],
-                                          [109.68847724, 130.20230795],
-                                          [112.4214409, 141.66354869],
-                                          [120.82995787, 130.21422374],
-                                          [124.68597685, 119.97106848],
-                                          [125.74071883, 112.35412967],
-                                          [125.97034877, 105.00378581],
-                                          [123.54356964, 96.70126365],
-                                          [117.43961426, 94.11975273],
-                                          [116.5705649, 106.06578435],
-                                          [114.98233273, 112.06278965],
-                                          [113.69646838, 118.61916064],
-                                          [112.87813868, 136.72713211],
-                                          [116.74072208, 118.94098628],
-                                          [118.2839861, 112.17621352],
-                                          [118.81254036, 104.99973274]])))
-
-initial_shape.append(PointCloud(np.array([[29.30459178, 27.24534074],
-                                          [39.47004743, 24.38292299],
-                                          [51.54667438, 22.42372272],
-                                          [63.30767547, 22.37162616],
-                                          [74.20561385, 23.95008332],
-                                          [84.14265809, 27.94519239],
-                                          [92.16017681, 32.65929179],
-                                          [100.81474852, 38.52291926],
-                                          [105.39445843, 48.03051044],
-                                          [105.81247938, 59.1588891],
-                                          [102.5870203, 70.01814005],
-                                          [96.6149594, 80.84730771],
-                                          [88.64221584, 94.46788512],
-                                          [77.98963764, 103.31089364],
-                                          [65.35346377, 109.16323748],
-                                          [51.63461821, 112.58672956],
-                                          [37.10056847, 113.95059826],
-                                          [18.51972657, 37.11814141],
-                                          [16.7457652 , 42.42481409],
-                                          [17.01019564, 48.38086547],
-                                          [19.16282912, 53.76837796],
-                                          [22.69767086, 58.07217393],
-                                          [24.17432616, 69.88402627],
-                                          [20.99379373, 77.34357057],
-                                          [19.69904043, 85.32174442],
-                                          [21.23971857, 92.52684647],
-                                          [26.99391031, 98.26243543],
-                                          [31.12604697, 61.89794357],
-                                          [38.69324039, 59.25231487],
-                                          [46.47759964, 56.82093276],
-                                          [54.71781058, 53.90368008],
-                                          [57.08652729, 44.32277008],
-                                          [60.63919033, 49.88253722],
-                                          [63.46381778, 55.96376588],
-                                          [63.2207775 , 60.91909025],
-                                          [62.29071322, 65.26731234],
-                                          [29.75929632, 42.02967737],
-                                          [27.23910711, 45.60515084],
-                                          [28.09755316, 51.00222264],
-                                          [31.47695917, 54.81070084],
-                                          [32.61597345, 50.25772899],
-                                          [32.44103485, 44.94168113],
-                                          [35.06791957, 72.77012704],
-                                          [30.51633486, 77.93664152],
-                                          [30.64262749, 83.83136479],
-                                          [34.70122609, 88.61629379],
-                                          [36.4832508 , 83.51044643],
-                                          [36.35508694, 77.56615533],
-                                          [75.16994555, 41.58256719],
-                                          [73.39524567, 46.15605223],
-                                          [73.01204743, 50.56922423],
-                                          [74.72479626, 54.43524106],
-                                          [74.24428281, 58.34404327],
-                                          [76.82374875, 64.42709819],
-                                          [80.0690436 , 70.24390436],
-                                          [82.88766915, 62.72435028],
-                                          [83.41431565, 56.55948008],
-                                          [82.81967592, 52.25328539],
-                                          [81.81699053, 48.21872699],
-                                          [79.2228748 , 44.073611],
-                                          [75.50567221, 43.60542492],
-                                          [76.86548014, 50.2385966],
-                                          [76.9213308 , 53.74522715],
-                                          [77.22751327, 57.5098225],
-                                          [79.56023029, 67.48793174],
-                                          [78.93326695, 57.21790467],
-                                          [78.73516471, 53.30042959],
-                                          [77.92179698, 49.31461186]])))
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training_shapes = []
-templates = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training_shapes.append(im.landmarks[None].lms)
-    templates.append(im)
-
-# build atm
-atm1 = ATMBuilder(features=igo,
-                  transform=DifferentiablePiecewiseAffine,
-                  normalization_diagonal=150,
-                  n_levels=3,
-                  downscale=2,
-                  scaled_shape_models=True,
-                  max_shape_components=[1, 2, 3],
-                  boundary=3).build(training_shapes, templates[0])
-
-atm2 = ATMBuilder(features=igo,
-                  transform=DifferentiablePiecewiseAffine,
-                  normalization_diagonal=150,
-                  n_levels=1,
-                  downscale=2,
-                  scaled_shape_models=True,
-                  max_shape_components=[1],
-                  boundary=3).build(training_shapes, templates[1])
-
-atm3 = ATMBuilder(features=igo,
-                  transform=DifferentiablePiecewiseAffine,
-                  normalization_diagonal=150,
-                  n_levels=3,
-                  downscale=2,
-                  scaled_shape_models=True,
-                  max_shape_components=[1, 2, 3],
-                  boundary=3).build(training_shapes, templates[2])
-
-atm4 = ATMBuilder(features=igo,
-                  transform=DifferentiablePiecewiseAffine,
-                  normalization_diagonal=150,
-                  n_levels=1,
-                  downscale=2,
-                  scaled_shape_models=True,
-                  max_shape_components=[1],
-                  boundary=3).build(training_shapes, templates[3])
-
-
-def test_atm1():
-    assert (atm1.n_training_shapes == 4)
-    assert (atm1.n_levels == 3)
-    assert (atm1.downscale == 2)
-    assert_allclose(np.around(atm1.reference_shape.range()), (109., 103.))
-    assert atm1.scaled_shape_models
-    assert atm1.pyramid_on_features
-    assert_allclose([atm1.shape_models[j].n_components
-                     for j in range(atm1.n_levels)], (1, 2, 3))
-    assert_allclose([atm1.warped_templates[j].n_channels
-                     for j in range(atm1.n_levels)], (2, 2, 2))
-    assert_allclose([atm1.warped_templates[j].shape[1]
-                     for j in range(atm1.n_levels)], (46, 85, 164))
-
-
-@raises(TypeError, ValueError)
-def test_n_shape_exception():
-    fitter = LucasKanadeATMFitter(atm1, n_shape=[3, 6, 'a'])
-
-
-@raises(ValueError)
-def test_n_shape_exception_2():
-    fitter = LucasKanadeATMFitter(atm1, n_shape=[10, 20])
-
-
-def test_pertrurb_shape():
-    fitter = LucasKanadeATMFitter(atm1)
-    s = fitter.perturb_shape(templates[0].landmarks[None].lms,
-                             noise_std=0.08, rotation=False)
-    assert (s.n_dims == 2)
-    assert (s.n_landmark_groups == 0)
-    assert (s.n_points == 68)
-
-
-def test_obtain_shape_from_bb():
-    fitter = LucasKanadeATMFitter(atm1)
-    s = fitter.obtain_shape_from_bb(np.array([[53.916, 1.853],
-                                              [408.469, 339.471]]))
-    assert ((np.around(s.points) == np.around(initial_shape[0].points)).all())
-    assert (s.n_dims == 2)
-    assert (s.n_landmark_groups == 0)
-    assert (s.n_points == 68)
-
-
-@raises(ValueError)
-def test_max_iters_exception():
-    fitter = LucasKanadeATMFitter(atm1,
-                                  algorithm=IC)
-    fitter.fit(templates[0], initial_shape[0], max_iters=[10, 20, 30, 40])
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(atm1)
-    fitter = LucasKanadeATMFitter(atm1,
-                                  algorithm=IC)
-    print(fitter)
-    print(atm2)
-    fitter = LucasKanadeATMFitter(atm2,
-                                  algorithm=FA)
-    print(fitter)
-
-
-def atm_helper(atm, algorithm, im_number, max_iters, initial_error,
-               final_error, error_type):
-    fitter = LucasKanadeATMFitter(atm, algorithm=algorithm)
-    fitting_result = fitter.fit(
-        templates[im_number], initial_shape[im_number],
-        gt_shape=templates[im_number].landmarks[None].lms,
-        max_iters=max_iters)
-    assert_allclose(
-        np.around(fitting_result.initial_error(error_type=error_type), 5),
-        initial_error)
-    assert_allclose(
-        np.around(fitting_result.final_error(error_type=error_type), 5),
-        final_error)
-
-
-@attr('fuzzy')
-def test_ic():
-    atm_helper(atm1, IC, 0, 6, 0.09062, 0.06788, 'me_norm')
-
-
-@attr('fuzzy')
-def test_fa():
-    atm_helper(atm2, FA, 1, 8, 0.09051, 0.08188, 'me_norm')
-
-
-@attr('fuzzy')
-def test_fc():
-    atm_helper(atm3, FC, 2, 6, 0.12615, 0.08255, 'me_norm')
-
-@attr('fuzzy')
-def test_ic_2():
-    atm_helper(atm4, IC, 3, 7, 0.09748, 0.09511, 'me_norm')
diff --git a/menpofit/test/clm_builder_test.py b/menpofit/test/clm_builder_test.py
deleted file mode 100644
index 406222a..0000000
--- a/menpofit/test/clm_builder_test.py
+++ /dev/null
@@ -1,197 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-from sklearn import qda
-
-from mock import patch
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.feature import sparse_hog, igo, no_op
-
-import menpo.io as mio
-from menpofit.clm import CLMBuilder
-from menpofit.clm.classifier import linear_svm_lr
-from menpofit.base import name_of_callable
-
-
-def random_forest(X, t):
-    clf = qda.QDA()
-    clf.fit(X, t)
-
-    def random_forest_predict(x):
-        return clf.predict_proba(x)[:, 1]
-
-    return random_forest_predict
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training_images = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training_images.append(im)
-
-# build clms
-clm1 = CLMBuilder(classifier_trainers=[linear_svm_lr],
-                  patch_shape=(5, 5),
-                  features=[igo, sparse_hog, no_op],
-                  normalization_diagonal=150,
-                  n_levels=3,
-                  downscale=2,
-                  scaled_shape_models=False,
-                  max_shape_components=[1, 2, 3],
-                  boundary=3).build(training_images)
-
-clm2 = CLMBuilder(classifier_trainers=[random_forest, linear_svm_lr],
-                  patch_shape=(3, 10),
-                  features=[no_op, no_op],
-                  normalization_diagonal=None,
-                  n_levels=2,
-                  downscale=1.2,
-                  scaled_shape_models=True,
-                  max_shape_components=None,
-                  boundary=0).build(training_images)
-
-clm3 = CLMBuilder(classifier_trainers=[linear_svm_lr],
-                  patch_shape=(2, 3),
-                  features=igo,
-                  normalization_diagonal=None,
-                  n_levels=1,
-                  downscale=3,
-                  scaled_shape_models=True,
-                  max_shape_components=[1],
-                  boundary=2).build(training_images)
-
-
-@raises(ValueError)
-def test_classifier_type_1_exception():
-    CLMBuilder(classifier_trainers=[linear_svm_lr, linear_svm_lr]).build(
-        training_images)
-
-@raises(ValueError)
-def test_classifier_type_2_exception():
-    CLMBuilder(classifier_trainers=['linear_svm_lr']).build(training_images)
-
-@raises(ValueError)
-def test_patch_shape_1_exception():
-    CLMBuilder(patch_shape=(5, 1)).build(training_images)
-
-@raises(ValueError)
-def test_patch_shape_2_exception():
-    CLMBuilder(patch_shape=(5, 6, 7)).build(training_images)
-
-@raises(ValueError)
-def test_features_exception():
-    CLMBuilder(features=[igo, sparse_hog]).build(training_images)
-
-@raises(ValueError)
-def test_n_levels_exception():
-    clm = CLMBuilder(n_levels=0).build(training_images)
-
-
-@raises(ValueError)
-def test_downscale_exception():
-    clm = CLMBuilder(downscale=1).build(training_images)
-    assert (clm.downscale == 1)
-    CLMBuilder(downscale=0).build(training_images)
-
-
-@raises(ValueError)
-def test_normalization_diagonal_exception():
-    CLMBuilder(normalization_diagonal=10).build(training_images)
-
-
-@raises(ValueError)
-def test_max_shape_components_1_exception():
-    CLMBuilder(max_shape_components=[1, 0.2, 'a']).build(training_images)
-
-
-@raises(ValueError)
-def test_max_shape_components_2_exception():
-    CLMBuilder(max_shape_components=[1, 2]).build(training_images)
-
-
-@raises(ValueError)
-def test_boundary_exception():
-    CLMBuilder(boundary=-1).build(training_images)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_verbose_mock(mock_stdout):
-    CLMBuilder().build(training_images, verbose=True)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(clm1)
-    print(clm2)
-    print(clm3)
-
-
-def test_clm_1():
-    assert (clm1.n_training_images == 4)
-    assert (clm1.n_levels == 3)
-    assert (clm1.downscale == 2)
-    #assert (clm1.features[0] == igo and clm1.features[2] is no_op)
-    assert_allclose(np.around(clm1.reference_shape.range()), (109., 103.))
-    assert (not clm1.scaled_shape_models)
-    assert (not clm1.pyramid_on_features)
-    assert_allclose(clm1.patch_shape, (5, 5))
-    assert_allclose([clm1.shape_models[j].n_components
-                     for j in range(clm1.n_levels)], (1, 2, 3))
-    assert_allclose(clm1.n_classifiers_per_level, [68, 68, 68])
-
-    ran_0 = np.random.randint(0, clm1.n_classifiers_per_level[0])
-    ran_1 = np.random.randint(0, clm1.n_classifiers_per_level[1])
-    ran_2 = np.random.randint(0, clm1.n_classifiers_per_level[2])
-
-    assert (name_of_callable(clm1.classifiers[0][ran_0])
-            == 'linear_svm_lr')
-    assert (name_of_callable(clm1.classifiers[1][ran_1])
-            == 'linear_svm_lr')
-    assert (name_of_callable(clm1.classifiers[2][ran_2])
-            == 'linear_svm_lr')
-
-
-def test_clm_2():
-    assert (clm2.n_training_images == 4)
-    assert (clm2.n_levels == 2)
-    assert (clm2.downscale == 1.2)
-    #assert (clm2.features[0] is no_op and clm2.features[1] is no_op)
-    assert_allclose(np.around(clm2.reference_shape.range()), (169., 161.))
-    assert clm2.scaled_shape_models
-    assert (not clm2.pyramid_on_features)
-    assert_allclose(clm2.patch_shape, (3, 10))
-    assert (np.all([clm2.shape_models[j].n_components == 3
-                    for j in range(clm2.n_levels)]))
-    assert_allclose(clm2.n_classifiers_per_level, [68, 68])
-
-    ran_0 = np.random.randint(0, clm2.n_classifiers_per_level[0])
-    ran_1 = np.random.randint(0, clm2.n_classifiers_per_level[1])
-
-    assert (name_of_callable(clm2.classifiers[0][ran_0])
-            == 'random_forest_predict')
-    assert (name_of_callable(clm2.classifiers[1][ran_1])
-            == 'linear_svm_lr')
-
-
-def test_clm_3():
-    assert (clm3.n_training_images == 4)
-    assert (clm3.n_levels == 1)
-    assert (clm3.downscale == 3)
-    #assert (clm3.features[0] == igo and len(clm3.features) == 1)
-    assert_allclose(np.around(clm3.reference_shape.range()), (169., 161.))
-    assert clm3.scaled_shape_models
-    assert clm3.pyramid_on_features
-    assert_allclose(clm3.patch_shape, (2, 3))
-    assert (np.all([clm3.shape_models[j].n_components == 1
-                    for j in range(clm3.n_levels)]))
-    assert_allclose(clm3.n_classifiers_per_level, [68])
-    ran_0 = np.random.randint(0, clm3.n_classifiers_per_level[0])
-
-    assert (name_of_callable(clm3.classifiers[0][ran_0])
-            == 'linear_svm_lr')
diff --git a/menpofit/test/clm_fitter_test.py b/menpofit/test/clm_fitter_test.py
deleted file mode 100644
index c2ef74c..0000000
--- a/menpofit/test/clm_fitter_test.py
+++ /dev/null
@@ -1,373 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-from mock import patch
-import numpy as np
-from numpy.testing import assert_allclose
-from nose.tools import raises
-from menpo.feature import sparse_hog
-
-import menpo.io as mio
-from menpo.shape.pointcloud import PointCloud
-from menpofit.clm import CLMBuilder
-from menpofit.clm import GradientDescentCLMFitter
-from menpofit.gradientdescent import RLMS
-from menpofit.clm.classifier import linear_svm_lr
-from menpofit.base import name_of_callable
-
-
-initial_shape = []
-initial_shape.append(PointCloud(np.array([[150.9737801, 1.85331141],
-                                          [191.20452708, 1.86714624],
-                                          [237.5088486, 7.16836457],
-                                          [280.68439528, 19.1356864],
-                                          [319.00988383, 36.18921029],
-                                          [351.31395982, 61.11002727],
-                                          [375.83681819, 86.68264647],
-                                          [401.50706656, 117.12858347],
-                                          [408.46977018, 156.72258055],
-                                          [398.49810436, 197.95690492],
-                                          [375.44584527, 234.437902],
-                                          [342.35427495, 267.96920594],
-                                          [299.04149064, 309.66693535],
-                                          [250.84207113, 331.07734674],
-                                          [198.46150259, 339.47188196],
-                                          [144.62222804, 337.84178783],
-                                          [89.92321435, 327.81734317],
-                                          [101.22474793, 26.90269773],
-                                          [89.23456877, 44.52571118],
-                                          [84.04683242, 66.6369272],
-                                          [86.36993557, 88.61559027],
-                                          [94.88123162, 108.04971327],
-                                          [88.08448274, 152.88439191],
-                                          [68.71150917, 176.94681489],
-                                          [55.7165906, 204.86028035],
-                                          [53.9169657, 232.87050281],
-                                          [69.08534014, 259.8486207],
-                                          [121.82883888, 130.79001073],
-                                          [152.30894887, 128.91266055],
-                                          [183.36381228, 128.04534764],
-                                          [216.59234031, 125.86784329],
-                                          [235.18182671, 93.18819461],
-                                          [242.46006172, 117.24575711],
-                                          [246.52987701, 142.46262589],
-                                          [240.51603561, 160.38006297],
-                                          [232.61083444, 175.36132625],
-                                          [137.35714406, 56.53012228],
-                                          [124.42060774, 67.0342585],
-                                          [121.98869265, 87.71006061],
-                                          [130.4421354, 105.16741493],
-                                          [139.32511836, 89.65144616],
-                                          [144.17935107, 69.97931719],
-                                          [125.04221953, 174.72789706],
-                                          [103.0127825, 188.96555839],
-                                          [97.38196408, 210.70911033],
-                                          [107.31622619, 232.4487582],
-                                          [119.12835959, 215.57040617],
-                                          [124.80355957, 193.64317941],
-                                          [304.3174261, 101.83559243],
-                                          [293.08249678, 116.76961123],
-                                          [287.11523488, 132.55435452],
-                                          [289.39839945, 148.49971074],
-                                          [283.59574087, 162.33458018],
-                                          [286.76478391, 187.30470094],
-                                          [292.65033117, 211.98694428],
-                                          [310.75841097, 187.33036207],
-                                          [319.06250309, 165.27131484],
-                                          [321.3339324, 148.86793045],
-                                          [321.82844973, 133.03866904],
-                                          [316.60228316, 115.15885333],
-                                          [303.45716953, 109.59946563],
-                                          [301.58563675, 135.32572565],
-                                          [298.16531481, 148.240518],
-                                          [295.39615418, 162.35992687],
-                                          [293.63384823, 201.35617245],
-                                          [301.95207707, 163.05299135],
-                                          [305.27555828, 148.48478086],
-                                          [306.41382116, 133.02994058]])))
-
-initial_shape.append(PointCloud(np.array([[33.08569962, 26.2373455],
-                                          [43.88613611, 26.24105964],
-                                          [56.31709803, 27.66423659],
-                                          [67.90810205, 30.87701063],
-                                          [78.19704859, 35.45523787],
-                                          [86.86947323, 42.14553624],
-                                          [93.45293474, 49.0108189],
-                                          [100.34442715, 57.18440338],
-                                          [102.21365016, 67.81389656],
-                                          [99.53663441, 78.88375569],
-                                          [93.34797327, 88.67752592],
-                                          [84.46413615, 97.67941492],
-                                          [72.83628901, 108.8736808],
-                                          [59.89656483, 114.62156782],
-                                          [45.83436002, 116.87518356],
-                                          [31.38054772, 116.43756484],
-                                          [16.69592792, 113.74637996],
-                                          [19.72996295, 32.96215989],
-                                          [16.51105259, 37.69327358],
-                                          [15.11834126, 43.62930018],
-                                          [15.74200674, 49.52974132],
-                                          [18.02696835, 54.74706954],
-                                          [16.20229791, 66.78348784],
-                                          [11.00138601, 73.24333984],
-                                          [7.51274105, 80.73705133],
-                                          [7.02960972, 88.25673842],
-                                          [11.10174551, 95.4993444],
-                                          [25.26138338, 60.85198075],
-                                          [33.44414202, 60.34798312],
-                                          [41.78120024, 60.11514235],
-                                          [50.70180534, 59.53056465],
-                                          [55.69238052, 50.75731293],
-                                          [57.6463118, 57.21586007],
-                                          [58.73890353, 63.98563718],
-                                          [57.12441419, 68.79579249],
-                                          [55.00216617, 72.817696],
-                                          [29.43014699, 40.91600468],
-                                          [25.95717546, 43.73596863],
-                                          [25.30429808, 49.2866408],
-                                          [27.57372827, 53.97328126],
-                                          [29.95847378, 49.80782952],
-                                          [31.26165197, 44.52660569],
-                                          [26.12405475, 72.64764418],
-                                          [20.20998272, 76.46991865],
-                                          [18.69832059, 82.30724133],
-                                          [21.36529486, 88.14351591],
-                                          [24.53640666, 83.6123157],
-                                          [26.05998356, 77.72568327],
-                                          [74.25267847, 53.07881273],
-                                          [71.23652416, 57.08803288],
-                                          [69.63453966, 61.32564044],
-                                          [70.24748314, 65.6063665],
-                                          [68.68968841, 69.32050656],
-                                          [69.54045681, 76.02404113],
-                                          [71.12050401, 82.6502915],
-                                          [75.9818397, 76.03093018],
-                                          [78.21117488, 70.10890893],
-                                          [78.82096788, 65.70521959],
-                                          [78.95372711, 61.4556606],
-                                          [77.55069872, 56.65560521],
-                                          [74.02173206, 55.16311953],
-                                          [73.51929617, 62.06964895],
-                                          [72.60106888, 65.53678304],
-                                          [71.85765381, 69.32731119],
-                                          [71.38454121, 79.79633067],
-                                          [73.61767156, 69.51337283],
-                                          [74.50990078, 65.60235839],
-                                          [74.81548138, 61.45331734]])))
-
-initial_shape.append(PointCloud(np.array([[46.63369884, 44.08764686],
-                                          [65.31491309, 44.09407109],
-                                          [86.81640178, 46.55570064],
-                                          [106.86503868, 52.11274643],
-                                          [124.66154301, 60.0315786],
-                                          [139.66199441, 71.6036014],
-                                          [151.04922447, 83.47828965],
-                                          [162.96924699, 97.61591112],
-                                          [166.20238999, 116.0014495],
-                                          [161.57203038, 135.14867658],
-                                          [150.86767554, 152.08868824],
-                                          [135.50154984, 167.65900498],
-                                          [115.38918643, 187.02141497],
-                                          [93.00770583, 196.9633751],
-                                          [68.68470174, 200.86139148],
-                                          [43.68434508, 200.10445456],
-                                          [18.28476712, 195.44958702],
-                                          [23.53265303, 55.71937105],
-                                          [17.9649934, 63.90264665],
-                                          [15.55605939, 74.17002657],
-                                          [16.63479621, 84.37585532],
-                                          [20.58703068, 93.40012265],
-                                          [17.43094904, 114.21918023],
-                                          [8.43507654, 125.39260635],
-                                          [2.4008645, 138.35427044],
-                                          [1.56520568, 151.36086382],
-                                          [8.60866558, 163.88819772],
-                                          [33.10019692, 103.95961759],
-                                          [47.25368667, 103.08786691],
-                                          [61.67406413, 102.68512872],
-                                          [77.10378638, 101.67400095],
-                                          [85.7358453, 86.49915174],
-                                          [89.11550583, 97.67032089],
-                                          [91.00533132, 109.37981584],
-                                          [88.21279407, 117.69980754],
-                                          [84.54200076, 124.65638206],
-                                          [40.31079125, 69.47691491],
-                                          [34.3036891, 74.35452803],
-                                          [33.17442528, 83.95537112],
-                                          [37.09979548, 92.06172262],
-                                          [41.22462339, 84.85685672],
-                                          [43.47869442, 75.72207092],
-                                          [34.59233557, 124.36224816],
-                                          [24.36292985, 130.97352987],
-                                          [21.74824996, 141.07018437],
-                                          [26.36124109, 151.16502601],
-                                          [31.84622487, 143.32753518],
-                                          [34.48151342, 133.14559097],
-                                          [117.83907583, 90.5145853],
-                                          [112.62211772, 97.44922176],
-                                          [109.85120974, 104.77889356],
-                                          [110.911401, 112.18314623],
-                                          [108.21692684, 118.60739086],
-                                          [109.68847724, 130.20230795],
-                                          [112.4214409, 141.66354869],
-                                          [120.82995787, 130.21422374],
-                                          [124.68597685, 119.97106848],
-                                          [125.74071883, 112.35412967],
-                                          [125.97034877, 105.00378581],
-                                          [123.54356964, 96.70126365],
-                                          [117.43961426, 94.11975273],
-                                          [116.5705649, 106.06578435],
-                                          [114.98233273, 112.06278965],
-                                          [113.69646838, 118.61916064],
-                                          [112.87813868, 136.72713211],
-                                          [116.74072208, 118.94098628],
-                                          [118.2839861, 112.17621352],
-                                          [118.81254036, 104.99973274]])))
-
-initial_shape.append(PointCloud(np.array([[109.7313602, 59.79617265],
-                                          [148.98369157, 59.80967103],
-                                          [194.16188757, 64.98196322],
-                                          [236.28740084, 76.65823864],
-                                          [273.68080984, 93.2970192],
-                                          [305.19924763, 117.61175954],
-                                          [329.12570774, 142.56245019],
-                                          [354.17165322, 172.2679391],
-                                          [360.96502322, 210.89900639],
-                                          [351.23586926, 251.13050805],
-                                          [328.74424331, 286.72428381],
-                                          [296.45746314, 319.44010324],
-                                          [254.19804988, 360.12373989],
-                                          [207.17084485, 381.01344803],
-                                          [156.06417675, 389.20382735],
-                                          [103.53427849, 387.61337726],
-                                          [50.16555004, 377.83272806],
-                                          [61.19222924, 84.23635551],
-                                          [49.49365238, 101.43077559],
-                                          [44.43208227, 123.00424473],
-                                          [46.69868733, 144.44838462],
-                                          [55.00298785, 163.40986789],
-                                          [48.37153655, 207.15416287],
-                                          [29.46971554, 230.63138542],
-                                          [16.79083463, 257.86599274],
-                                          [15.03497678, 285.19500392],
-                                          [29.83445491, 311.5170114],
-                                          [81.29522673, 185.59711915],
-                                          [111.03405753, 183.7654263],
-                                          [141.3336637, 182.91920653],
-                                          [173.75407076, 180.79465929],
-                                          [191.89145908, 148.90978278],
-                                          [198.99268671, 172.38226306],
-                                          [202.96352371, 196.98585519],
-                                          [197.0959395, 214.46753849],
-                                          [189.38299358, 229.08445602],
-                                          [96.44588204, 113.14323827],
-                                          [83.82396352, 123.3919129],
-                                          [81.45119284, 143.56487752],
-                                          [89.69904705, 160.59766732],
-                                          [98.36599502, 145.45904839],
-                                          [103.10217233, 126.26534747],
-                                          [84.43045765, 228.46643189],
-                                          [62.93677864, 242.35783193],
-                                          [57.44290226, 263.57257862],
-                                          [67.13556217, 284.78351617],
-                                          [78.66042335, 268.31564727],
-                                          [84.19760192, 246.92169276],
-                                          [259.34567409, 157.34687506],
-                                          [248.38397932, 171.9176971],
-                                          [242.5618418, 187.31855393],
-                                          [244.78947959, 202.87611756],
-                                          [239.12794222, 216.37452165],
-                                          [242.21991383, 240.73736669],
-                                          [247.96232402, 264.81933552],
-                                          [265.63001359, 240.76240374],
-                                          [273.7321494, 219.23983464],
-                                          [275.94833733, 203.23538213],
-                                          [276.43082796, 187.79108987],
-                                          [271.33176225, 170.34611298],
-                                          [258.50633904, 164.92193012],
-                                          [256.68032211, 190.02252505],
-                                          [253.34318274, 202.62322841],
-                                          [250.64136836, 216.39925191],
-                                          [248.92192186, 254.44710508],
-                                          [257.03785057, 217.075461],
-                                          [260.28050441, 202.86155077],
-                                          [261.39108462, 187.78257369]])))
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training_images = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training_images.append(im)
-
-# build clm
-clm = CLMBuilder(classifier_trainers=linear_svm_lr,
-                 patch_shape=(8, 8),
-                 features=sparse_hog,
-                 normalization_diagonal=100,
-                 n_levels=2,
-                 downscale=1.1,
-                 scaled_shape_models=True,
-                 max_shape_components=[2, 2],
-                 boundary=3).build(training_images)
-
-
-def test_clm():
-    assert (clm.n_training_images == 4)
-    assert (clm.n_levels == 2)
-    assert (clm.downscale == 1.1)
-    #assert (clm.features[0] == sparse_hog and len(clm.features) == 1)
-    assert_allclose(np.around(clm.reference_shape.range()), (72.,  69.))
-    assert clm.scaled_shape_models
-    assert clm.pyramid_on_features
-    assert_allclose(clm.patch_shape, (8, 8))
-    assert_allclose([clm.shape_models[j].n_components
-                     for j in range(clm.n_levels)], (2, 2))
-    assert_allclose(clm.n_classifiers_per_level, [68, 68])
-
-    ran_0 = np.random.randint(0, clm.n_classifiers_per_level[0])
-    ran_1 = np.random.randint(0, clm.n_classifiers_per_level[1])
-
-    assert (name_of_callable(clm.classifiers[0][ran_0])
-            == 'linear_svm_lr')
-    assert (name_of_callable(clm.classifiers[1][ran_1])
-            == 'linear_svm_lr')
-
-
-@raises(ValueError)
-def test_n_shape_1_exception():
-    fitter = GradientDescentCLMFitter(clm, n_shape=[3, 6, 'a'])
-
-
-@raises(ValueError)
-def test_n_shape_2_exception():
-    fitter = GradientDescentCLMFitter(clm, n_shape=[10, 20, 3])
-
-
-def test_perturb_shape():
-    fitter = GradientDescentCLMFitter(clm)
-    s = fitter.perturb_shape(training_images[0].landmarks[None].lms,
-                             noise_std=0.08, rotation=False)
-    assert (s.n_dims == 2)
-    assert (s.n_landmark_groups == 0)
-    assert (s.n_points == 68)
-
-
-@raises(ValueError)
-def test_max_iters_exception():
-    fitter = GradientDescentCLMFitter(clm)
-    fitter.fit(training_images[0], initial_shape[0],
-               max_iters=[10, 20, 30, 40])
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_str_mock(mock_stdout):
-    print(clm)
-    fitter = GradientDescentCLMFitter(
-        clm, algorithm=RLMS)
-    print(fitter)
diff --git a/menpofit/test/fitmulitlevel_base_test.py b/menpofit/test/fitmulitlevel_base_test.py
deleted file mode 100644
index 3df3c19..0000000
--- a/menpofit/test/fitmulitlevel_base_test.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from menpo.feature import sparse_hog, igo
-
-from menpofit.base import (is_pyramid_on_features,
-                                         name_of_callable)
-
-
-class Foo():
-    def __call__(self):
-        pass
-
-
-def test_is_pyramid_on_features_true():
-    assert is_pyramid_on_features(igo)
-
-
-def test_is_pyramid_on_features_false():
-    assert not is_pyramid_on_features([igo, sparse_hog])
-
-
-def test_name_of_callable_partial():
-    assert name_of_callable(sparse_hog) == 'sparse_hog'
-
-
-def test_name_of_callable_function():
-    assert name_of_callable(igo) == 'igo'
-
-
-def test_name_of_callable_object_with_call():
-    assert name_of_callable(Foo()) == 'Foo'
diff --git a/menpofit/test/fittingresult_test.py b/menpofit/test/fittingresult_test.py
deleted file mode 100644
index 311de16..0000000
--- a/menpofit/test/fittingresult_test.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import numpy as np
-from numpy.testing import assert_approx_equal
-from nose.plugins.attrib import attr
-from nose.tools import raises
-from mock import MagicMock
-from menpo.shape import PointCloud
-from menpo.testing import is_same_array
-from menpo.image import MaskedImage
-
-from menpofit.fittingresult import FittingResult, NonParametricFittingResult
-
-
-class MockedFittingResult(FittingResult):
-
-    def __init__(self, gt_shape=None):
-        FittingResult.__init__(self, MaskedImage.init_blank((10, 10)),
-                               gt_shape=gt_shape)
-    @property
-    def n_iters(self):
-        return 1
-
-    @property
-    def shapes(self):
-        return [PointCloud(np.ones([3, 2]))]
-
-    @property
-    def final_shape(self):
-        return PointCloud(np.ones([3, 2]))
-
-    @property
-    def initial_shape(self):
-        return PointCloud(np.ones([3, 2]))
-
-
-@attr('fuzzy')
-def test_fittingresult_errors_me_norm():
-    pcloud = PointCloud(np.array([[1., 2], [3, 4], [5, 6]]))
-    fr = MockedFittingResult(gt_shape=pcloud)
-
-    assert_approx_equal(fr.errors()[0], 0.9173896)
-
-
-@raises(ValueError)
-def test_fittingresult_errors_no_gt():
-    fr = MockedFittingResult()
-    fr.errors()
-
-
-def test_fittingresult_gt_shape():
-    pcloud = PointCloud(np.ones([3, 2]))
-    fr = MockedFittingResult(gt_shape=pcloud)
-    assert (is_same_array(fr.gt_shape.points, pcloud.points))
-
-
-@attr('fuzzy')
-def test_fittingresult_final_error_me_norm():
-    pcloud = PointCloud(np.array([[1., 2], [3, 4], [5, 6]]))
-    fr = MockedFittingResult(gt_shape=pcloud)
-
-    assert_approx_equal(fr.final_error(), 0.9173896)
-
-
-@raises(ValueError)
-def test_fittingresult_final_error_no_gt():
-    fr = MockedFittingResult()
-    fr.final_error()
-
-
-@attr('fuzzy')
-def test_fittingresult_initial_error_me_norm():
-    pcloud = PointCloud(np.array([[1., 2], [3, 4], [5, 6]]))
-    fr = MockedFittingResult(gt_shape=pcloud)
-
-    assert_approx_equal(fr.initial_error(), 0.9173896)
-
-
-@raises(ValueError)
-def test_fittingresult_initial_error_no_gt():
-    fr = MockedFittingResult()
-    fr.initial_error()
-
-
-def test_nonpara_fittingresult_as_serialized():
-    image = MagicMock()
-    fitter = MagicMock()
-    parameters = [MagicMock()]
-    gt_shape = MagicMock()
-    fr = NonParametricFittingResult(image, fitter, parameters=parameters,
-                                    gt_shape=gt_shape)
-    s_fr = fr.as_serializable()
-
-    image.copy.assert_called_once()
-    parameters[0].copy.assert_called_once()
-    gt_shape.copy.assert_called_once()
-
-
-def test_nonpara_fittingresult_as_serialized():
-    image = MagicMock()
-    fitter = MagicMock()
-    parameters = [MagicMock()]
-    gt_shape = MagicMock()
-    fr = NonParametricFittingResult(image, fitter, parameters=parameters,
-                                    gt_shape=gt_shape)
-    s_fr = fr.as_serializable()
-
-    image.copy.assert_called_once()
-    parameters[0].copy.assert_called_once()
-    gt_shape.copy.assert_called_once()
\ No newline at end of file
diff --git a/menpofit/test/multilevel_fittingresult_test.py b/menpofit/test/multilevel_fittingresult_test.py
deleted file mode 100644
index 5bdca7a..0000000
--- a/menpofit/test/multilevel_fittingresult_test.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from mock import MagicMock
-from menpofit.fittingresult import MultilevelFittingResult
-
-
-def test_multilevel_fittingresult_as_serialized():
-    image = MagicMock()
-    multiple_fitter = MagicMock()
-    fitting_results = [MagicMock()]
-    affine_correction = MagicMock()
-    gt_shape = MagicMock()
-    fr = MultilevelFittingResult(image, multiple_fitter, fitting_results,
-                                 affine_correction, gt_shape=gt_shape)
-    s_fr = fr.as_serializable()
-
-    image.copy.assert_called_once()
-    fitting_results[0].as_serialized.assert_called_once()
-    affine_correction.copy.assert_called_once()
-    gt_shape.copy.assert_called_once()
diff --git a/menpofit/test/sdm_test.py b/menpofit/test/sdm_test.py
deleted file mode 100644
index e54bbf2..0000000
--- a/menpofit/test/sdm_test.py
+++ /dev/null
@@ -1,114 +0,0 @@
-try:
-    from StringIO import StringIO
-except ImportError:
-    from io import StringIO
-
-from mock import patch
-from nose.tools import raises
-import numpy as np
-from menpo.feature import sparse_hog, igo, no_op
-from menpo.transform import PiecewiseAffine
-
-import menpo.io as mio
-from menpo.landmark import ibug_face_68_trimesh
-from menpofit.sdm import SDMTrainer, SDAAMTrainer
-from menpofit.clm.classifier import linear_svm_lr
-from menpofit.regression.regressors import mlr_svd
-from menpofit.aam import AAMBuilder
-from menpofit.clm import CLMBuilder
-
-
-# load images
-filenames = ['breakingbad.jpg', 'takeo.ppm', 'lenna.png', 'einstein.jpg']
-training_images = []
-for i in range(4):
-    im = mio.import_builtin_asset(filenames[i])
-    im.crop_to_landmarks_proportion_inplace(0.1)
-    if im.n_channels == 3:
-        im = im.as_greyscale(mode='luminosity')
-    training_images.append(im)
-
-# Seed the random number generator
-np.random.seed(seed=1000)
-
-template_trilist_image = training_images[0].landmarks[None]
-trilist = ibug_face_68_trimesh(template_trilist_image)[1].lms.trilist
-aam = AAMBuilder(features=sparse_hog,
-                 transform=PiecewiseAffine,
-                 trilist=trilist,
-                 normalization_diagonal=150,
-                 n_levels=3,
-                 downscale=1.2,
-                 scaled_shape_models=False,
-                 max_shape_components=None,
-                 max_appearance_components=3,
-                 boundary=3).build(training_images)
-
-clm = CLMBuilder(classifier_trainers=linear_svm_lr,
-                 features=sparse_hog,
-                 normalization_diagonal=100,
-                 patch_shape=(5, 5),
-                 n_levels=1,
-                 downscale=1.1,
-                 scaled_shape_models=True,
-                 max_shape_components=25,
-                 boundary=3).build(training_images)
-
-@raises(ValueError)
-def test_features_exception():
-    sdm = SDMTrainer(features=[igo, sparse_hog],
-                     n_levels=3).train(training_images)
-
-
-@raises(ValueError)
-def test_regression_features_sdmtrainer_exception_1():
-    sdm = SDMTrainer(n_levels=2, regression_features=[no_op, no_op, no_op]).\
-        train(training_images)
-
-
-@raises(ValueError)
-def test_regression_features_sdmtrainer_exception_2():
-    sdm = SDMTrainer(n_levels=3, regression_features=[no_op, sparse_hog, 1]).\
-        train(training_images)
-
-
-@raises(ValueError)
-def test_regression_features_sdaamtrainer_exception_1():
-    sdm = SDAAMTrainer(aam, regression_features=[no_op, sparse_hog]).\
-        train(training_images)
-
-
-@raises(ValueError)
-def test_regression_features_sdaamtrainer_exception_2():
-    sdm = SDAAMTrainer(aam, regression_features=7).\
-        train(training_images)
-
-
-@raises(ValueError)
-def test_n_levels_exception():
-    sdm = SDMTrainer(n_levels=0).train(training_images)
-
-
-@raises(ValueError)
-def test_downscale_exception():
-    sdm = SDMTrainer(downscale=0).train(training_images)
-
-
-@raises(ValueError)
-def test_n_perturbations_exception():
-    sdm = SDAAMTrainer(aam, n_perturbations=-10).train(training_images)
-
-
-@patch('sys.stdout', new_callable=StringIO)
-def test_verbose_mock(mock_stdout):
-    sdm = SDMTrainer(regression_type=mlr_svd,
-                     regression_features=sparse_hog,
-                     patch_shape=(16, 16),
-                     features=no_op,
-                     normalization_diagonal=150,
-                     n_levels=1,
-                     downscale=1.3,
-                     noise_std=0.04,
-                     rotation=False,
-                     n_perturbations=2).train(training_images,
-                                              verbose=True)
diff --git a/menpofit/transform/__init__.py b/menpofit/transform/__init__.py
index 4c6c12b..2aee5a1 100644
--- a/menpofit/transform/__init__.py
+++ b/menpofit/transform/__init__.py
@@ -1,4 +1,4 @@
-from .modeldriven import ModelDrivenTransform, OrthoMDTransform
+from .modeldriven import OrthoMDTransform, LinearOrthoMDTransform
 from .homogeneous import (DifferentiableAffine, DifferentiableSimilarity,
                           DifferentiableAlignmentSimilarity,
                           DifferentiableAlignmentAffine)
diff --git a/menpofit/transform/modeldriven.py b/menpofit/transform/modeldriven.py
index ddf8d00..42c1993 100644
--- a/menpofit/transform/modeldriven.py
+++ b/menpofit/transform/modeldriven.py
@@ -1,11 +1,12 @@
 import numpy as np
-
 from menpo.base import Targetable, Vectorizable
+from menpo.shape import PointCloud
 from menpofit.modelinstance import PDM, GlobalPDM, OrthoPDM
 from menpo.transform.base import Transform, VComposable, VInvertible
 from menpofit.differentiable import DP
 
 
+# TODO: Should MDT implement VComposable and VInvertible?
 class ModelDrivenTransform(Transform, Targetable, Vectorizable,
                            VComposable, VInvertible, DP):
     r"""
@@ -135,22 +136,17 @@ def compose_after_from_vector_inplace(self, delta):
         r"""
         Composes two ModelDrivenTransforms together based on the
         first order approximation proposed by Papandreou and Maragos in [1].
-
         Parameters
         ----------
         delta : (N,) ndarray
             Vectorized :class:`ModelDrivenTransform` to be applied **before**
             self
-
         Returns
         --------
         transform : self
             self, updated to the result of the composition
-
-
         References
         ----------
-
         .. [1] G. Papandreou and P. Maragos, "Adaptive and Constrained
                Algorithms for Inverse Compositional Active Appearance Model
                Fitting", CVPR08
@@ -176,15 +172,6 @@ def compose_after_from_vector_inplace(self, delta):
         # (n_points, n_params, n_dims)
         dW_dx_dW_dp_0 = np.einsum('ijk, ilk -> eilk', dW_dx, dW_dp_0)
 
-        #TODO: Can we do this without splitting across the two dimensions?
-        # dW_dx_x = dW_dx[:, 0, :].flatten()[..., None]
-        # dW_dx_y = dW_dx[:, 1, :].flatten()[..., None]
-        # dW_dp_0_mat = np.reshape(dW_dp_0, (n_points * self.n_dims,
-        #                                    self.n_parameters))
-        # dW_dx_dW_dp_0 = dW_dp_0_mat * dW_dx_x + dW_dp_0_mat * dW_dx_y
-        # dW_dx_dW_dp_0 = np.reshape(dW_dx_dW_dp_0,
-        #                            (n_points, self.n_parameters, self.n_dims))
-
         # (n_params, n_params)
         J = np.einsum('ijk, ilk -> jl', dW_dp, dW_dx_dW_dp_0)
         # (n_params, n_params)
@@ -203,19 +190,14 @@ def _build_pseudoinverse(self):
     def pseudoinverse_vector(self, vector):
         r"""
         The vectorized pseudoinverse of a provided vector instance.
-
         Syntactic sugar for
-
         self.from_vector(vector).pseudoinverse.as_vector()
-
         On ModelDrivenTransform this is especially fast - we just negate the
         vector provided.
-
         Parameters
         ----------
         vector :  (P,) ndarray
             A vectorized version of self
-
         Returns
         -------
         pseudoinverse_vector : (N,) ndarray
@@ -262,7 +244,6 @@ def d_dp(self, points):
 
         # dW_dl:  n_points x (n_dims) x n_centres x n_dims
         # dX_dp:  (n_points x n_dims) x n_params
-        
 
         # The following is equivalent to
         # np.einsum('ild, lpd -> ipd', self.dW_dl, dX_dp)
@@ -273,6 +254,47 @@ def d_dp(self, points):
 
         return dW_dp
 
+    def Jp(self):
+        r"""
+        Compute the parameters' Jacobian, based on the first order
+        approximation proposed by Papandreou and Maragos in [1].
+
+        Returns
+        -------
+        Jp : ``(n_params, n_params)`` `ndarray`
+            The solution of the linear system ``H Jp = J`` built below.
+
+        References
+        ----------
+        .. [1] G. Papandreou and P. Maragos, "Adaptive and Constrained
+               Algorithms for Inverse Compositional Active Appearance Model
+               Fitting", CVPR08
+        """
+        # the incremental warp is always evaluated at p=0, ie the mean shape
+        points = self.pdm.model.mean().points
+
+        # dW/dp when p=0 and when p!=0 are the same and simply given by
+        # the Jacobian of the model
+        # (n_points, n_params, n_dims)
+        dW_dp_0 = self.pdm.d_dp(points)
+        dW_dp = dW_dp_0
+
+        # dW/dx of the transform, evaluated at the same points
+        # (n_points, n_dims, n_dims)
+        dW_dx = self.transform.d_dx(points)
+
+        # NOTE: every einsum output subscript must also appear in an input
+        # operand, otherwise np.einsum raises a ValueError
+        # (n_points, n_params, n_dims)
+        dW_dx_dW_dp_0 = np.einsum('ijk, ilk -> ilk', dW_dx, dW_dp_0)
+
+        # (n_params, n_params)
+        J = np.einsum('ijk, ilk -> jl', dW_dp, dW_dx_dW_dp_0)
+        # (n_params, n_params)
+        H = np.einsum('ijk, ilk -> jl', dW_dp, dW_dp)
+        # (n_params, n_params)
+        return np.linalg.solve(H, J)
+
 
 # noinspection PyMissingConstructor
 class GlobalMDTransform(ModelDrivenTransform):
@@ -326,18 +348,76 @@ def compose_after_from_vector_inplace(self, delta):
         r"""
         Composes two ModelDrivenTransforms together based on the
         first order approximation proposed by Papandreou and Maragos in [1].
-
         Parameters
         ----------
         delta : (N,) ndarray
             Vectorized :class:`ModelDrivenTransform` to be applied **before**
             self
-
         Returns
         --------
         transform : self
             self, updated to the result of the composition
+        References
+        ----------
+        .. [1] G. Papandreou and P. Maragos, "Adaptive and Constrained
+               Algorithms for Inverse Compositional Active Appearance Model
+               Fitting", CVPR08
+        """
+        # the incremental warp is always evaluated at p=0, ie the mean shape
+        points = self.pdm.model.mean().points
+
+        # compute:
+        #   - dW/dp when p=0
+        #   - dW/dp when p!=0
+        #   - dW/dx when p!=0 evaluated at the source landmarks
+
+        # dW/dq when p=0 and when p!=0 are the same and given by the
+        # Jacobian of the global transform evaluated at the mean of the
+        # model
+        # (n_points, n_global_params, n_dims)
+        dW_dq = self.pdm._global_transform_d_dp(points)
+
+        # dW/db when p=0, is the Jacobian of the model
+        # (n_points, n_weights, n_dims)
+        dW_db_0 = PDM.d_dp(self.pdm, points)
+
+        # dW/dp when p=0, is simply the concatenation of the previous
+        # two terms
+        # (n_points, n_params, n_dims)
+        dW_dp_0 = np.hstack((dW_dq, dW_db_0))
+
+        # by application of the chain rule dW_db when p!=0,
+        # is the Jacobian of the global transform wrt the points times
+        # the Jacobian of the model: dX(S)/db = dX/dS *  dS/db
+        # (n_points, n_dims, n_dims)
+        dW_dS = self.pdm.global_transform.d_dx(points)
+        # (n_points, n_weights, n_dims)
+        dW_db = np.einsum('ilj, idj -> idj', dW_dS, dW_db_0)
+
+        # dW/dp is simply the concatenation of dW_dq with dW_db
+        # (n_points, n_params, n_dims)
+        dW_dp = np.hstack((dW_dq, dW_db))
+
+        # dW/dx is the jacobian of the transform evaluated at the source
+        # landmarks
+        # (n_points, n_dims, n_dims)
+        dW_dx = self.transform.d_dx(points)
+
+        # (n_points, n_params, n_dims)
+        dW_dx_dW_dp_0 = np.einsum('ijk, ilk -> ilk', dW_dx, dW_dp_0)
+
+        # (n_params, n_params)
+        J = np.einsum('ijk, ilk -> jl', dW_dp, dW_dx_dW_dp_0)
+        # (n_params, n_params)
+        H = np.einsum('ijk, ilk -> jl', dW_dp, dW_dp)
+        # (n_params, n_params)
+        Jp = np.linalg.solve(H, J)
 
+        self.from_vector_inplace(self.as_vector() + np.dot(Jp, delta))
+
+    def Jp(self):
+        r"""
+        Compute parameters Jacobian.
 
         References
         ----------
@@ -389,16 +469,6 @@ def compose_after_from_vector_inplace(self, delta):
         # (n_points, n_params, n_dims)
         dW_dx_dW_dp_0 = np.einsum('ijk, ilk -> ilk', dW_dx, dW_dp_0)
 
-        #TODO: Can we do this without splitting across the two dimensions?
-        # dW_dx_x = dW_dx[:, 0, :].flatten()[..., None]
-        # dW_dx_y = dW_dx[:, 1, :].flatten()[..., None]
-        # dW_dp_0_mat = np.reshape(dW_dp_0, (n_points * self.n_dims,
-        #                                    self.n_parameters))
-        # dW_dx_dW_dp_0 = dW_dp_0_mat * dW_dx_x + dW_dp_0_mat * dW_dx_y
-        # # (n_points, n_params, n_dims)
-        # dW_dx_dW_dp_0 = np.reshape(dW_dx_dW_dp_0,
-        #                            (n_points, self.n_parameters, self.n_dims))
-
         # (n_params, n_params)
         J = np.einsum('ijk, ilk -> jl', dW_dp, dW_dx_dW_dp_0)
         # (n_params, n_params)
@@ -406,7 +476,7 @@ def compose_after_from_vector_inplace(self, delta):
         # (n_params, n_params)
         Jp = np.linalg.solve(H, J)
 
-        self.from_vector_inplace(self.as_vector() + np.dot(Jp, delta))
+        return Jp
 
 
 # noinspection PyMissingConstructor
@@ -450,8 +520,47 @@ class OrthoMDTransform(GlobalMDTransform):
         The source landmarks of the transform. If no `source` is provided the
         mean of the model is used.
     """
-    def __init__(self, model, transform_cls, global_transform, source=None):
-        self.pdm = OrthoPDM(model, global_transform)
+    def __init__(self, model, transform_cls, source=None):
+        self.pdm = OrthoPDM(model)
         self._cached_points = None
         self.transform = transform_cls(source, self.target)
 
+
+# TODO: document the constructor arguments (``model``, ``sparse_instance``).
+class LinearOrthoMDTransform(OrthoPDM, Transform):
+    r"""OrthoPDM-based transform whose target holds ``n_landmarks`` sparse
+    points first, followed by the dense points (see the properties below)."""
+    def __init__(self, model, sparse_instance):
+        super(LinearOrthoMDTransform, self).__init__(model)
+        self._sparse_instance = sparse_instance  # template for sparse_target
+        self.W = np.vstack((self.similarity_model.components,
+                            self.model.components))  # similarity rows first
+        V = self.W[:, :self.n_dims*self.n_landmarks]  # leading columns of W
+        self.pinv_V = np.linalg.pinv(V)  # used by set_target to densify
+
+    @property
+    def n_landmarks(self):
+        return self._sparse_instance.n_points  # size of the sparse shape
+
+    @property
+    def dense_target(self):
+        return PointCloud(self.target.points[self.n_landmarks:])  # the rest
+
+    @property
+    def sparse_target(self):
+        sparse_target = PointCloud(self.target.points[:self.n_landmarks])
+        return self._sparse_instance.from_vector(sparse_target.as_vector())
+
+    def set_target(self, target):
+        if target.n_points == self.n_landmarks:
+            # sparse target given: map through pinv(V), then W, to densify
+            target = np.dot(np.dot(target.as_vector(), self.pinv_V), self.W)
+            target = PointCloud(np.reshape(target, (-1, self.n_dims)))
+        OrthoPDM.set_target(self, target)
+
+    def _apply(self, _, **kwargs):
+        return self.target.points[self.n_landmarks:]  # the input is ignored
+
+    def d_dp(self, _):
+        return OrthoPDM.d_dp(self, _)[self.n_landmarks:, ...]  # skip sparse
+
diff --git a/menpofit/visualize/__init__.py b/menpofit/visualize/__init__.py
index 6aaea70..30039c3 100644
--- a/menpofit/visualize/__init__.py
+++ b/menpofit/visualize/__init__.py
@@ -5,3 +5,4 @@
                           visualize_fitting_result)
 except ImportError:
     pass
+from .textutils import print_progress
diff --git a/menpofit/visualize/textutils.py b/menpofit/visualize/textutils.py
new file mode 100644
index 0000000..ce49bad
--- /dev/null
+++ b/menpofit/visualize/textutils.py
@@ -0,0 +1,24 @@
+from menpo.visualize import print_progress as menpo_print_progress
+
+
+def print_progress(iterable, prefix='', n_items=None, offset=0,
+                   show_bar=True, show_count=True, show_eta=True,
+                   end_with_newline=True, verbose=True):
+    r"""
+    Verbosity-aware wrapper around menpo's ``print_progress`` generator.
+
+    Every argument except ``verbose`` is forwarded unchanged to menpo's
+    ``print_progress``; please see its documentation for their meaning.
+    If ``verbose`` is ``False``, the items of ``iterable`` are yielded
+    directly and menpo's ``print_progress`` is never called.
+    """
+    if verbose:
+        items = menpo_print_progress(
+            iterable, prefix=prefix, n_items=n_items, offset=offset,
+            show_bar=show_bar, show_count=show_count, show_eta=show_eta,
+            end_with_newline=end_with_newline)
+    else:
+        items = iterable
+    # Plain loop instead of ``yield from`` (which is Python 3 only syntax)
+    for item in items:
+        yield item
diff --git a/menpofit/visualize/widgets/base.py b/menpofit/visualize/widgets/base.py
index 8dd5b22..9bd0170 100644
--- a/menpofit/visualize/widgets/base.py
+++ b/menpofit/visualize/widgets/base.py
@@ -3,7 +3,7 @@
 import matplotlib.pyplot as plt
 from matplotlib import collections as mc
 
-import IPython.html.widgets as ipywidgets
+import ipywidgets
 import IPython.display as ipydisplay
 
 from menpo.visualize.widgets import (RendererOptionsWidget,
@@ -31,7 +31,7 @@ def _check_n_parameters(n_params, n_levels, max_n_params):
     r"""
     Checks the maximum number of components per level either of the shape
     or the appearance model. It must be ``None`` or `int` or `float` or a `list`
-    of those containing ``1`` or ``n_levels`` elements.
+    of those containing ``1`` or ``n_scales`` elements.
     """
     str_error = ("n_params must be None or 1 <= int <= max_n_params or "
                  "a list of those containing 1 or {} elements").format(n_levels)
@@ -128,7 +128,7 @@ def visualize_shape_model(shape_model, n_parameters=5, mode='multiple',
     max_n_params = [sp.n_active_components for sp in shape_model]
 
     # Check the given number of parameters (the returned n_parameters is a list
-    # of len n_levels)
+    # of len n_scales)
     n_parameters = _check_n_parameters(n_parameters, n_levels, max_n_params)
 
     # Initial options dictionaries
@@ -213,7 +213,7 @@ def render_function(name, value):
                 axes_font_weight=tmp3['axes_font_weight'],
                 axes_x_limits=tmp3['axes_x_limits'],
                 axes_y_limits=tmp3['axes_y_limits'],
-                figure_size=new_figure_size, label=None)
+                figure_size=new_figure_size)
 
             # Invert y axis if needed
             if mean_wid.value and axes_mode_wid.value == 1:
@@ -247,7 +247,7 @@ def render_function(name, value):
                 axes_font_weight=tmp3['axes_font_weight'],
                 axes_x_limits=tmp3['axes_x_limits'],
                 axes_y_limits=tmp3['axes_y_limits'],
-                figure_size=new_figure_size, label=None)
+                figure_size=new_figure_size)
 
             # Render vectors
             ax = plt.gca()
@@ -326,11 +326,11 @@ def plot_variance(name):
     mode_dict = OrderedDict()
     mode_dict['Deformation'] = 1
     mode_dict['Vectors'] = 2
-    mode_wid = ipywidgets.RadioButtonsWidget(options=mode_dict,
-                                             description='Mode:', value=1)
+    mode_wid = ipywidgets.RadioButtons(options=mode_dict,
+                                       description='Mode:', value=1)
     mode_wid.on_trait_change(render_function, 'value')
-    mean_wid = ipywidgets.CheckboxWidget(value=False,
-                                         description='Render mean shape')
+    mean_wid = ipywidgets.Checkbox(value=False,
+                                   description='Render mean shape')
     mean_wid.on_trait_change(render_function, 'value')
 
     # Function that controls mean shape checkbox visibility
@@ -346,7 +346,7 @@ def mean_visible(name, value):
         mode=mode, params_bounds=parameters_bounds, params_step=0.1,
         plot_variance_visible=True, plot_variance_function=plot_variance,
         style=model_parameters_style)
-    axes_mode_wid = ipywidgets.RadioButtonsWidget(
+    axes_mode_wid = ipywidgets.RadioButtons(
         options={'Image': 1, 'Point cloud': 2}, description='Axes mode:',
         value=2)
     axes_mode_wid.on_trait_change(render_function, 'value')
@@ -381,7 +381,7 @@ def update_widgets(name, value):
                 radio_str["Level {} (high)".format(l)] = l
             else:
                 radio_str["Level {}".format(l)] = l
-        level_wid = ipywidgets.RadioButtonsWidget(
+        level_wid = ipywidgets.RadioButtons(
             options=radio_str, description='Pyramid:', value=0)
         level_wid.on_trait_change(update_widgets, 'value')
         level_wid.on_trait_change(render_function, 'value')
@@ -487,7 +487,7 @@ def visualize_appearance_model(appearance_model, n_parameters=5,
     max_n_params = [ap.n_active_components for ap in appearance_model]
 
     # Check the given number of parameters (the returned n_parameters is a list
-    # of len n_levels)
+    # of len n_scales)
     n_parameters = _check_n_parameters(n_parameters, n_levels, max_n_params)
 
     # Find initial groups and labels that will be passed to the landmark options
@@ -718,7 +718,7 @@ def update_widgets(name, value):
                 radio_str["Level {} (high)".format(l)] = l
             else:
                 radio_str["Level {}".format(l)] = l
-        level_wid = ipywidgets.RadioButtonsWidget(
+        level_wid = ipywidgets.RadioButtons(
             options=radio_str, description='Pyramid:', value=0)
         level_wid.on_trait_change(update_widgets, 'value')
         level_wid.on_trait_change(render_function, 'value')
@@ -790,7 +790,7 @@ def visualize_aam(aam, n_shape_parameters=5, n_appearance_parameters=5,
     print('Initializing...')
 
     # Get the number of levels
-    n_levels = aam.n_levels
+    n_levels = aam.n_scales
 
     # Define the styling options
     if style == 'coloured':
@@ -829,7 +829,7 @@ def visualize_aam(aam, n_shape_parameters=5, n_appearance_parameters=5,
     max_n_appearance = [ap.n_active_components for ap in aam.appearance_models]
 
     # Check the given number of parameters (the returned n_parameters is a list
-    # of len n_levels)
+    # of len n_scales)
     n_shape_parameters = _check_n_parameters(n_shape_parameters, n_levels,
                                              max_n_shape)
     n_appearance_parameters = _check_n_parameters(n_appearance_parameters,
@@ -897,7 +897,7 @@ def render_function(name, value):
         # Compute weights and instance
         shape_weights = shape_model_parameters_wid.parameters
         appearance_weights = appearance_model_parameters_wid.parameters
-        instance = aam.instance(level=level, shape_weights=shape_weights,
+        instance = aam.instance(scale_index=level, shape_weights=shape_weights,
                                 appearance_weights=appearance_weights)
 
         # Update info
@@ -961,39 +961,17 @@ def update_info(aam, instance, level, group):
         aam_mean = lvl_app_mod.mean()
         n_channels = aam_mean.n_channels
         tmplt_inst = lvl_app_mod.template_instance
-        feat = (aam.features if aam.pyramid_on_features
-                else aam.features[level])
+        feat = aam.holistic_features[level]
 
         # Feature string
         tmp_feat = 'Feature is {} with {} channel{}'.format(
             name_of_callable(feat), n_channels, 's' * (n_channels > 1))
 
-        # create info str
-        if n_levels == 1:
-            tmp_shape_models = ''
-            tmp_pyramid = ''
-        else:  # n_levels > 1
-            # shape models info
-            if aam.scaled_shape_models:
-                tmp_shape_models = "Each level has a scaled shape model " \
-                                   "(reference frame)"
-            else:
-                tmp_shape_models = "Shape models (reference frames) are " \
-                                   "not scaled"
-            # pyramid info
-            if aam.pyramid_on_features:
-                tmp_pyramid = "Pyramid was applied on feature space"
-            else:
-                tmp_pyramid = "Features were extracted at each pyramid level"
-
         # update info widgets
         text_per_line = [
-            "> {} training images".format(aam.n_training_images),
-            "> {}".format(tmp_shape_models),
             "> Warp using {} transform".format(aam.transform.__name__),
-            "> {}".format(tmp_pyramid),
-            "> Level {}/{} (downscale={:.1f})".format(
-                level + 1, aam.n_levels, aam.downscale),
+            "> Level {}/{}".format(
+                level + 1, aam.n_scales),
             "> {} landmark points".format(
                 instance.landmarks[group].lms.n_points),
             "> {} shape components ({:.2f}% of variance)".format(
@@ -1150,7 +1128,7 @@ def update_widgets(name, value):
                 radio_str["Level {} (high)".format(l)] = l
             else:
                 radio_str["Level {}".format(l)] = l
-        level_wid = ipywidgets.RadioButtonsWidget(
+        level_wid = ipywidgets.RadioButtons(
             options=radio_str, description='Pyramid:', value=0)
         level_wid.on_trait_change(update_widgets, 'value')
         level_wid.on_trait_change(render_function, 'value')
@@ -1216,7 +1194,7 @@ def visualize_atm(atm, n_shape_parameters=5, mode='multiple',
     print('Initializing...')
 
     # Get the number of levels
-    n_levels = atm.n_levels
+    n_levels = atm.n_scales
 
     # Define the styling options
     if style == 'coloured':
@@ -1252,7 +1230,7 @@ def visualize_atm(atm, n_shape_parameters=5, mode='multiple',
     max_n_shape = [sp.n_active_components for sp in atm.shape_models]
 
     # Check the given number of parameters (the returned n_parameters is a list
-    # of len n_levels)
+    # of len n_scales)
     n_shape_parameters = _check_n_parameters(n_shape_parameters, n_levels,
                                              max_n_shape)
 
@@ -1317,7 +1295,7 @@ def render_function(name, value):
 
         # Compute weights and instance
         shape_weights = shape_model_parameters_wid.parameters
-        instance = atm.instance(level=level, shape_weights=shape_weights)
+        instance = atm.instance(scale_index=level, shape_weights=shape_weights)
 
         # Update info
         update_info(atm, instance, level,
@@ -1377,39 +1355,17 @@ def update_info(atm, instance, level, group):
         lvl_shape_mod = atm.shape_models[level]
         tmplt_inst = atm.warped_templates[level]
         n_channels = tmplt_inst.n_channels
-        feat = (atm.features if atm.pyramid_on_features
-                else atm.features[level])
+        feat = atm.holistic_features[level]
 
         # Feature string
         tmp_feat = 'Feature is {} with {} channel{}'.format(
             name_of_callable(feat), n_channels, 's' * (n_channels > 1))
 
-        # create info str
-        if n_levels == 1:
-            tmp_shape_models = ''
-            tmp_pyramid = ''
-        else:  # n_levels > 1
-            # shape models info
-            if atm.scaled_shape_models:
-                tmp_shape_models = "Each level has a scaled shape model " \
-                                   "(reference frame)"
-            else:
-                tmp_shape_models = "Shape models (reference frames) are " \
-                                   "not scaled"
-            # pyramid info
-            if atm.pyramid_on_features:
-                tmp_pyramid = "Pyramid was applied on feature space"
-            else:
-                tmp_pyramid = "Features were extracted at each pyramid level"
-
         # update info widgets
         text_per_line = [
-            "> {} training shapes".format(atm.n_training_shapes),
-            "> {}".format(tmp_shape_models),
             "> Warp using {} transform".format(atm.transform.__name__),
-            "> {}".format(tmp_pyramid),
-            "> Level {}/{} (downscale={:.1f})".format(
-                level + 1, atm.n_levels, atm.downscale),
+            "> Level {}/{}".format(
+                level + 1, atm.n_scales),
             "> {} landmark points".format(
                 instance.landmarks[group].lms.n_points),
             "> {} shape components ({:.2f}% of variance)".format(
@@ -1520,7 +1476,7 @@ def update_widgets(name, value):
                 radio_str["Level {} (high)".format(l)] = l
             else:
                 radio_str["Level {}".format(l)] = l
-        level_wid = ipywidgets.RadioButtonsWidget(
+        level_wid = ipywidgets.RadioButtons(
             options=radio_str, description='Pyramid:', value=0)
         level_wid.on_trait_change(update_widgets, 'value')
         level_wid.on_trait_change(render_function, 'value')
@@ -1590,7 +1546,7 @@ def plot_ced(errors, legend_entries=None, error_range=None,
         as part of a parent widget. If ``False``, the widget object is not
         returned, it is just visualized.
     """
-    from menpofit.fittingresult import plot_cumulative_error_distribution
+    from menpofit.result import plot_cumulative_error_distribution
     print('Initializing...')
 
     # Make sure that errors is a list even with one list member
@@ -2223,7 +2179,7 @@ def plot_errors_function(name):
             renderer_options_wid.selected_values[0]['figure']['x_scale'] * 10,
             renderer_options_wid.selected_values[0]['figure']['y_scale'] * 3)
         renderer = fitting_results[im].plot_errors(
-            error_type=error_type_wid.value,
+            error_type=_error_type_key_to_func(error_type_wid.value),
             figure_id=save_figure_wid.renderer.figure_id,
             figure_size=new_figure_size)
 
@@ -2448,18 +2404,27 @@ def update_info(name, value):
 
         # Create output str
         if fitting_results[im].gt_shape is not None:
+            from menpofit.result import (
+                compute_root_mean_square_error, compute_point_to_point_error,
+                compute_normalise_point_to_point_error)
+            if value is 'me_norm':
+                func = compute_normalise_point_to_point_error
+            elif value is 'me':
+                func = compute_point_to_point_error
+            elif value is 'rmse':
+                func = compute_root_mean_square_error
             text_per_line = [
                 "> Initial error: {:.4f}".format(
-                    fitting_results[im].initial_error(error_type=value)),
+                    fitting_results[im].initial_error(compute_error=func)),
                 "> Final error: {:.4f}".format(
-                    fitting_results[im].final_error(error_type=value)),
+                    fitting_results[im].final_error(compute_error=func)),
                 "> {} iterations".format(fitting_results[im].n_iters)]
         else:
             text_per_line = [
                 "> {} iterations".format(fitting_results[im].n_iters)]
-        if hasattr(fitting_results[im], 'n_levels'):  # Multilevel result
-            text_per_line.append("> {} levels with downscale of {:.1f}".format(
-                fitting_results[im].n_levels, fitting_results[im].downscale))
+        if hasattr(fitting_results[im], 'n_scales'):  # Multilevel result
+            text_per_line.append("> {} scales".format(
+                fitting_results[im].n_scales))
         info_wid.set_widget_state(n_lines=len(text_per_line),
                                   text_per_line=text_per_line)
 
@@ -2508,13 +2473,13 @@ def plot_ced_fun(name):
         # widget closes
         plot_ced_but.visible = False
 
-        # Get error type
         error_type = error_type_wid.value
+        func = _error_type_key_to_func(error_type)
 
         # Create errors list
-        fit_errors = [f.final_error(error_type=error_type)
+        fit_errors = [f.final_error(compute_error=func)
                       for f in fitting_results]
-        initial_errors = [f.initial_error(error_type=error_type)
+        initial_errors = [f.initial_error(compute_error=func)
                           for f in fitting_results]
         errors = [fit_errors, initial_errors]
 
@@ -2627,8 +2592,8 @@ def update_widgets(name, value):
         # animation. Specifically, if the animation is activated and the user
         # selects the iterations tab, then the animation stops.
         def results_tab_fun(name, value):
-            if value == 1 and image_number_wid.play_toggle.value:
-                image_number_wid.stop_toggle.value = True
+            if value == 1 and image_number_wid.play_options_toggle.value:
+                image_number_wid.stop_options_toggle.value = True
         result_wid.on_trait_change(results_tab_fun, 'selected_index')
 
         # Widget titles
@@ -2657,8 +2622,8 @@ def results_tab_fun(name, value):
         # If animation is activated and the user selects the save figure tab,
         # then the animation stops.
         def save_fig_tab_fun(name, value):
-            if value == 3 and image_number_wid.play_toggle.value:
-                image_number_wid.stop_toggle.value = True
+            if value == 3 and image_number_wid.play_options_toggle.value:
+                image_number_wid.stop_options_toggle.value = True
         options_box.on_trait_change(save_fig_tab_fun, 'selected_index')
 
     # Set widget's style
@@ -2672,3 +2637,16 @@ def save_fig_tab_fun(name, value):
 
     # Reset value to trigger initial visualization
     renderer_options_wid.options_widgets[3].render_legend_checkbox.value = True
+
+
+def _error_type_key_to_func(error_type):
+    # Map an error type key ('me_norm', 'me' or 'rmse') to its menpofit.result
+    # error function. A dict lookup compares keys by equality (``is`` tests
+    # identity) and raises a KeyError naming any unknown key.
+    from menpofit.result import (
+        compute_root_mean_square_error, compute_point_to_point_error,
+        compute_normalise_point_to_point_error)
+    funcs = {'me_norm': compute_normalise_point_to_point_error,
+             'me': compute_point_to_point_error,
+             'rmse': compute_root_mean_square_error}
+    return funcs[error_type]
diff --git a/menpofit/visualize/widgets/options.py b/menpofit/visualize/widgets/options.py
index ccfda5e..1e71edb 100644
--- a/menpofit/visualize/widgets/options.py
+++ b/menpofit/visualize/widgets/options.py
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 
-import IPython.html.widgets as ipywidgets
+import ipywidgets
 
 from menpo.visualize.widgets.tools import (_format_box, _format_font,
                                            _map_styles_to_hex_colours)
@@ -250,8 +250,8 @@ def _check_parameters(self, parameters, bounds):
     def style(self, box_style=None, border_visible=False, border_color='black',
               border_style='solid', border_width=1, border_radius=0, padding=0,
               margin=0, font_family='', font_size=None, font_style='',
-              font_weight='', slider_width='', slider_handle_colour='',
-              slider_background_colour='', buttons_style=''):
+              font_weight='', slider_width='', slider_handle_colour=None,
+              slider_background_colour=None, buttons_style=''):
         r"""
         Function that defines the styling of the widget.
 
@@ -376,8 +376,8 @@ def predefined_style(self, style):
                        border_width=1, border_radius=0, padding='0.2cm',
                        margin='0.3cm', font_family='', font_size=None,
                        font_style='', font_weight='', slider_width='',
-                       slider_handle_colour='', slider_background_colour='',
-                       buttons_style='')
+                       slider_handle_colour=None,
+                       slider_background_colour=None, buttons_style='')
         elif (style == 'info' or style == 'success' or style == 'danger' or
               style == 'warning'):
             self.style(box_style=style, border_visible=True,
@@ -387,7 +387,8 @@ def predefined_style(self, style):
                        font_size=None, font_style='', font_weight='',
                        slider_width='',
                        slider_handle_colour=_map_styles_to_hex_colours(style),
-                       slider_background_colour='', buttons_style='primary')
+                       slider_background_colour=None,
+                       buttons_style='primary')
         else:
             raise ValueError('style must be minimal or info or success or '
                              'danger or warning')
@@ -1381,10 +1382,10 @@ def style(self, box_style=None, border_visible=False, border_color='black',
             self.index_animation.play_options_toggle.button_style = ''
             _format_box(self.index_animation.loop_interval_box, '', False,
                         'black', 'solid', 1, 10, '0.1cm', '0.1cm')
-            self.index_animation.index_wid.slider.slider_color = ''
-            self.index_animation.index_wid.slider.background_color = ''
-            self.index_slider.slider_color = ''
-            self.index_slider.background_color = ''
+            self.index_animation.index_wid.slider.slider_color = None
+            self.index_animation.index_wid.slider.background_color = None
+            self.index_slider.slider_color = None
+            self.index_slider.background_color = None
             self.common_figure.button_style = ''
         else:
             self.index_animation.play_stop_toggle.button_style = 'success'