diff --git a/tobac/analysis.py b/tobac/analysis.py
index bfb373de..d8651522 100644
--- a/tobac/analysis.py
+++ b/tobac/analysis.py
@@ -1,3 +1,31 @@
+"""Provide tools to analyse and visualize the tracked objects.
+
+This module provides a set of routines that enables performing analyses
+and deriving statistics for individual tracks, such as the time series
+of integrated properties and vertical profiles. It also provides
+routines to calculate summary statistics of the entire population of
+tracked features in the field, like histograms of areas/volumes
+or mass and a detailed cell lifetime analysis. These analysis
+routines are all built in a modular manner. Thus, users can reuse the
+most basic methods for interacting with the data structure of the
+package in their own analysis procedures in Python. This includes
+functions performing simple tasks like looping over all identified
+objects or trajectories and masking arrays for the analysis of
+individual features. Plotting routines include visualizations of
+individual convective cells and their properties. [1]_
+
+References
+----------
+.. Heikenfeld, M., Marinescu, P. J., Christensen, M.,
+   Watson-Parris, D., Senf, F., van den Heever, S. C.
+   & Stier, P. (2019). tobac 1.2: towards a flexible
+   framework for tracking and analysis of clouds in
+   diverse datasets. Geoscientific Model Development,
+   12(11), 4551-4570.
+"""
+
 import pandas as pd
 import numpy as np
 import logging
@@ -19,6 +47,45 @@ def cell_statistics_all(
     dimensions=["x", "y"],
     **kwargs
 ):
+    """
+    Parameters
+    ----------
+    input_cubes : iris.cube.Cube
+
+    track : dask.dataframe.DataFrame
+
+    mask : iris.cube.Cube
+        Cube containing mask (int id for tracked volumes, 0 everywhere
+        else).
+
+    aggregators : list
+        List of iris.analysis.Aggregator instances.
+
+    output_path : str, optional
+        Default is './'.
+
+    cell_selection : optional
+        Default is None.
+
+    output_name : str, optional
+        Default is 'Profiles'.
+
+    width : int, optional
+        Default is 10000.
+
+    z_coord : str, optional
+        Name of the vertical coordinate in the cube. Default is
+        'model_level_number'.
+
+    dimensions : list of str, optional
+        Default is ['x', 'y'].
+
+    **kwargs
+
+    Returns
+    -------
+    None
+    """
     if cell_selection is None:
         cell_selection = np.unique(track["cell"])
     for cell in cell_selection:
@@ -50,6 +117,46 @@ def cell_statistics(
     dimensions=["x", "y"],
     **kwargs
 ):
+    """
+    Parameters
+    ----------
+    input_cubes : iris.cube.Cube
+
+    track : dask.dataframe.DataFrame
+
+    mask : iris.cube.Cube
+        Cube containing mask (int id for tracked volumes, 0 everywhere
+        else).
+
+    aggregators : list
+        List of iris.analysis.Aggregator instances.
+
+    cell : int
+        Integer id of cell to create masked cube for output.
+
+    output_path : str, optional
+        Default is './'.
+
+    output_name : str, optional
+        Default is 'Profiles'.
+
+    width : int, optional
+        Default is 10000.
+
+    z_coord : str, optional
+        Name of the vertical coordinate in the cube. Default is
+        'model_level_number'.
+
+    dimensions : list of str, optional
+        Default is ['x', 'y'].
+
+    **kwargs
+
+    Returns
+    -------
+    None
+    """
+
     from iris.cube import Cube, CubeList
     from iris.coords import AuxCoord
     from iris import Constraint, save
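A typical call looks like the following (a minimal usage sketch; `w_cube`, `track` and `mask` are hypothetical variables assumed to come from earlier tobac feature detection, tracking and segmentation steps):

    from iris.analysis import MEAN, MAX
    from tobac.analysis import cell_statistics_all

    # Write aggregated profiles for every tracked cell to disk,
    # one set of output files per cell under output_path
    cell_statistics_all(
        input_cubes=w_cube,            # iris cube of the quantity to aggregate
        track=track,                   # trajectories with a 'cell' column
        mask=mask,                     # segmentation mask cube
        aggregators=[MEAN, MAX],       # iris.analysis.Aggregator instances
        output_path="./profiles",
        z_coord="model_level_number",
    )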
@@ -180,6 +287,31 @@ def cog_cell(
     Mask=None,
     savedir=None,
 ):
+    """
+    Parameters
+    ----------
+    cell : int
+        Integer id of cell to create masked cube for output.
+
+    Tracks : optional
+        Default is None.
+
+    M_total : subset of cube, optional
+        Default is None.
+
+    M_liquid : subset of cube, optional
+        Default is None.
+
+    M_frozen : subset of cube, optional
+        Default is None.
+
+    savedir : str, optional
+        Default is None.
+
+    Returns
+    -------
+    None
+    """
+
     from iris import Constraint
@@ -227,6 +359,48 @@ def cog_cell(
 def lifetime_histogram(
     Track, bin_edges=np.arange(0, 200, 20), density=False, return_values=False
 ):
+    """Compute the lifetime histogram of linked features.
+
+    Parameters
+    ----------
+    Track : pandas.DataFrame
+        Dataframe of linked features, containing the columns 'cell'
+        and 'time_cell'.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of equal-width
+        bins in the given range. If bins is a ndarray, it defines a
+        monotonically increasing array of bin edges, including the
+        rightmost edge. The unit is minutes.
+        Default is np.arange(0, 200, 20).
+
+    density : bool, optional
+        If False, the result will contain the number of samples in
+        each bin. If True, the result is the value of the probability
+        density function at the bin, normalized such that the integral
+        over the range is 1. Default is False.
+
+    return_values : bool, optional
+        Bool determining whether the lifetimes of the features are
+        returned from this function. Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    bin_centers : ndarray
+        The centers of the histogram intervals.
+
+    minutes : ndarray, optional
+        Numpy array of the lifetime of each feature in minutes.
+        Returned if return_values is True.
+
+    """
+
     Track_cell = Track.groupby("cell")
     minutes = (Track_cell["time_cell"].max() / pd.Timedelta(minutes=1)).values
     hist, bin_edges = np.histogram(minutes, bin_edges, density=density)
@@ -238,13 +412,27 @@ def lifetime_histogram(
 
 def haversine(lat1, lon1, lat2, lon2):
-    """Computes the Haversine distance in kilometres between two points (based on implementation CIS https://github.com/cedadev/cis)
-    :param lat1: first point or points as array, each as array of latitude in degrees
-    :param lon1: first point or points as array, each as array of longitude in degrees
-    :param lat2: second point or points as array, each as array of latitude in degrees
-    :param lon2: second point or points as array, each as array of longitude in degrees
-    :return: distance between the two points in kilometres
+    """Computes the Haversine distance in kilometers.
+
+    Calculates the Haversine distance between two points
+    (based on the CIS implementation,
+    https://github.com/cedadev/cis).
+
+    Parameters
+    ----------
+    lat1, lon1 : array of latitude, longitude
+        First point or points as array in degrees.
+
+    lat2, lon2 : array of latitude, longitude
+        Second point or points as array in degrees.
+
+    Returns
+    -------
+    distance : array
+        Distance(s) between the two points (or point arrays) in
+        kilometers.
+
     """
+
     RADIUS_EARTH = 6378.0
     lat1 = np.radians(lat1)
     lat2 = np.radians(lat2)
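A quick sanity check of the haversine distance (a minimal sketch; the coordinates are illustrative):

    import numpy as np
    from tobac.analysis import haversine

    # Great-circle distance between two points, in kilometers
    lat1, lon1 = np.array([52.5]), np.array([13.4])   # roughly Berlin
    lat2, lon2 = np.array([48.9]), np.array([2.35])   # roughly Paris
    print(haversine(lat1, lon1, lat2, lon2))          # approx. 880 km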
@@ -261,10 +449,31 @@ def haversine(lat1, lon1, lat2, lon2):
 
 def calculate_distance(feature_1, feature_2, method_distance=None):
-    """Computes distance between two features based on either lat/lon coordinates or x/y coordinates
-    :param feature_1: first feature or points as array, each as array of latitude, longitude in degrees
-    :param feature_2: second feature or points as array, each as array of latitude, longitude in degrees
-    :return: distance between the two features in metres
+    """Compute the distance between two features based on
+    either lat/lon or x/y coordinates.
+
+    Parameters
+    ----------
+    feature_1, feature_2 : pandas.DataFrame or pandas.Series
+        Dataframes containing multiple features or pandas.Series
+        of one feature. Need to contain either projection_x_coordinate
+        and projection_y_coordinate or latitude and longitude
+        coordinates.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation. 'xy' uses the length of the
+        vector between the two features, 'latlon' uses the haversine
+        distance. None checks whether the required coordinates are
+        present and starts with 'xy'. Default is None.
+
+    Returns
+    -------
+    distance : float or pandas.Series
+        Float with the distance between the two features in meters if
+        the inputs are two pandas.Series containing one feature,
+        pandas.Series of the distances if one of the inputs contains
+        multiple features.
+
     """
     if method_distance is None:
         if (
@@ -312,6 +521,34 @@ def calculate_distance(feature_1, feature_2, method_distance=None):
 
 def calculate_velocity_individual(feature_old, feature_new, method_distance=None):
+    """Calculate the mean velocity of a feature between two timeframes.
+
+    Parameters
+    ----------
+    feature_old : pandas.Series
+        pandas.Series of a feature at a certain timeframe. Needs to
+        contain a 'time' column and either projection_x_coordinate
+        and projection_y_coordinate or latitude and longitude coordinates.
+
+    feature_new : pandas.Series
+        pandas.Series of the same feature at a later timeframe. Needs
+        to contain a 'time' column and either projection_x_coordinate
+        and projection_y_coordinate or latitude and longitude coordinates.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation, used to calculate the velocity.
+        'xy' uses the length of the vector between the two features,
+        'latlon' uses the haversine distance. None checks whether the
+        required coordinates are present and starts with 'xy'.
+        Default is None.
+
+    Returns
+    -------
+    velocity : float
+        Value of the approximate velocity.
+
+    """
+
     distance = calculate_distance(
         feature_old, feature_new, method_distance=method_distance
     )
@@ -321,6 +558,30 @@ def calculate_velocity_individual(feature_old, feature_new, method_distance=None):
 
 def calculate_velocity(track, method_distance=None):
+    """Calculate the velocities of a set of linked features.
+
+    Parameters
+    ----------
+    track : pandas.DataFrame
+        Dataframe of linked features, containing the columns 'cell',
+        'time' and either 'projection_x_coordinate' and
+        'projection_y_coordinate' or 'latitude' and 'longitude'.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation, used to calculate the
+        velocity. 'xy' uses the length of the vector between the
+        two features, 'latlon' uses the haversine distance. None
+        checks whether the required coordinates are present and
+        starts with 'xy'. Default is None.
+
+    Returns
+    -------
+    track : pandas.DataFrame
+        DataFrame from the input, with an additional column 'v',
+        containing the value of the velocity for every feature at
+        every possible timestep.
+    """
+
     for cell_i, track_i in track.groupby("cell"):
         index = track_i.index.values
         for i, index_i in enumerate(index[:-1]):
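For example (a minimal sketch with a hand-built two-row track; the velocity unit follows from meters and seconds, assuming the implementation divides the distance by the time difference in seconds):

    import pandas as pd
    from tobac.analysis import calculate_velocity

    # Two timesteps of one cell on an x/y grid (coordinates in meters)
    track = pd.DataFrame({
        "cell": [1, 1],
        "time": pd.to_datetime(["2000-01-01 12:00", "2000-01-01 12:05"]),
        "projection_x_coordinate": [0.0, 3000.0],
        "projection_y_coordinate": [0.0, 4000.0],
    })
    track = calculate_velocity(track, method_distance="xy")
    print(track["v"])  # 5000 m / 300 s, i.e. roughly 16.7 m/s for the first entry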
@@ -340,6 +601,52 @@ def velocity_histogram(
     method_distance=None,
     return_values=False,
 ):
+    """Create a velocity histogram of the features. If the DataFrame
+    does not contain a velocity column, the velocities are calculated.
+
+    Parameters
+    ----------
+    track : pandas.DataFrame
+        DataFrame of the linked features, containing the columns 'cell',
+        'time' and either 'projection_x_coordinate' and
+        'projection_y_coordinate' or 'latitude' and 'longitude'.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of equal-width
+        bins in the given range. If bins is a ndarray, it defines a
+        monotonically increasing array of bin edges, including the
+        rightmost edge. Default is np.arange(0, 30000, 500).
+
+    density : bool, optional
+        If False, the result will contain the number of samples in
+        each bin. If True, the result is the value of the probability
+        density function at the bin, normalized such that the integral
+        over the range is 1. Default is False.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation, used to calculate the velocity.
+        'xy' uses the length of the vector between the two features,
+        'latlon' uses the haversine distance. None checks whether the
+        required coordinates are present and starts with 'xy'.
+        Default is None.
+
+    return_values : bool, optional
+        Bool determining whether the velocities of the features are
+        returned from this function. Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    velocities : ndarray, optional
+        Numpy array with the velocities of each feature.
+
+    """
+
     if "v" not in track.columns:
         logging.info("calculate velocities")
         track = calculate_velocity(track)
@@ -354,6 +661,30 @@ def velocity_histogram(
 
 def calculate_nearestneighbordistance(features, method_distance=None):
+    """Calculate the distance between a feature and the nearest other
+    feature in the same timeframe.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        DataFrame of the features whose nearest neighbor distance is to
+        be calculated. Needs to contain either projection_x_coordinate
+        and projection_y_coordinate or latitude and longitude coordinates.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation. 'xy' uses the length of the vector
+        between the two features, 'latlon' uses the haversine distance.
+        None checks whether the required coordinates are present and starts
+        with 'xy'. Default is None.
+
+    Returns
+    -------
+    features : pandas.DataFrame
+        DataFrame of the features with a new column 'min_distance',
+        containing the calculated minimal distance to other features.
+
+    """
+
     from itertools import combinations
 
     features["min_distance"] = np.nan
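A small self-contained example (a sketch; the three hand-placed features are illustrative, and the per-timestep grouping is assumed from the docstring above):

    import pandas as pd
    from tobac.analysis import calculate_nearestneighbordistance

    # Three features at one timestep, x/y coordinates in meters
    features = pd.DataFrame({
        "frame": [0, 0, 0],
        "time": pd.to_datetime(["2000-01-01 12:00"] * 3),
        "projection_x_coordinate": [0.0, 1000.0, 5000.0],
        "projection_y_coordinate": [0.0, 0.0, 0.0],
    })
    features = calculate_nearestneighbordistance(features, method_distance="xy")
    print(features["min_distance"])  # expected roughly 1000, 1000, 4000 m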
@@ -393,6 +724,49 @@ def nearestneighbordistance_histogram(
     method_distance=None,
     return_values=False,
 ):
+    """Create a nearest neighbor distance histogram of the features.
+    If the DataFrame does not contain a 'min_distance' column, the
+    distances are calculated.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        DataFrame of the features.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of equal-width
+        bins in the given range. If bins is a ndarray, it defines a
+        monotonically increasing array of bin edges, including the
+        rightmost edge. Default is np.arange(0, 30000, 500).
+
+    density : bool, optional
+        If False, the result will contain the number of samples in
+        each bin. If True, the result is the value of the probability
+        density function at the bin, normalized such that the integral
+        over the range is 1. Default is False.
+
+    method_distance : {None, 'xy', 'latlon'}, optional
+        Method of distance calculation. 'xy' uses the length of the
+        vector between the two features, 'latlon' uses the haversine
+        distance. None checks whether the required coordinates are
+        present and starts with 'xy'. Default is None.
+
+    return_values : bool, optional
+        Bool determining whether the nearest neighbor distances of the
+        features are returned from this function. Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    distances : ndarray, optional
+        A numpy array with the nearest neighbor distances of each
+        feature.
+
+    """
+
     if "min_distance" not in features.columns:
         logging.debug("calculate nearest neighbor distances")
         features = calculate_nearestneighbordistance(
@@ -410,34 +784,34 @@ def nearestneighbordistance_histogram(
 
 # Treatment of 2D lat/lon coordinates to be added:
 def calculate_areas_2Dlatlon(_2Dlat_coord, _2Dlon_coord):
-    """
-    Calculate an array of cell areas when given two 2D arrays of latitude and
-    longitude values
+    """Calculate an array of cell areas when given two 2D arrays
+    of latitude and longitude values.
 
-    NOTE: This currently assuems that the lat/lon grid is orthogonal, which is
-    not strictly true! It's close enough for most cases, but should be updated
-    in future to use the cross product of the distances to the neighbouring
-    cells. This will require the use of a more advanced calculation. I would
-    advise using pyproj at some point in the future to solve this issue and
-    replace haversine distance.
+    NOTE: This currently assumes that the lat/lon grid is orthogonal,
+    which is not strictly true! It's close enough for most cases, but
+    should be updated in future to use the cross product of the
+    distances to the neighbouring cells. This will require the use
+    of a more advanced calculation. I would advise using pyproj
+    at some point in the future to solve this issue and replace
+    haversine distance.
 
     Parameters
     ----------
     _2Dlat_coord : AuxCoord
-        Iris auxilliary coordinate containing a 2d grid of latitudes for each
-        point
+        Iris auxiliary coordinate containing a 2d grid of latitudes
+        for each point.
 
     _2Dlon_coord : AuxCoord
-        Iris auxilliary coordinate containing a 2d grid of longitudes for each
-        point
-
+        Iris auxiliary coordinate containing a 2d grid of longitudes
+        for each point.
+
     Returns
     -------
     area : ndarray
-        A numpy array approximating the area of each cell
+        A numpy array approximating the area of each cell.
     """
+
     hdist1 = (
         haversine(
             _2Dlat_coord.points[:-1],
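A sketch of how this could be called on a small synthetic grid (assuming iris is available; the grid values are illustrative, the function treats the grid as orthogonal per the note above, and the output units are assumed to follow from the meter-scaled haversine distances used internally):

    import numpy as np
    from iris.coords import AuxCoord
    from tobac.analysis import calculate_areas_2Dlatlon

    # Small, nearly regular lat/lon grid as 2D auxiliary coordinates
    lat2d, lon2d = np.meshgrid(np.arange(40.0, 42.0, 0.5),
                               np.arange(10.0, 12.0, 0.5), indexing="ij")
    lat_coord = AuxCoord(lat2d, standard_name="latitude", units="degrees")
    lon_coord = AuxCoord(lon2d, standard_name="longitude", units="degrees")

    areas = calculate_areas_2Dlatlon(lat_coord, lon_coord)  # approximate cell areas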
@@ -474,6 +848,48 @@ def calculate_areas_2Dlatlon(_2Dlat_coord, _2Dlon_coord):
 
 def calculate_area(features, mask, method_area=None):
+    """Calculate the area of the segments for each feature.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        DataFrame of the features whose area is to be calculated.
+
+    mask : iris.cube.Cube
+        Cube containing mask (int for tracked volumes, 0 everywhere
+        else). Needs to contain either projection_x_coordinate and
+        projection_y_coordinate or latitude and longitude
+        coordinates.
+
+    method_area : {None, 'xy', 'latlon'}, optional
+        Flag determining how the area is calculated. 'xy' uses the
+        areas of the individual pixels, 'latlon' uses the
+        area_weights method of iris.analysis.cartography, None
+        checks whether the required coordinates are present and
+        starts with 'xy'. Default is None.
+
+    Returns
+    -------
+    features : pandas.DataFrame
+        DataFrame of the features with a new column 'area',
+        containing the calculated areas.
+
+    Raises
+    ------
+    ValueError
+        If neither latitude/longitude nor
+        projection_x_coordinate/projection_y_coordinate are
+        present in mask_coords.
+
+        If latitude/longitude coordinates are 2D.
+
+        If latitude/longitude shapes are not supported.
+
+        If method is undefined, i.e. method is neither None,
+        'xy' nor 'latlon'.
+
+    """
+
     from tobac.utils import mask_features_surface, mask_features
     from iris import Constraint
     from iris.analysis.cartography import area_weights
@@ -541,6 +957,61 @@ def area_histogram(
     return_values=False,
     representative_area=False,
 ):
+    """Create an area histogram of the features. If the DataFrame
+    does not contain an area column, the areas are calculated.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        DataFrame of the features.
+
+    mask : iris.cube.Cube
+        Cube containing mask (int for tracked volumes, 0
+        everywhere else). Needs to contain either
+        projection_x_coordinate and projection_y_coordinate or
+        latitude and longitude coordinates. The output of a
+        segmentation should be used here.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of
+        equal-width bins in the given range. If bins is a ndarray,
+        it defines a monotonically increasing array of bin edges,
+        including the rightmost edge.
+        Default is np.arange(0, 30000, 500).
+
+    density : bool, optional
+        If False, the result will contain the number of samples
+        in each bin. If True, the result is the value of the
+        probability density function at the bin, normalized such
+        that the integral over the range is 1. Default is False.
+
+    return_values : bool, optional
+        Bool determining whether the areas of the features are
+        returned from this function. Default is False.
+
+    representative_area : bool, optional
+        If False, no weights will be associated with the values.
+        If True, the weights for each area will be the areas
+        themselves, i.e. each bin count will have the value of
+        the sum of all areas within the edges of the bin.
+        Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    bin_centers : ndarray
+        The centers of the histogram intervals.
+
+    areas : ndarray, optional
+        A numpy array approximating the area of each feature.
+
+    """
+
     if "area" not in features.columns:
         logging.info("calculate area")
         features = calculate_area(features, mask, method_area)
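Usage might look like this (a sketch, assuming `features` and `mask` come from tobac feature detection and segmentation):

    import numpy as np
    from tobac.analysis import area_histogram

    # Histogram of segment areas, counted in 500 m^2 wide bins
    hist, bin_edges, bin_centers = area_histogram(
        features, mask, bin_edges=np.arange(0, 30000, 500), density=False
    )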
@@ -563,6 +1034,57 @@ def histogram_cellwise(
     Track, variable=None, bin_edges=None, quantity="max", density=False
 ):
+    """Create a histogram of the maximum, minimum or mean of
+    a variable for the cells (series of features linked together
+    over multiple timesteps) of a track. Essentially a wrapper
+    of the numpy.histogram() method.
+
+    Parameters
+    ----------
+    Track : pandas.DataFrame
+        The track containing the variable to create the histogram
+        from.
+
+    variable : string, optional
+        Column of the DataFrame with the variable on which the
+        histogram is to be based. Default is None.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of
+        equal-width bins in the given range. If bins is a ndarray,
+        it defines a monotonically increasing array of bin edges,
+        including the rightmost edge.
+
+    quantity : {'max', 'min', 'mean'}, optional
+        Flag determining whether to use the maximum, minimum or mean
+        of a variable from all timeframes the cell covers.
+        Default is 'max'.
+
+    density : bool, optional
+        If False, the result will contain the number of samples
+        in each bin. If True, the result is the value of the
+        probability density function at the bin, normalized such
+        that the integral over the range is 1.
+        Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    bin_centers : ndarray
+        The centers of the histogram intervals.
+
+    Raises
+    ------
+    ValueError
+        If quantity is not 'max', 'min' or 'mean'.
+
+    """
+
     Track_cell = Track.groupby("cell")
     if quantity == "max":
         variable_cell = Track_cell[variable].max().values
@@ -579,6 +1101,46 @@ def histogram_cellwise(
 
 def histogram_featurewise(Track, variable=None, bin_edges=None, density=False):
+    """Create a histogram of a variable from the features
+    (detected objects at a single time step) of a track.
+    Essentially a wrapper of the numpy.histogram() method.
+
+    Parameters
+    ----------
+    Track : pandas.DataFrame
+        The track containing the variable to create the
+        histogram from.
+
+    variable : string, optional
+        Column of the DataFrame with the variable on which the
+        histogram is to be based. Default is None.
+
+    bin_edges : int or ndarray, optional
+        If bin_edges is an int, it defines the number of
+        equal-width bins in the given range. If bins is
+        a sequence, it defines a monotonically increasing
+        array of bin edges, including the rightmost edge.
+
+    density : bool, optional
+        If False, the result will contain the number of
+        samples in each bin. If True, the result is the
+        value of the probability density function at the
+        bin, normalized such that the integral over the
+        range is 1. Default is False.
+
+    Returns
+    -------
+    hist : ndarray
+        The values of the histogram.
+
+    bin_edges : ndarray
+        The edges of the histogram.
+
+    bin_centers : ndarray
+        The centers of the histogram intervals.
+
+    """
+
     hist, bin_edges = np.histogram(Track[variable].values, bin_edges, density=density)
     bin_centers = bin_edges[:-1] + 0.5 * np.diff(bin_edges)
 
@@ -588,6 +1150,36 @@ def histogram_featurewise(Track, variable=None, bin_edges=None, density=False):
 
 def calculate_overlap(
     track_1, track_2, min_sum_inv_distance=None, min_mean_inv_distance=None
 ):
+    """Count the number of time frames in which the
+    individual cells of two tracks are present together
+    and calculate their mean and summed inverse distance.
+
+    Parameters
+    ----------
+    track_1, track_2 : pandas.DataFrame
+        The tracks containing the cells to analyze.
+
+    min_sum_inv_distance : float, optional
+        Minimum of the inverse net distance for two
+        cells to be counted as overlapping.
+        Default is None.
+
+    min_mean_inv_distance : float, optional
+        Minimum of the inverse mean distance for two cells
+        to be counted as overlapping. Default is None.
+
+    Returns
+    -------
+    overlap : pandas.DataFrame
+        DataFrame containing the columns cell_1 and cell_2
+        with the index of the cells from the tracks,
+        n_overlap with the number of frames both cells are
+        present in, mean_inv_distance with the mean inverse
+        distance and sum_inv_distance with the summed
+        inverse distance of the cells.
+
+    """
+
     cells_1 = track_1["cell"].unique()
     #  n_cells_1_tot=len(cells_1)
     cells_2 = track_2["cell"].unique()
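Usage might look like this (a sketch; `track_1` and `track_2` are assumed to be linked-track DataFrames, and the threshold value is hypothetical):

    from tobac.analysis import calculate_overlap

    # Pair up cells from two track sets that coexist in time and are
    # close enough (inverse mean distance above a hypothetical threshold)
    overlap = calculate_overlap(track_1, track_2,
                                min_mean_inv_distance=1e-4)
    print(overlap[["cell_1", "cell_2", "n_overlap"]].head())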
+""" + import logging def calculate_cog(tracks, mass, mask): - """caluclate centre of gravity and mass forech individual tracked cell in the simulation - Input: - tracks: pandas.DataFrame - DataFrame containing trajectories of cell centres - mass: iris.cube.Cube - cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - mask: iris.cube.Cube - cube containing mask (int > where belonging to cloud volume, 0 everywhere else ) - Output: - tracks_out pandas.DataFrame - Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass each tracked cells at each timestep + """Calculate center of gravity and mass for each tracked cell. + + Parameters + ---------- + tracks : pandas.DataFrame + DataFrame containing trajectories of cell centers. + + mass : iris.cube.Cube + Cube of quantity (need coordinates 'time', + 'geopotential_height','projection_x_coordinate' and + 'projection_y_coordinate'). + + mask : iris.cube.Cube + Cube containing mask (int > where belonging to area/volume + of feature, 0 else). + + Returns + ------- + tracks_out : pandas.DataFrame + Dataframe containing t, x, y, z positions of center of gravity + and total mass of each tracked cell at each timestep. """ + from .utils import mask_cube_cell from iris import Constraint @@ -39,17 +53,26 @@ def calculate_cog(tracks, mass, mask): def calculate_cog_untracked(mass, mask): - """caluclate centre of gravity and mass for untracked parts of domain - Input: - mass: iris.cube.Cube - cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - - mask: iris.cube.Cube - cube containing mask (int > where belonging to cloud volume, 0 everywhere else ) - Output: - tracks_out pandas.DataFrame - Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass for untracked part of dimain + """Calculate center of gravity and mass for untracked domain parts. + + Parameters + ---------- + mass : iris.cube.Cube + Cube of quantity (need coordinates 'time', + 'geopotential_height','projection_x_coordinate' and + 'projection_y_coordinate'). + + mask : iris.cube.Cube + Cube containing mask (int > where belonging to area/volume + of feature, 0 else). + + Returns + ------- + tracks_out : pandas.DataFrame + Dataframe containing t, x, y, z positions of center of gravity + and total mass for untracked part of the domain. """ + from pandas import DataFrame from .utils import mask_cube_untracked from iris import Constraint @@ -81,14 +104,22 @@ def calculate_cog_untracked(mass, mask): def calculate_cog_domain(mass): - """caluclate centre of gravity and mass for entire domain - Input: - mass: iris.cube.Cube - cube of quantity (need coordinates 'time', 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - Output: - tracks_out pandas.DataFrame - Dataframe containing t,x,y,z positions of centre of gravity and total cloud mass + """Calculate center of gravity and mass for entire domain. + + Parameters + ---------- + mass : iris.cube.Cube + Cube of quantity (need coordinates 'time', + 'geopotential_height','projection_x_coordinate' and + 'projection_y_coordinate'). + + Returns + ------- + tracks_out : pandas.DataFrame + Dataframe containing t, x, y, z positions of center of gravity + and total mass of the entire domain. 
""" + from pandas import DataFrame from iris import Constraint @@ -115,21 +146,30 @@ def calculate_cog_domain(mass): def center_of_gravity(cube_in): - """caluclate centre of gravity and sum of quantity - Input: - cube_in: iris.cube.Cube - cube (potentially masked) of quantity (need coordinates 'geopotential_height','projection_x_coordinate' and 'projection_y_coordinate') - Output: - x: float - x position of centre of gravity - y: float - y position of centre of gravity - z: float - z position of centre of gravity - variable_sum: float - sum of quantity of over unmasked part of the cube + """Calculate center of gravity and sum of quantity. + + Parameters + ---------- + cube_in : iris.cube.Cube + Cube (potentially masked) of quantity (need coordinates + 'geopotential_height','projection_x_coordinate' and + 'projection_y_coordinate'). + Returns + ------- + x : float + X position of center of gravity. + + y : float + Y position of center of gravity. + + z : float + Z position of center of gravity. + + variable_sum : float + Sum of quantity of over unmasked part of the cube. """ + from iris.analysis import SUM import numpy as np diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 69d2bd90..c11d527b 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1,3 +1,22 @@ +"""Provide feature detection. + +This module can work with any two-dimensional field. +To identify the features, contiguous regions above or +below a threshold are determined and labelled individually. +To describe the specific location of the feature at a +specific point in time, different spatial properties +are used to describe the identified region. [2]_ + +References +---------- +.. Heikenfeld, M., Marinescu, P. J., Christensen, M., + Watson-Parris, D., Senf, F., van den Heever, S. C. + & Stier, P. (2019). tobac 1.2: towards a flexible + framework for tracking and analysis of clouds in + diverse datasets. Geoscientific Model Development, + 12(11), 4551-4570. +""" + import logging import numpy as np import pandas as pd @@ -15,27 +34,33 @@ def feature_position( position_threshold="center", target=None, ): - """Function to determine feature position + """Determine feature position with regard to the horizontal + dimensions in pixels from the identified region above + threshold values Parameters ---------- hdim1_indices : list - list of indices along hdim1 (typically ```y```) + indices of pixels in region along first horizontal + dimension hdim2_indices : list - List of indices of feature along hdim2 (typically ```x```) + indices of pixels in region along second horizontal + dimension region_small : 2D array-like A true/false array containing True where the threshold - is met and false where the threshold isn't met. This array should - be the the size specified by region_bbox, and can be a subset of the - overall input array (i.e., ```track_data```). + is met and false where the threshold isn't met. This + array should be the the size specified by region_bbox, + and can be a subset of the overall input array + (i.e., ```track_data```). region_bbox : list or tuple with length of 4 - The coordinates that region_small occupies within the total track_data - array. This is in the order that the coordinates come from the - ```get_label_props_in_dict``` function. For 2D data, this should be: - (hdim1 start, hdim 2 start, hdim 1 end, hdim 2 end). + The coordinates that region_small occupies within the + total track_data array. 
+        This is in the order that the
+        coordinates come from the ```get_label_props_in_dict```
+        function. For 2D data, this should be: (hdim1 start,
+        hdim 2 start, hdim 1 end, hdim 2 end).
 
     track_data : 2D array-like
         2D array containing the data
 
     threshold_i : float
         The threshold value that we are testing against
 
-    position_threshold : {'center', 'extreme', 'weighted_diff', 'weighted abs'}
+    position_threshold : {'center', 'extreme', 'weighted_diff',
+                          'weighted_abs'}
         How to select the single point position from our data.
-        'center' picks the geometrical centre of the region, and is typically not recommended.
-        'extreme' picks the maximum or minimum value inside the region (max/min set by ```target```)
-        'weighted_diff' picks the centre of the region weighted by the distance from the threshold value
-        'weighted_abs' picks the centre of the region weighted by the absolute values of the field
+        'center' picks the geometrical centre of the region,
+        and is typically not recommended. 'extreme' picks the
+        maximum or minimum value inside the region (max/min set
+        by ```target```). 'weighted_diff' picks the centre of the
+        region weighted by the distance from the threshold value.
+        'weighted_abs' picks the centre of the region weighted by
+        the absolute values of the field.
 
     target : {'maximum', 'minimum'}
-        Used only when position_threshold is set to 'extreme', this sets whether
-        it is looking for maxima or minima.
+        Used only when position_threshold is set to 'extreme',
+        this sets whether it is looking for maxima or minima.
 
     Returns
     -------
@@ -104,36 +133,57 @@ def feature_position(
 
 def test_overlap(region_inner, region_outer):
+    """Test for overlap between two regions.
+
+    Parameters
+    ----------
+    region_inner : list
+        List of 2-element tuples defining the indices of
+        all cells in the region.
+
+    region_outer : list
+        List of 2-element tuples defining the indices of
+        all cells in the region.
+
+    Returns
+    -------
+    overlap : bool
+        True if there are any shared points between the two
+        regions.
     """
-    function to test for overlap between two regions (probably scope for further speedup here)
-    Input:
-    region_1: list
-              list of 2-element tuples defining the indices of all cell in the region
-    region_2: list
-              list of 2-element tuples defining the indices of all cell in the region
-
-    Output:
-    overlap: bool
-             True if there are any shared points between the two regions
-    """
+
     overlap = frozenset(region_outer).isdisjoint(region_inner)
     return not overlap
 
 
 def remove_parents(features_thresholds, regions_i, regions_old):
+    """Remove parents of newly detected feature regions.
+
+    Remove features where their regions surround newly
+    detected feature regions.
+
+    Parameters
+    ----------
+    features_thresholds : pandas.DataFrame
+        Dataframe containing detected features.
+
+    regions_i : dict
+        Dictionary containing the regions above/below
+        threshold for the newly detected feature
+        (feature ids as keys).
+
+    regions_old : dict
+        Dictionary containing the regions above/below
+        threshold from previous threshold
+        (feature ids as keys).
+
+    Returns
+    -------
+    features_thresholds : pandas.DataFrame
+        Dataframe containing detected features excluding those
+        that are superseded by newly detected ones.
""" - function to remove features whose regions surround newly detected feature regions - Input: - features_thresholds: pandas.DataFrame - Dataframe containing detected features - regions_i: dict - dictionary containing the regions above/below threshold for the newly detected feature (feature ids as keys) - regions_old: dict - dictionary containing the regions above/below threshold from previous threshold (feature ids as keys) - Output: - features_thresholds pandas.DataFrame - Dataframe containing detected features excluding those that are superseded by newly detected ones - """ + try: all_curr_pts = np.concatenate([vals for idx, vals in regions_i.items()]) all_old_pts = np.concatenate([vals for idx, vals in regions_old.items()]) @@ -173,35 +223,55 @@ def feature_detection_threshold( min_distance=0, idx_start=0, ): + """Find features based on individual threshold value. + + Parameters + ---------- + data_i : iris.cube.Cube + 2D field to perform the feature detection (single timestep) on. + + i_time : int + Number of the current timestep. + + threshold : float, optional + Threshold value used to select target regions to track. Default + is None. + + target : {'maximum', 'minimum'}, optional + Flag to determine if tracking is targetting minima or maxima + in the data. Default is 'maximum'. + + position_threshold : {'center', 'extreme', 'weighted_diff', + 'weighted_abs'}, optional + Flag choosing method used for the position of the tracked + feature. Default is 'center'. + + sigma_threshold: float, optional + Standard deviation for intial filtering step. Default is 0.5. + + n_erosion_threshold: int, optional + Number of pixel by which to erode the identified features. + Default is 0. + + n_min_threshold : int, optional + Minimum number of identified features. Default is 0. + + min_distance : float, optional + Minimum distance between detected features. Default is 0. + + idx_start : int, optional + Feature id to start with. Default is 0. + + Returns + ------- + features_threshold : pandas DataFrame + Detected features for individual threshold. + + regions : dict + Dictionary containing the regions above/below threshold used + for each feature (feature ids as keys). 
""" - function to find features based on individual threshold value: - Input: - data_i: iris.cube.Cube - 2D field to perform the feature detection (single timestep) - i_time: int - number of the current timestep - threshold: float - threshold value used to select target regions to track - target: str ('minimum' or 'maximum') - flag to determine if tracking is targetting minima or maxima in the data - position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') - flag choosing method used for the position of the tracked feature - sigma_threshold: float - standard deviation for intial filtering step - n_erosion_threshold: int - number of pixel by which to erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - idx_start: int - feature id to start with - Output: - features_threshold: pandas DataFrame - detected features for individual threshold - regions: dict - dictionary containing the regions above/below threshold used for each feature (feature ids as keys) - """ + from skimage.measure import label from skimage.morphology import binary_erosion from copy import deepcopy @@ -337,35 +407,54 @@ def feature_detection_multithreshold_timestep( min_distance=0, feature_number_start=1, ): - """ - function to find features in each timestep based on iteratively finding regions above/below a set of thresholds - Input: - data_i: iris.cube.Cube - 2D field to perform the feature detection (single timestep) - i_time: int - number of the current timestep - - threshold: list of floats - threshold values used to select target regions to track - dxy: float - grid spacing of the input data (m) - target: str ('minimum' or 'maximum') - flag to determine if tracking is targetting minima or maxima in the data - position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center') - flag choosing method used for the position of the tracked feature - sigma_threshold: float - standard deviation for intial filtering step - n_erosion_threshold: int - number of pixel by which to erode the identified features - n_min_threshold: int - minimum number of identified features - min_distance: float - minimum distance between detected features (m) - feature_number_start: int - feature number to start with - Output: - features_threshold: pandas DataFrame - detected features for individual timestep + """Find features in each timestep. + + Based on iteratively finding regions above/below a set of + thresholds. Smoothing the input data with the Gaussian filter makes + output less sensitive to noisiness of input data. + + Parameters + ---------- + + data_i : iris.cube.Cube + 2D field to perform the feature detection (single timestep) on. + + threshold : float, optional + Threshold value used to select target regions to track. Default + is None. + + min_num : int, optional + This parameter is not used in the function. Default is 0. + + target : {'maximum', 'minimum'}, optinal + Flag to determine if tracking is targetting minima or maxima + in the data. Default is 'maximum'. + + position_threshold : {'center', 'extreme', 'weighted_diff', + 'weighted_abs'}, optional + Flag choosing method used for the position of the tracked + feature. Default is 'center'. + + sigma_threshold: float, optional + Standard deviation for intial filtering step. Default is 0.5. + + n_erosion_threshold: int, optional + Number of pixel by which to erode the identified features. + Default is 0. 
@@ -337,35 +407,54 @@ def feature_detection_multithreshold_timestep(
     min_distance=0,
     feature_number_start=1,
 ):
-    """
-    function to find features in each timestep based on iteratively finding regions above/below a set of thresholds
-    Input:
-    data_i: iris.cube.Cube
-            2D field to perform the feature detection (single timestep)
-    i_time: int
-            number of the current timestep
-
-    threshold: list of floats
-               threshold values used to select target regions to track
-    dxy: float
-         grid spacing of the input data (m)
-    target: str ('minimum' or 'maximum')
-            flag to determine if tracking is targetting minima or maxima in the data
-    position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center')
-                        flag choosing method used for the position of the tracked feature
-    sigma_threshold: float
-                     standard deviation for intial filtering step
-    n_erosion_threshold: int
-                         number of pixel by which to erode the identified features
-    n_min_threshold: int
-                     minimum number of identified features
-    min_distance: float
-                  minimum distance between detected features (m)
-    feature_number_start: int
-                          feature number to start with
-    Output:
-    features_threshold: pandas DataFrame
-                        detected features for individual timestep
+    """Find features in each timestep.
+
+    Based on iteratively finding regions above/below a set of
+    thresholds. Smoothing the input data with the Gaussian filter makes
+    output less sensitive to noisiness of input data.
+
+    Parameters
+    ----------
+    data_i : iris.cube.Cube
+        2D field to perform the feature detection (single timestep) on.
+
+    i_time : int
+        Number of the current timestep.
+
+    threshold : list of floats, optional
+        Threshold values used to select target regions to track. Default
+        is None.
+
+    min_num : int, optional
+        This parameter is not used in the function. Default is 0.
+
+    target : {'maximum', 'minimum'}, optional
+        Flag to determine if tracking is targeting minima or maxima
+        in the data. Default is 'maximum'.
+
+    position_threshold : {'center', 'extreme', 'weighted_diff',
+                          'weighted_abs'}, optional
+        Flag choosing method used for the position of the tracked
+        feature. Default is 'center'.
+
+    sigma_threshold : float, optional
+        Standard deviation for initial filtering step. Default is 0.5.
+
+    n_erosion_threshold : int, optional
+        Number of pixels by which to erode the identified features.
+        Default is 0.
+
+    n_min_threshold : int, optional
+        Minimum number of identified contiguous pixels for a feature
+        to be detected. Default is 0.
+
+    min_distance : float, optional
+        Minimum distance (in meters) between detected features.
+        Default is 0.
+
+    feature_number_start : int, optional
+        Feature id to start with. Default is 1.
+
+    Returns
+    -------
+    features_threshold : pandas DataFrame
+        Detected features for individual timestep.
     """
     # Handle scipy depreciation gracefully
     try:
@@ -437,31 +526,62 @@ def feature_detection_multithreshold(
     min_distance=0,
     feature_number_start=1,
 ):
-    """Function to perform feature detection based on contiguous regions above/below a threshold
-    Input:
-    field_in: iris.cube.Cube
-              2D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions)
-
-    thresholds: list of floats
-               threshold values used to select target regions to track
-    dxy: float
-         grid spacing of the input data (m)
-    target: str ('minimum' or 'maximum')
-            flag to determine if tracking is targetting minima or maxima in the data
-    position_threshold: str('extreme', 'weighted_diff', 'weighted_abs' or 'center')
-                        flag choosing method used for the position of the tracked feature
-    sigma_threshold: float
-                     standard deviation for intial filtering step
-    n_erosion_threshold: int
-                         number of pixel by which to erode the identified features
-    n_min_threshold: int
-                     minimum number of identified features
-    min_distance: float
-                  minimum distance between detected features (m)
-    Output:
-    features: pandas DataFrame
-              detected features
+    """Perform feature detection based on contiguous regions.
+
+    The regions are above/below a threshold.
+
+    Parameters
+    ----------
+    field_in : iris.cube.Cube
+        2D field to perform the tracking on (needs to have coordinate
+        'time' along one of its dimensions).
+
+    dxy : float
+        Grid spacing of the input data (in meters).
+
+    thresholds : list of floats, optional
+        Threshold values used to select target regions to track.
+        Default is None.
+
+    target : {'maximum', 'minimum'}, optional
+        Flag to determine if tracking is targeting minima or maxima in
+        the data. Default is 'maximum'.
+
+    position_threshold : {'center', 'extreme', 'weighted_diff',
+                          'weighted_abs'}, optional
+        Flag choosing method used for the position of the tracked
+        feature. Default is 'center'.
+
+    coord_interp_kind : str, optional
+        The kind of interpolation for coordinates. Default is 'linear'.
+        For 1d interp, {'linear', 'nearest', 'nearest-up', 'zero',
+        'slinear', 'quadratic', 'cubic', 'previous', 'next'}.
+        For 2d interp, {'linear', 'cubic', 'quintic'}.
+
+    sigma_threshold : float, optional
+        Standard deviation for initial filtering step. Default is 0.5.
+
+    n_erosion_threshold : int, optional
+        Number of pixels by which to erode the identified features.
+        Default is 0.
+
+    n_min_threshold : int, optional
+        Minimum number of identified contiguous pixels for a feature
+        to be detected. Default is 0.
+
+    min_distance : float, optional
+        Minimum distance (in meters) between detected features.
+        Default is 0.
+
+    feature_number_start : int, optional
+        Feature id to start with. Default is 1.
+
+    Returns
+    -------
+    features : pandas.DataFrame
+        Detected features.
+        The structure of this dataframe is explained
+        `here `__
     """
+
     from .utils import add_coordinates
 
     if min_num != 0:
@@ -528,18 +648,26 @@ def feature_detection_multithreshold(
 
 def filter_min_distance(features, dxy, min_distance):
-    """Function to perform feature detection based on contiguous regions above/below a threshold
-    Input:
-    features: pandas DataFrame
-              features
-    dxy: float
-         horzontal grid spacing (m)
-    min_distance: float
-                  minimum distance between detected features (m)
-    Output:
-    features: pandas DataFrame
-              features
+    """Remove detected features that are closer together than a
+    minimum distance.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+
+    dxy : float
+        Grid spacing (in meters) of the input data.
+
+    min_distance : float, optional
+        Minimum distance (in meters) between detected features.
+
+    Returns
+    -------
+    features : pandas.DataFrame
+        Features after the minimum-distance filtering.
     """
+
     from itertools import combinations
 
     remove_list_distance = []
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index 2a76ebc6..ca06a798 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -1,3 +1,35 @@
+"""Provide segmentation techniques.
+
+Segmentation techniques are used to associate areas or volumes to each
+identified feature. The segmentation is implemented using watershedding
+techniques from the field of image processing with a fixed threshold
+value. This value has to be set specifically for every type of input
+data and application. The segmentation can be performed for both
+two-dimensional and three-dimensional data. At each timestep, a marker
+is set at the position (weighted mean center) of each feature identified
+in the detection step in an array otherwise filled with zeros. In case
+of the three-dimensional watershedding, all cells in the column above
+the weighted mean center position of the identified features fulfilling
+the threshold condition are set to the respective marker. The algorithm
+then fills the area (2D) or volume (3D) based on the input field
+starting from these markers until reaching the threshold. If two or more
+features are directly connected, the border runs along the
+watershed line between the two regions. This procedure creates a mask
+that has the same form as the input data, with the corresponding integer
+number at all grid points that belong to a feature, else with zero. This
+mask can be conveniently and efficiently used to select the volume of each
+feature at a specific time step for further analysis or visualization.
+
+References
+----------
+.. Heikenfeld, M., Marinescu, P. J., Christensen, M.,
+   Watson-Parris, D., Senf, F., van den Heever, S. C.
+   & Stier, P. (2019). tobac 1.2: towards a flexible
+   framework for tracking and analysis of clouds in
+   diverse datasets. Geoscientific Model Development,
+   12(11), 4551-4570.
+"""
+
 import logging
 
 
@@ -11,6 +43,8 @@ def segmentation_3D(
     method="watershed",
     max_distance=None,
 ):
+    """Wrapper for the segmentation()-function."""
+
     return segmentation(
         features,
         field,
@@ -33,6 +67,7 @@ def segmentation_2D(
     method="watershed",
     max_distance=None,
 ):
+    """Wrapper for the segmentation()-function."""
     return segmentation(
         features,
         field,
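The watershedding idea described in the module docstring can be illustrated with a toy sketch (markers grow over the field until they meet, and the mask plays the role of the threshold condition; tobac applies this with the detected feature positions as markers):

    import numpy as np
    from skimage.segmentation import watershed

    # Two markers flood the field; the center point is excluded by the mask
    field = np.array([[1.0, 2.0, 5.0, 2.0, 1.0]])
    markers = np.array([[1, 0, 0, 0, 2]])
    seg = watershed(field, markers, mask=field < 4.0)
    print(seg)  # [[1 1 0 2 2]]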
@@ -56,31 +91,73 @@ def segmentation_timestep(
     max_distance=None,
     vertical_coord="auto",
 ):
+    """Perform watershedding for an individual time step of the data.
+    Works for both 2D and 3D data.
+
+    Parameters
+    ----------
+    field_in : iris.cube.Cube
+        Input field to perform the watershedding on (2D or 3D for one
+        specific point in time).
+
+    features_in : pandas.DataFrame
+        Features for one specific point in time.
+
+    dxy : float
+        Grid spacing of the input data in metres.
+
+    threshold : float, optional
+        Threshold for the watershedding field to be used for the mask.
+        Default is 3e-3.
+
+    target : {'maximum', 'minimum'}, optional
+        Flag to determine if tracking is targeting minima or maxima in
+        the data to determine from which direction to approach the
+        threshold value. Default is 'maximum'.
+
+    level : slice of iris.cube.Cube, optional
+        Levels at which to seed the cells for the watershedding
+        algorithm. Default is None.
+
+    method : {'watershed'}, optional
+        Flag determining the algorithm to use (currently watershedding
+        implemented). 'random_walk' could be uncommented.
+
+    max_distance : float, optional
+        Maximum distance from a marker allowed to be classified as
+        belonging to that cell. Default is None.
+
+    vertical_coord : str, optional
+        Vertical coordinate in 3D input data. If 'auto', input is checked
+        for one of {'z', 'model_level_number', 'altitude',
+        'geopotential_height'} as a likely coordinate name.
+
+    Returns
+    -------
+    segmentation_out : iris.cube.Cube
+        Mask, 0 outside and integer numbers according to track
+        inside the objects.
+
+    features_out : pandas.DataFrame
+        Feature dataframe including the number of cells (2D or 3D) in
+        the segmented area/volume of the feature at the timestep.
+
+    Raises
+    ------
+    ValueError
+        If target is neither 'maximum' nor 'minimum'.
+
+        If vertical_coord is not in {'auto', 'z', 'model_level_number',
+        'altitude', 'geopotential_height'}.
+
+        If there is more than one coordinate name.
+
+        If the spatial dimension is neither 2 nor 3.
+
+        If method is not 'watershed'.
+
     """
-    Function performing watershedding for an individual timestep of the data
-
-    Parameters:
-    features:         pandas.DataFrame
-                      features for one specific point in time
-    field:            iris.cube.Cube
-                      input field to perform the watershedding on (2D or 3D for one specific point in time)
-    threshold:        float
-                      threshold for the watershedding field to be used for the mas
-    target:           string
-                      switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
-    level             slice
-                      vertical levels at which to seed the cells for the watershedding algorithm
-    method:           string
-                      flag determining the algorithm to use (currently watershedding implemented)
-    max_distance:     float
-                      maximum distance from a marker allowed to be classified as belonging to that cell
-
-    Output:
-    segmentation_out: iris.cube.Cube
-                      cloud mask, 0 outside and integer numbers according to track inside the clouds
-    features_out:     pandas.DataFrame
-                      feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
-    """
+
     # The location of watershed within skimage submodules changes with v0.19, but I've kept both for backward compatibility for now
     try:
         from skimage.segmentation import watershed
@@ -210,31 +287,65 @@ def segmentation(
     max_distance=None,
     vertical_coord="auto",
 ):
-    """
-    Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
-
-    Parameters:
-    features:         pandas.DataFrame
-                      output from trackpy/maketrack
-    field:            iris.cube.Cube
-                      containing the field to perform the watershedding on
-    threshold:        float
-                      threshold for the watershedding field to be used for the mask
-
-    target:           string
-                      Switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
-
-    level             slice
-                      levels at which to seed the cells for the watershedding algorithm
-    method:           str ('method')
-                      flag determining the algorithm to use (currently watershedding implemented)
-
-    max_distance:     float
-                      Maximum distance from a marker allowed to be classified as belonging to that cell
-
-    Output:
-    segmentation_out: iris.cube.Cube
-                      Cloud mask, 0 outside and integer numbers according to track inside the cloud
+    """Use watershedding to determine region above a threshold
+    value around initial seeding position for all time steps of
+    the input data. Works both in 2D (based on single seeding
+    point) and 3D and returns a mask with zeros everywhere around
+    the identified regions and the feature id inside the regions.
+
+    Calls segmentation_timestep at each individual timestep of the
+    input data.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        Output from trackpy/maketrack.
+
+    field : iris.cube.Cube
+        Containing the field to perform the watershedding on.
+
+    dxy : float
+        Grid spacing of the input data.
+
+    threshold : float, optional
+        Threshold for the watershedding field to be used for the mask.
+        Default is 3e-3.
+
+    target : {'maximum', 'minimum'}, optional
+        Flag to determine if tracking is targeting minima or maxima in
+        the data. Default is 'maximum'.
+
+    level : slice of iris.cube.Cube, optional
+        Levels at which to seed the cells for the watershedding
+        algorithm. Default is None.
+
+    method : {'watershed'}, optional
+        Flag determining the algorithm to use (currently watershedding
+        implemented). 'random_walk' could be uncommented.
+
+    max_distance : float, optional
+        Maximum distance from a marker allowed to be classified as
+        belonging to that cell. Default is None.
+
+    vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
+                      'geopotential_height'}, optional
+        Name of the vertical coordinate for use in the 3D segmentation
+        case.
+
+    Returns
+    -------
+    segmentation_out : iris.cube.Cube
+        Mask, 0 outside and integer numbers according to track
+        inside the area/volume of the feature.
+
+    features_out : pandas.DataFrame
+        Feature dataframe including the number of cells (2D or 3D) in
+        the segmented area/volume of the feature at the timestep.
+
+    Raises
+    ------
+    ValueError
+        If field_in.ndim is neither 3 nor 4 and 'time' is not included
+        in coords.
     """
     import pandas as pd
     from iris.cube import CubeList
@@ -286,10 +397,12 @@ def segmentation(
 
 def watershedding_3D(track, field_in, **kwargs):
+    """Wrapper for the segmentation()-function."""
     kwargs.pop("method", None)
     return segmentation_3D(track, field_in, method="watershed", **kwargs)
 
 
 def watershedding_2D(track, field_in, **kwargs):
+    """Wrapper for the segmentation()-function."""
    kwargs.pop("method", None)
    return segmentation_2D(track, field_in, method="watershed", **kwargs)
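A typical call (a minimal sketch; `features` is assumed to come from tobac feature detection and `field` is an iris cube of the quantity to segment):

    from tobac.segmentation import segmentation

    # Mask of feature ids plus features augmented with cell counts
    mask, features_out = segmentation(
        features, field, dxy=1000.0, threshold=3e-3, target="maximum"
    )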
diff --git a/tobac/testing.py b/tobac/testing.py
index 36338a44..cb2c750f 100644
--- a/tobac/testing.py
+++ b/tobac/testing.py
@@ -1,3 +1,7 @@
+"""Contains methods to make simple sample data for testing.
+
+"""
+
 import datetime
 import numpy as np
 from xarray import DataArray
@@ -5,17 +9,28 @@
 
 def make_simple_sample_data_2D(data_type="iris"):
-    """
-    function creating a simple dataset to use in tests for tobac.
-    The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 500 in y direction.
-    Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
-    The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
-    The data contains a single blob travelling on a linear trajectory through the dataset for part of the time.
+    """Create a simple dataset to use in tests.
 
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    The grid has a grid spacing of 1km in both horizontal directions
+    and 100 grid cells in x direction and 500 in y direction.
+    Time resolution is 1 minute and the total length of the dataset is
+    100 minutes around an arbitrary date (2000-01-01 12:00).
+    The longitude and latitude coordinates are added as 2D aux
+    coordinates and are arbitrary, but in a realistic range.
+    The data contains a single blob travelling on a linear trajectory
+    through the dataset for part of the time.
+
+    Parameters
+    ----------
+    data_type : {'iris', 'xarray'}, optional
+        Choose type of the dataset that will be produced.
+        Default is 'iris'.
+
+    Returns
+    -------
+    sample_data : iris.cube.Cube or xarray.DataArray
     """
+
     from iris.cube import Cube
     from iris.coords import DimCoord, AuxCoord
 
@@ -79,21 +94,31 @@ def make_simple_sample_data_2D(data_type="iris"):
 
 def make_sample_data_2D_3blobs(data_type="iris"):
-    from iris.cube import Cube
-    from iris.coords import DimCoord, AuxCoord
+    """Create a simple dataset to use in tests.
 
-    """
-    function creating a simple dataset to use in tests for tobac.
-    The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction.
-    Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
-    The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
-    The data contains a three individual blobs travelling on a linear trajectory through the dataset for part of the time.
-
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    The grid has a grid spacing of 1km in both horizontal directions
+    and 100 grid cells in x direction and 200 in y direction.
+    Time resolution is 1 minute and the total length of the dataset is
+    100 minutes around an arbitrary date (2000-01-01 12:00).
+    The longitude and latitude coordinates are added as 2D aux
+    coordinates and are arbitrary, but in a realistic range.
+    The data contains three individual blobs travelling on a linear
+    trajectory through the dataset for part of the time.
+
+    Parameters
+    ----------
+    data_type : {'iris', 'xarray'}, optional
+        Choose type of the dataset that will be produced.
+        Default is 'iris'.
+
+    Returns
+    -------
+    sample_data : iris.cube.Cube or xarray.DataArray
     """
+
+    from iris.cube import Cube
+    from iris.coords import DimCoord, AuxCoord
+
     t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0)
 
     x = np.arange(0, 100e3, 1000)
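For example (a minimal sketch showing both output types):

    from tobac.testing import make_simple_sample_data_2D

    # Build the synthetic single-blob dataset described above
    sample = make_simple_sample_data_2D(data_type="iris")       # iris.cube.Cube
    sample_xr = make_simple_sample_data_2D(data_type="xarray")  # xarray.DataArray
    print(sample)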
@@ -79,21 +94,31 @@ def make_simple_sample_data_2D(data_type="iris"):


 def make_sample_data_2D_3blobs(data_type="iris"):
-    from iris.cube import Cube
-    from iris.coords import DimCoord, AuxCoord
+    """Create a simple dataset to use in tests.

-    """
-    function creating a simple dataset to use in tests for tobac.
-    The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction.
-    Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
-    The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
-    The data contains a three individual blobs travelling on a linear trajectory through the dataset for part of the time.
-
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    The grid has a grid spacing of 1km in both horizontal directions
+    and 100 grid cells in x direction and 200 in y direction.
+    Time resolution is 1 minute and the total length of the dataset is
+    100 minutes around an arbitrary date (2000-01-01 12:00).
+    The longitude and latitude coordinates are added as 2D aux
+    coordinates and are arbitrary, but in a realistic range.
+    The data contains three individual blobs travelling on a linear
+    trajectory through the dataset for part of the time.
+
+    Parameters
+    ----------
+    data_type : {'iris', 'xarray'}, optional
+        Choose the type of the dataset that will be produced.
+        Default is 'iris'.
+
+    Returns
+    -------
+    sample_data : iris.cube.Cube or xarray.DataArray
     """
+
+    from iris.cube import Cube
+    from iris.coords import DimCoord, AuxCoord
+
     t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0)

     x = np.arange(0, 100e3, 1000)
@@ -183,14 +208,24 @@ def make_sample_data_2D_3blobs(data_type="iris"):


 def make_sample_data_2D_3blobs_inv(data_type="iris"):
-    """
-    function creating a version of the dataset created in the function make_sample_cube_2D, but with switched coordinate order for the horizontal coordinates
-    for tests to ensure that this does not affect the results
+    """Create a version of the dataset with switched coordinates.

-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    Create a version of the dataset created in the function
+    make_sample_cube_2D, but with switched coordinate order for the
+    horizontal coordinates, for tests to ensure that this does not
+    affect the results.
+
+    Parameters
+    ----------
+    data_type : {'iris', 'xarray'}, optional
+        Choose the type of the dataset that will be produced.
+        Default is 'iris'.
+
+    Returns
+    -------
+    sample_data : iris.cube.Cube or xarray.DataArray
     """
+
     from iris.cube import Cube
     from iris.coords import DimCoord, AuxCoord
@@ -285,21 +320,35 @@ def make_sample_data_2D_3blobs_inv(data_type="iris"):


 def make_sample_data_3D_3blobs(data_type="iris", invert_xy=False):
-    from iris.cube import Cube
-    from iris.coords import DimCoord, AuxCoord
+    """Create a simple dataset to use in tests.

-    """
-    function creating a simple dataset to use in tests for tobac.
-    The grid has a grid spacing of 1km in both horizontal directions and 100 grid cells in x direction and 200 in y direction.
-    Time resolution is 1 minute and the total length of the dataset is 100 minutes around a abritraty date (2000-01-01 12:00).
-    The longitude and latitude coordinates are added as 2D aux coordinates and arbitrary, but in realisitic range.
-    The data contains a three individual blobs travelling on a linear trajectory through the dataset for part of the time.
-
-    :param data_type: 'iris' or 'xarray' to chose the type of dataset to produce
-    :return: sample dataset as an Iris.Cube.cube or xarray.DataArray
+    The grid has a grid spacing of 1km in both horizontal directions
+    and 100 grid cells in x direction and 200 in y direction.
+    Time resolution is 1 minute and the total length of the dataset is
+    100 minutes around an arbitrary date (2000-01-01 12:00).
+    The longitude and latitude coordinates are added as 2D aux
+    coordinates and are arbitrary, but in a realistic range.
+    The data contains three individual blobs travelling on a linear
+    trajectory through the dataset for part of the time.
+
+    Parameters
+    ----------
+    data_type : {'iris', 'xarray'}, optional
+        Choose the type of the dataset that will be produced.
+        Default is 'iris'.
+
+    invert_xy : bool, optional
+        Flag to determine whether to switch x and y coordinates.
+        Default is False.
+
+    Returns
+    -------
+    sample_data : iris.cube.Cube or xarray.DataArray
     """
+
+    from iris.cube import Cube
+    from iris.coords import DimCoord, AuxCoord
+
     t_0 = datetime.datetime(2000, 1, 1, 12, 0, 0)

     x = np.arange(0, 100e3, 1000)
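A short sketch of the three-blob helpers documented above; the comments only restate what the docstrings say.

from tobac.testing import (
    make_sample_data_2D_3blobs,
    make_sample_data_3D_3blobs,
)

blobs_2d = make_sample_data_2D_3blobs()        # three moving blobs, 2D
blobs_3d = make_sample_data_3D_3blobs()        # same idea with a vertical dimension
blobs_3d_inv = make_sample_data_3D_3blobs(invert_xy=True)  # x and y switched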
""" + import xarray as xr import iris @@ -490,34 +550,46 @@ def make_feature_blob( shape="rectangle", amplitude=1, ): - import xarray as xr - - """Function to make a defined "blob" in location (zloc, yloc, xloc) with + """Function to make a defined "blob" in location (zloc, yloc, xloc) with user-specified shape and amplitude. Note that this function will round the size and locations to the nearest point within the array. - + Parameters ---------- in_arr: array-like input array to add the "blob" to + h1_loc: float Center hdim_1 location of the blob, required + h2_loc: float Center hdim_2 location of the blob, required - v_loc: float + + v_loc: float, optional Center vdim location of the blob, optional. If this is None, we assume that the dataset is 2D. - h1_size: float + Default is None + + h1_size: float, optional Size of the bubble in array coordinates in hdim_1 - h2_size: float + Default is 1 + + h2_size: float, optional Size of the bubble in array coordinates in hdim_2 - v_size: float + Default is 1 + + v_size: float, optional Size of the bubble in array coordinates in vdim - shape: str('rectangle') + Default is 1 + + shape: str('rectangle'), optional The shape of the blob that is added. For now, this is just rectangle 'rectangle' adds a rectangular/rectangular prism bubble with constant amplitude `amplitude`. - amplitude: float + Default is "rectangle" + + amplitude: float, optional Maximum amplitude of the blob + Default is 1 Returns ------- @@ -525,6 +597,8 @@ def make_feature_blob( An array with the same type as `in_arr` that has the blob added. """ + import xarray as xr + # Check if z location is there and set our 3D-ness based on this. if v_loc is None: is_3D = False @@ -576,27 +650,37 @@ def set_arr_2D_3D( ---------- in_arr: array-like Array of values to set + value: int, float, or array-like of size (end_v-start_v, end_h1-start_h1, end_h2-start_h2) The value to assign to in_arr. This will work to assign an array, but the array must have the same dimensions as the size specified in the function. + start_h1: int Start index to set for hdim_1 + end_h1: int End index to set for hdim_1 (exclusive, so it acts like [start_h1:end_h1]) + start_h2: int Start index to set for hdim_2 + end_h2: int End index to set for hdim_2 - start_v: int - Start index to set for vdim (optional) - end_v: int - End index to set for vdim (optional) + + start_v: int, optional + Start index to set for vdim + Default is None + + end_v: int, optional + End index to set for vdim + Default is None Returns ------- array-like in_arr with the new values set. """ + if start_v is not None and end_v is not None: in_arr[start_v:end_v, start_h1:end_h1, start_h2:end_h2] = value else: @@ -628,37 +712,64 @@ def generate_single_feature( ---------- start_h1: float Starting point of the feature in hdim_1 space + start_h2: float Starting point of the feature in hdim_2 space - start_v: float + + start_v: float, optional Starting point of the feature in vdim space (if 3D). For 2D, set to None. - spd_h1: float + Default is None + + spd_h1: float, optional Speed (per frame) of the feature in hdim_1 - spd_h2: float + Default is 1 + + spd_h2: float, optional Speed (per frame) of the feature in hdim_2 - spd_v: float + Default is 1 + + spd_v: float, optional Speed (per frame) of the feature in vdim - min_h1: int + Default is 1 + + min_h1: int, optional Minimum value of hdim_1 allowed. If PBC_flag is not 'none', then this will be used to know when to wrap around periodic boundaries. 
@@ -628,37 +712,64 @@ def generate_single_feature(
     ----------
     start_h1: float
         Starting point of the feature in hdim_1 space
+
     start_h2: float
         Starting point of the feature in hdim_2 space
-    start_v: float
+
+    start_v: float, optional
         Starting point of the feature in vdim space (if 3D). For 2D, set to None.
-    spd_h1: float
+        Default is None
+
+    spd_h1: float, optional
         Speed (per frame) of the feature in hdim_1
-    spd_h2: float
+        Default is 1
+
+    spd_h2: float, optional
         Speed (per frame) of the feature in hdim_2
-    spd_v: float
+        Default is 1
+
+    spd_v: float, optional
         Speed (per frame) of the feature in vdim
-    min_h1: int
+        Default is 1
+
+    min_h1: int, optional
         Minimum value of hdim_1 allowed. If PBC_flag is not 'none', then
         this will be used to know when to wrap around periodic boundaries.
         If PBC_flag is 'none', features will disappear if they are above/below
         these bounds.
-    max_h1: int
+        Default is 0
+
+    max_h1: int, optional
         Similar to min_h1, but the max value of hdim_1 allowed.
-    min_h2: int
+        Default is 1000
+
+    min_h2: int, optional
         Similar to min_h1, but the minimum value of hdim_2 allowed.
-    max_h2: int
+        Default is 0
+
+    max_h2: int, optional
         Similar to min_h1, but the maximum value of hdim_2 allowed.
-    num_frames: int
+        Default is 1000
+
+    num_frames: int, optional
         Number of frames to generate
-    dt: datetime.timedelta
+        Default is 1
+
+    dt: datetime.timedelta, optional
         Difference in time between each frame
-    start_date: datetime.datetime
+        Default is datetime.timedelta(minutes=5)
+
+    start_date: datetime.datetime, optional
         Start datetime
-    frame_start: int
+        Default is datetime.datetime(2022, 1, 1, 0)
+
+    frame_start: int, optional
         Number to start the frame at
-    feature_num: int
+        Default is 1
+
+    feature_num: int, optional
         What number to start the feature at
+        Default is 1
     """

     out_list_of_dicts = list()
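A hedged sketch of generate_single_feature; the parameter values are illustrative, and the keyword names follow the docstring above.

import datetime
from tobac.testing import generate_single_feature

# Synthetic feature moving one grid point per frame in each horizontal
# dimension over five frames, five minutes apart:
feats = generate_single_feature(
    start_h1=10,
    start_h2=10,
    spd_h1=1,
    spd_h2=1,
    num_frames=5,
    dt=datetime.timedelta(minutes=5),
    start_date=datetime.datetime(2022, 1, 1, 0),
)
# feats is a pandas.DataFrame describing the feature positions per frame.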
diff --git a/tobac/tracking.py b/tobac/tracking.py
index 1bc9b925..d0eaa606 100644
--- a/tobac/tracking.py
+++ b/tobac/tracking.py
@@ -1,3 +1,24 @@
+"""Provide tracking methods.
+
+The individual features and associated area/volumes identified in
+each timestep have to be linked into trajectories to analyse
+the time evolution of their properties for a better understanding of
+the underlying physical processes.
+The implementations are structured in a way that allows for the future
+addition of more complex tracking methods that record a more complex
+network of relationships between features at different points in
+time.
+
+References
+----------
+.. Heikenfeld, M., Marinescu, P. J., Christensen, M.,
+   Watson-Parris, D., Senf, F., van den Heever, S. C.
+   & Stier, P. (2019). tobac 1.2: towards a flexible
+   framework for tracking and analysis of clouds in
+   diverse datasets. Geoscientific Model Development,
+   12(11), 4551-4570.
+"""
+
 import logging
 import numpy as np
 import pandas as pd
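As context for the linking function documented in the next hunk, a minimal end-to-end sketch of detection followed by linking. The thresholds, dt, dxy and v_max are illustrative values chosen for tobac's synthetic sample data, not values taken from this diff.

import tobac
from tobac.testing import make_sample_data_2D_3blobs

data = make_sample_data_2D_3blobs()  # 1 km grid, 1 min timestep
features = tobac.feature_detection_multithreshold(
    data, dxy=1000.0, threshold=[5.0, 10.0]
)
tracks = tobac.linking_trackpy(
    features, data, dt=60.0, dxy=1000.0, v_max=100.0
)
# tracks['cell'] holds the track id; tracks['time_cell'] the age of the cell.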
@@ -24,31 +45,125 @@ def linking_trackpy(
     cell_number_start=1,
     cell_number_unassigned=-1,
 ):
-    """
-    Function to perform the linking of features in trajectories
-
-    Parameters:
-    features:     pandas.DataFrame
-                  Detected features to be linked
-    v_max:        float
-                  speed at which features are allowed to move
-    dt:           float
-                  time resolution of tracked features
-    dxy:          float
-                  grid spacing of input data
-    memory        int
-                  number of output timesteps features allowed to vanish for to be still considered tracked
-    subnetwork_size int
-                  maximim size of subnetwork for linking
-    method_detection: str('trackpy' or 'threshold')
-                  flag choosing method used for feature detection
-    method_linking:   str('predict' or 'random')
-                  flag choosing method used for trajectory linking
+    """Perform linking of features in trajectories.
+
+    The linking determines which of the features detected in a specific
+    timestep is most likely identical to an existing feature in the
+    previous timestep. For each existing feature, the movement within
+    a time step is extrapolated based on the velocities in a number of
+    previous time steps. The algorithm then breaks the search process
+    down to a few candidate features by restricting the search to a
+    circular search region centered around the predicted position of
+    the feature in the next time step. For newly initialized trajectories,
+    where no velocity from previous time steps is available, the
+    algorithm resorts to the average velocity of the nearest tracked
+    objects. v_max and d_min are given as physical quantities and then
+    converted into pixel-based values used in trackpy. This allows for
+    tracking that is controlled by physically-based parameters that are
+    independent of the temporal and spatial resolution of the input
+    data. The algorithm creates a continuous track for the feature
+    that is the most probable based on the previous cell path.
+
+    Parameters
+    ----------
+    features : pandas.DataFrame
+        Detected features to be linked.
+
+    field_in : xarray.DataArray
+        Input field to perform the watershedding on (2D or 3D for one
+        specific point in time).
+
+    dt : float
+        Time resolution of tracked features.
+
+    dxy : float
+        Grid spacing of the input data.
+
+    d_max : float, optional
+        Maximum search range. Default is None.
+
+    d_min : float, optional
+        Variations in the shape of the regions used to determine the
+        positions of the features can lead to quasi-instantaneous shifts
+        of the position of the feature by one or two grid cells even for
+        a very high temporal resolution of the input data, potentially
+        jeopardising the tracking procedure. To prevent this, tobac uses
+        an additional minimum radius of the search range.
+        Default is None.
+
+    subnetwork_size : int, optional
+        Maximum size of subnetwork for linking. This parameter should be
+        adjusted when using adaptive search; usually a lower value is
+        desired in that case. For a more in-depth explanation, have a look
+        `here `_
+        If None, 30 is used for regular search and 15 for adaptive search.
+        Default is None.
+
+    v_max : float, optional
+        Speed at which features are allowed to move. Default is None.
+
+    memory : int, optional
+        Number of output timesteps features allowed to vanish for to
+        be still considered tracked. Default is 0.
+
+        .. warning:: This parameter should be used with caution, as it
+                     can lead to erroneous trajectory linking,
+                     especially for data with low time resolution.
+
+    stubs : int, optional
+        Minimum number of timesteps of a tracked cell to be reported.
+        Default is 1.
+
+    time_cell_min : float, optional
+        Minimum length in time (in minutes) of a tracked cell to be
+        reported. Default is None.
+
+    order : int, optional
+        Order of polynomial used to extrapolate trajectory into gaps and
+        beyond start and end point.
+        Default is 1.
+
+    extrapolate : int, optional
+        Number of timesteps to extrapolate trajectories.
+        Default is 0.
+
+    method_linking : {'random', 'predict'}, optional
+        Flag choosing method used for trajectory linking.
+        Default is 'random'.
+
+    adaptive_step : float, optional
+        Reduce search range by multiplying it by this factor.
+        Default is None.
+
+    adaptive_stop : float, optional
+        If not None, when encountering an oversize subnet, retry by
+        progressively reducing search_range until the subnet is solvable.
+        If search_range becomes <= adaptive_stop, give up and raise a
+        SubnetOversizeException. Default is None.
+
+    cell_number_start : int, optional
+        Cell number for the first tracked cell.
+        Default is 1.
+
     cell_number_unassigned: int
-        Number to set the unassigned/non-tracked cells to. By default, this is -1.
-        Note that if you set this to `np.nan`, the data type of 'cell' will
-        change to float.
+        Number to set the unassigned/non-tracked cells to. Note that if you set this
+        to `np.nan`, the data type of 'cell' will change to float.
+        Default is -1.
+
+    Returns
+    -------
+    trajectories_final : pandas.DataFrame
+        Dataframe of the linked features, containing the variable 'cell',
+        with integers indicating the affiliation of a feature to a specific
+        track, and the variable 'time_cell' with the time the cell has
+        already existed.
+
+    Raises
+    ------
+    ValueError
+        If method_linking is neither 'random' nor 'predict'.
     """
+
     #    from trackpy import link_df
     import trackpy as tp
     from copy import deepcopy
@@ -202,24 +317,34 @@ def linking_trackpy(


 def fill_gaps(
     t, order=1, extrapolate=0, frame_max=None, hdim_1_max=None, hdim_2_max=None
 ):
-    """add cell time as time since the initiation of each cell
-    Input:
-    t:             pandas dataframe
-                   trajectories from trackpy
-    order:         int
-                   Order of polynomial used to extrapolate trajectory into gaps and beyond start and end point
-    extrapolate    int
-                   number of timesteps to extrapolate trajectories by
-    frame_max:     int
-                   size of input data along time axis
-    hdim_1_max:    int
-                   size of input data along first horizontal axis
-    hdim_2_max:    int
-                   size of input data along second horizontal axis
-    Output:
-    t:             pandas dataframe
-                   trajectories from trackpy with with filled gaps and potentially extrapolated
+    """Fill gaps in the trajectories and optionally extrapolate them.
+
+    Parameters
+    ----------
+    t : pandas.DataFrame
+        Trajectories from trackpy.
+
+    order : int, optional
+        Order of polynomial used to extrapolate trajectory into
+        gaps and beyond start and end point. Default is 1.
+
+    extrapolate : int, optional
+        Number of timesteps to extrapolate trajectories. Default is 0.
+
+    frame_max : int, optional
+        Size of input data along time axis. Default is None.
+
+    hdim_1_max, hdim_2_max : int, optional
+        Size of input data along first and second horizontal axis.
+        Default is None.
+
+    Returns
+    -------
+    t : pandas.DataFrame
+        Trajectories from trackpy with filled gaps and potentially
+        extrapolated.
     """
+
     from scipy.interpolate import InterpolatedUnivariateSpline

     logging.debug("start filling gaps")
@@ -276,13 +401,13 @@ def add_cell_time(t):

     Parameters
     ----------
-    t: pandas DataFrame
-       trajectories with added coordinates
+    t : pandas.DataFrame
+        Trajectories with added coordinates.

     Returns
     -------
-    t: pandas dataframe
-       trajectories with added cell time
+    t : pandas.DataFrame
+        Trajectories with added cell time.
     """

     #    logging.debug('start adding time relative to cell initiation')
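To round this off, a hedged sketch of the gap-filling step. `tracks` is assumed to be output of linking_trackpy as in the earlier sketch, and the frame_max and hdim_*_max values are illustrative grid dimensions, not values from this diff.

from tobac.tracking import fill_gaps

# Fill single-frame gaps in the linked trajectories and extrapolate
# each track by two timesteps at both ends:
tracks_filled = fill_gaps(
    tracks,
    order=1,
    extrapolate=2,
    frame_max=100,    # number of timesteps in the input data
    hdim_1_max=200,   # size of the first horizontal dimension
    hdim_2_max=100,   # size of the second horizontal dimension
)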