handley-lab · williamjameshandley · Aug 21, 2022 · Aug 7, 2022 · Aug 7, 2022 · Aug 7, 2022
diff --git a/anesthetic/__init__.py b/anesthetic/__init__.py
@@ -15,6 +15,7 @@
 import pandas
 import pandas.plotting._core
 import pandas.plotting._misc
+from anesthetic._format import _DataFrameFormatter
 
 
 def _anesthetic_override(_get_plot_backend):
@@ -40,6 +41,8 @@ def wrapper(backend=None):
 # Set anesthetic.plotting._matplotlib as the actual backend
 pandas.options.plotting.backend = 'anesthetic.plotting._matplotlib'
 
+pandas.io.formats.format.DataFrameFormatter = _DataFrameFormatter
+pandas.options.display.max_colwidth = 14
 
 Samples = anesthetic.samples.Samples
 MCMCSamples = anesthetic.samples.MCMCSamples

diff --git a/anesthetic/_format.py b/anesthetic/_format.py
@@ -0,0 +1,55 @@
+# flake8: noqa
+from pandas.io.formats.format import (
+    DataFrameFormatter as DataFrameFormatter,
+    _make_fixed_width, is_numeric_dtype
+)
+from pandas import MultiIndex
+
+
+class _DataFrameFormatter(DataFrameFormatter):  # overrides column-label formatting
+
+    def _get_formatted_column_labels(self, frame):
+        """Return the fixed-width header strings of each column of `frame`."""
+        try:
+            from pandas.core.indexes.multi import sparsify_labels
+        except ImportError:
+            sparsify_labels = lambda x, *args: x  # fallback: leave labels as-is
+
+        columns = frame.columns
+
+        if isinstance(columns, MultiIndex):
+            fmt_columns = columns.format(sparsify=False, adjoin=False)
+            fmt_columns = list(zip(*fmt_columns))  # one tuple per column
+            dtypes = self.frame.dtypes._values
+            # if we have a Float level, they don't use leading space at all
+            # NOTE(review): `is_floating` is referenced, not called -- confirm.
+            restrict_formatting = any(level.is_floating for level in columns.levels)
+            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
+
+            def space_format(x, y):
+                if (
+                    y not in self.formatters
+                    and need_leadsp[x]
+                    and not restrict_formatting
+                ):
+                    return " " + y
+                return y
+
+            str_columns = list(
+                zip(*([space_format(x, y) for y in x] for x in fmt_columns))
+            )
+            if self.sparsify and len(str_columns):
+                str_columns = sparsify_labels(str_columns)
+
+            str_columns = [list(x) for x in zip(*str_columns)]
+            str_columns = [_make_fixed_width(x) for x in str_columns]
+        else:
+            fmt_columns = columns.format()
+            dtypes = self.frame.dtypes
+            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
+            str_columns = [
+                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
+                for i, x in enumerate(fmt_columns)
+            ]
+            str_columns = [_make_fixed_width(x) for x in str_columns]
+        return str_columns
diff --git a/anesthetic/convert.py b/anesthetic/convert.py
@@ -15,6 +15,7 @@ def to_getdist(nested_samples):
         getdist equivalent samples
     """
     import getdist
+    nested_samples = nested_samples.drop_labels(1)
     samples = nested_samples.to_numpy()
     weights = nested_samples.get_weights()
     loglikes = -nested_samples.logL.to_numpy()

diff --git a/anesthetic/gui/plot.py b/anesthetic/gui/plot.py
@@ -159,7 +159,7 @@ def __init__(self, samples, params=None):
         if params:
             self.params = np.array(params)
         else:
-            self.params = np.array(self.samples.columns[:10])
+            self.params = np.array(self.samples.drop_labels(1).columns[:10])
 
         self.fig = plt.figure()
         self._set_up()
@@ -213,7 +213,8 @@ def _set_up(self):
 
     def redraw(self, _):
         """Redraw the triangle plot upon parameter updating."""
-        self.triangle.draw(self.param_choice(), self.samples.tex)
+        self.triangle.draw(self.param_choice(),
+                           self.samples.get_labels_map(axis=1))
         self.update(None)
         self.reset_range(None)
         self.fig.tight_layout()

diff --git a/anesthetic/gui/widgets.py b/anesthetic/gui/widgets.py
@@ -226,13 +226,13 @@ def __init__(self, fig, gridspec):
         self.fig.delaxes(self.ax)
         _, self.ax = make_2d_axes([], fig=self.fig, subplot_spec=self.gridspec)
 
-    def draw(self, labels, tex={}):
-        """Draw a new triangular grid for list of parameters labels.
+    def draw(self, params, labels={}):
+        """Draw a new triangular grid for list of parameters.
 
         Parameters
         ----------
-            labels: list(str)
-                labels for the triangular grid.
+            params: list(str)
+                params for the triangular grid.
 
         """
         # Remove any existing axes
@@ -244,7 +244,7 @@ def draw(self, labels, tex={}):
                     self.fig.delaxes(ax)
 
         # Set up the axes
-        _, self.ax = make_2d_axes(labels, upper=False, tex=tex,
+        _, self.ax = make_2d_axes(params, upper=False, labels=labels,
                                   fig=self.fig, subplot_spec=self.gridspec)
 
         # Plot no points  points.

diff --git a/anesthetic/labelled_pandas.py b/anesthetic/labelled_pandas.py
@@ -0,0 +1,216 @@
+"""Pandas DataFrame and Series with labelled columns."""
+from pandas import Series, DataFrame, MultiIndex
+from pandas.core.indexing import (_LocIndexer as _LocIndexer_,
+                                  _AtIndexer as _AtIndexer_)
+import numpy as np
+from functools import cmp_to_key
+
+
+def ac(funcs, *args):
+    """Accessor function helper.
+
+    Call every callable in `funcs` with `*args`, skipping those that raise,
+    and return the first non-None result after a stable sort by dimension
+    (smallest first), ties going to the result whose axes have most levels.
+    If there is no such result, re-raise the last caught exception, or
+    return None when nothing raised.
+    """
+    results = []
+    errors = []
+    for f in funcs:
+        try:
+            results.append(f(*args))
+        except Exception as e:
+            errors.append(e)
+
+    def cmp(x, y):
+        # np.ndim (unlike `.ndim`) also accepts plain Python scalars, e.g. the
+        # str or None stored in an object column.
+        x_ndim, y_ndim = np.ndim(x), np.ndim(y)
+        if x_ndim != y_ndim:
+            return 1 if x_ndim > y_ndim else -1
+        x_levels = 0
+        y_levels = 0
+        if x_ndim > 0:
+            x_levels += x.index.nlevels
+            y_levels += y.index.nlevels
+        if x_ndim > 1:
+            x_levels += x.columns.nlevels
+            y_levels += y.columns.nlevels
+        # more levels sorts earlier, hence the inverted comparison
+        if x_levels < y_levels:
+            return 1
+        return -1 if x_levels > y_levels else 0
+
+    results.sort(key=cmp_to_key(cmp))
+
+    for s in results:
+        if s is not None:
+            return s
+    if errors:
+        raise errors[-1]
+    return None
+
+
+class _LocIndexer(_LocIndexer_):  # `.loc` that can skip label levels
+    def __getitem__(self, key):  # tries label-dropped views and self.obj
+        return ac([_LocIndexer_("loc", self.obj.drop_labels(i)).__getitem__
+                   for i in self.obj._all_axes()] + [super().__getitem__], key)
+
+
+class _AtIndexer(_AtIndexer_):  # `.at` that can skip label levels
+    def __getitem__(self, key):  # tries label-dropped views and self.obj
+        return ac([_AtIndexer_("at", self.obj.drop_labels(i)).__getitem__
+                   for i in self.obj._all_axes()] + [super().__getitem__], key)
+
+
+class _LabelledObject(object):
+    """Common methods for LabelledSeries and LabelledDataFrame."""
+
+    def __init__(self, *args, **kwargs):
+        self._labels = ("labels", "labels")
+        labels = kwargs.pop(self._labels[0], None)
+        super().__init__(*args, **kwargs)
+        if labels is not None:
+            self.set_labels(labels, inplace=True)
+
+    def islabelled(self, axis=0):
+        """Determine if labels are actually present."""
+        name = self._labels[axis]
+        return name is not None and name in self._get_axis(axis).names
+
+    def get_labels(self, axis=0):
+        """Retrieve labels from an axis (None if it is unlabelled)."""
+        if not self.islabelled(axis):
+            return None
+        name = self._labels[axis]
+        return self._get_axis(axis).get_level_values(name).to_numpy()
+
+    def get_labels_map(self, axis=0):
+        """Retrieve mapping from paramnames to labels from an axis."""
+        index = self._get_axis(axis)
+        if not self.islabelled(axis):
+            return Series('', index=index)
+        name = self._labels[axis]  # name of the label level on this axis
+        return index.to_frame().droplevel(name)[name]
+
+    def get_label(self, param, axis=0):
+        """Retrieve the label of a single parameter from an axis."""
+        return self.get_labels_map(axis)[param]
+
+    def set_label(self, param, value, axis=0, inplace=False):
+        """Set the label of a single parameter along an axis."""
+        labels = self.get_labels_map(axis)
+        labels[param] = value
+        return self.set_labels(labels, axis=axis, inplace=inplace)
+
+    def drop_labels(self, axis=0):
+        """Drop the label level from an axis, or from each of several axes."""
+        result = self
+        for ax in np.atleast_1d(axis):
+            if self.islabelled(ax):
+                result = result.droplevel(self._labels[ax], ax)
+        return result
+
+    def _all_axes(self):
+        """List the `drop_labels` arguments that the accessors try."""
+        return [0] if isinstance(self, LabelledSeries) else [0, 1, [0, 1]]
+
+    @property
+    def loc(self):
+        """Label-aware counterpart of pandas ``.loc``."""
+        return _LocIndexer("loc", self)
+
+    @property
+    def at(self):
+        """Label-aware counterpart of pandas ``.at``."""
+        return _AtIndexer("at", self)
+
+    def xs(self, key, axis=0, level=None, drop_level=True):
+        """Cross-section that may be keyed with or without the labels."""
+        return ac([super(_LabelledObject, self.drop_labels(i)).xs
+                   for i in self._all_axes()] + [super().xs],
+                  key, axis, level, drop_level)
+
+    def __getitem__(self, key):
+        """Item access that may be keyed with or without the labels."""
+        return ac([super(_LabelledObject, self.drop_labels(i)).__getitem__
+                   for i in self._all_axes()] + [super().__getitem__], key)
+
+    def __setitem__(self, key, val):
+        super().__setitem__(key, val)
+
+    def set_labels(self, labels, axis=0, inplace=False, level=None):
+        """Set labels along an axis (None removes them).
+
+        The label level goes at position `level`; by default it keeps its
+        current position, or is appended if the axis is not yet labelled.
+        If `inplace`, self is updated and None returned; otherwise a
+        labelled copy is returned.
+        """
+        result = self if inplace else self.copy()
+        if labels is None:
+            result = result.drop_labels(axis)
+        else:
+            name = self._labels[axis]
+            old = result._get_axis(axis)
+            names = [n for n in old.names if n != name]
+            index = [old.get_level_values(n) for n in names]
+            if level is None:
+                level = (old.names.index(name) if result.islabelled(axis)
+                         else len(index))
+            index.insert(level, labels)
+            names.insert(level, name)
+            index = MultiIndex.from_arrays(index, names=names)
+            # not `inplace=True`: deprecated in pandas 1.5, removed in 2.0
+            result = result.set_axis(index, axis=axis)
+        if inplace:
+            self._update_inplace(result)
+        else:
+            return result.__finalize__(self, "set_labels")
+
+    def reset_index(self, level=None, drop=False, inplace=False,
+                    *args, **kwargs):
+        """Reset the index, retaining labels."""
+        labels = self.get_labels()
+        answer = super().reset_index(level=level, drop=drop,
+                                     inplace=False, *args, **kwargs)
+        answer.set_labels(labels, inplace=True)
+        if inplace:
+            self._update_inplace(answer)
+        else:
+            return answer.__finalize__(self, "reset_index")
+
+
+class LabelledSeries(_LabelledObject, Series):
+    """pandas.Series carrying the shared labelled-object methods."""
+
+    _metadata = [*Series._metadata, '_labels']  # also track `_labels`
+
+    @property
+    def _constructor(self):  # class pandas uses for same-dimension results
+        return LabelledSeries
+
+    @property
+    def _constructor_expanddim(self):  # class used when growing to 2D
+        return LabelledDataFrame
+
+
+class LabelledDataFrame(_LabelledObject, DataFrame):
+    """pandas.DataFrame carrying the shared labelled-object methods."""
+
+    _metadata = [*DataFrame._metadata, '_labels']  # also track `_labels`
+
+    @property
+    def _constructor_sliced(self):  # class used when reducing to 1D
+        return LabelledSeries
+
+    @property
+    def _constructor(self):  # class pandas uses for same-dimension results
+        return LabelledDataFrame
+
+    def transpose(self, copy=False):
+        """Transpose, exchanging the label names of the two axes."""
+        flipped = super().transpose(copy=copy)
+        flipped._labels = tuple(flipped._labels[i] for i in (1, 0))
+        return flipped