Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Groupby bins empty groups #1027

Merged
merged 5 commits into from
Oct 3, 2016
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions doc/data-structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,10 @@ Transforming datasets
In addition to dictionary-like methods (described above), xarray has additional
methods (like pandas) for transforming datasets into new objects.

For removing variables, you can select and drop an explicit list of variables
by indexing with a list of names or using the :py:meth:`~xarray.Dataset.drop`
method to return a new ``Dataset``. These operations keep around coordinates:
For removing variables, you can select and drop an explicit list of
variables by indexing with a list of names or using the
:py:meth:`~xarray.Dataset.drop` method to return a new ``Dataset``. These
operations keep around coordinates:

.. ipython:: python

Expand Down
8 changes: 6 additions & 2 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def groupby(self, group, squeeze=True):
return self.groupby_cls(self, group, squeeze=squeeze)

def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
include_lowest=False, squeeze=True):
include_lowest=False, squeeze=True, drop_empty_bins=True):
"""Returns a GroupBy object for performing grouped operations.

Rather than using all unique values of `group`, the values are discretized
Expand Down Expand Up @@ -378,6 +378,9 @@ def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
If "group" is a dimension of any arrays in this dataset, `squeeze`
controls whether the subarrays have a dimension of length 1 along
that dimension or if the dimension is squeezed out.
drop_empty_bins : boolean, optional
If true, empty bins are dropped from the group. If false, they are
filled with NaN.

Returns
-------
Expand All @@ -396,7 +399,8 @@ def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
return self.groupby_cls(self, group, squeeze=squeeze, bins=bins,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})
'include_lowest': include_lowest},
drop_empty_bins=drop_empty_bins)

def rolling(self, min_periods=None, center=False, **windows):
"""
Expand Down
7 changes: 6 additions & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class GroupBy(object):
DataArray.groupby
"""
def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
cut_kwargs={}):
cut_kwargs={}, drop_empty_bins=True):
"""Create a GroupBy object

Parameters
Expand All @@ -154,6 +154,9 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
specified bins by `pandas.cut`.
cut_kwargs : dict, optional
Extra keyword arguments to pass to `pandas.cut`
drop_empty_bins : boolean, optional
If true, empty bins are dropped from the group. If false, they are
filled with NaN.
"""
from .dataset import as_dataset
from .dataarray import DataArray
Expand Down Expand Up @@ -193,6 +196,8 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
binned = pd.cut(group.values, bins, **cut_kwargs)
new_dim_name = group.name + '_bins'
group = DataArray(binned, group.coords, name=new_dim_name)
if not drop_empty_bins:
full_index = binned.categories
if grouper is not None:
index = safe_cast_to_index(group)
if not index.is_monotonic:
Expand Down
19 changes: 19 additions & 0 deletions xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,25 @@ def test_groupby_bins(self):
# (would fail with shortcut=True above)
self.assertEqual(len(array.dim_0), 4)

def test_groupby_bins_empty(self):
array = DataArray(np.arange(4), dims='dim_0')
# one of these bins will be empty
bins = [0,4,5]
actual = array.groupby_bins('dim_0', bins, drop_empty_bins=True).sum()
print(actual)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prob don't want to keep this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops! yes forgot to take that out

expected = DataArray([6], dims='dim_0_bins',
coords={'dim_0_bins': ['(0, 4]']})
self.assertDataArrayIdentical(expected, actual)
# make sure original array is unchanged
# (was a problem in earlier versions)
self.assertEqual(len(array.dim_0), 4)

# now do the same
actual = array.groupby_bins('dim_0', bins, drop_empty_bins=False).sum()
expected = DataArray([6, np.nan], dims='dim_0_bins',
coords={'dim_0_bins': ['(0, 4]','(4, 5]']})
self.assertDataArrayIdentical(expected, actual)

def test_groupby_bins_multidim(self):
array = self.make_groupby_multidim_example_array()
bins = [0,15,20]
Expand Down