-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Multi-index levels as coordinates #947
Changes from 24 commits
f31a278
5e8a677
19ec381
1566938
9f4e4e3
2679318
723c99a
6afcb4a
76c937e
5009ba8
4c78ea9
d28e829
810b4f9
7738059
62b46f2
936ec55
1d6a96f
ec67bbd
f80d7a8
861c78b
37a0796
fdbf4aa
d237022
949fb46
bdaad9b
a447767
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,6 +166,13 @@ def _update_coords(self, coords): | |
self._data._coord_names.update(updated_coord_names) | ||
self._data._dims = dict(dims) | ||
|
||
def __setitem__(self, key, value): | ||
if key in self._data._level_coords: | ||
raise ValueError("cannot replace MultiIndex level %r, replace %r " | ||
"coordinate instead" | ||
% (key, self._data._level_coords[key])) | ||
return super(DatasetCoordinates, self).__setitem__(key, value) | ||
|
||
def __delitem__(self, key): | ||
if key in self: | ||
del self._data[key] | ||
|
@@ -208,13 +215,41 @@ def _to_dataset(self, shallow_copy=True): | |
def to_dataset(self): | ||
return self._to_dataset() | ||
|
||
def __setitem__(self, key, value): | ||
if key in self._data._level_coords: | ||
raise ValueError("cannot replace MultiIndex level %r, replace %r " | ||
"coordinate instead" | ||
% (key, self._data._level_coords[key])) | ||
return super(DataArrayCoordinates, self).__setitem__(key, value) | ||
|
||
def __delitem__(self, key): | ||
if key in self.dims: | ||
raise ValueError('cannot delete a coordinate corresponding to a ' | ||
'DataArray dimension') | ||
del self._data._coords[key] | ||
|
||
|
||
class DataArrayLevelCoordinates(AbstractCoordinates): | ||
"""Dictionary like container for DataArray MultiIndex level coordinates. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably good to clarify: "Used for attribute style lookup. Not returned directly by any public methods." There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added this sentence on the line below... |
||
|
||
Used for attribute style lookup. Not returned directly by any | ||
public methods. | ||
""" | ||
def __init__(self, dataarray): | ||
self._data = dataarray | ||
|
||
@property | ||
def _names(self): | ||
return set(self._data._level_coords) | ||
|
||
@property | ||
def variables(self): | ||
level_coords = OrderedDict( | ||
(k, self._data[v].variable.get_level_variable(k)) | ||
for k, v in self._data._level_coords.items()) | ||
return Frozen(level_coords) | ||
|
||
|
||
class Indexes(Mapping, formatting.ReprMixin): | ||
"""Ordered Mapping[str, pandas.Index] for xarray objects. | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,11 +14,13 @@ | |
from . import utils | ||
from .alignment import align | ||
from .common import AbstractArray, BaseDataObject, squeeze | ||
from .coordinates import DataArrayCoordinates, Indexes | ||
from .coordinates import (DataArrayCoordinates, DataArrayLevelCoordinates, | ||
Indexes) | ||
from .dataset import Dataset | ||
from .pycompat import iteritems, basestring, OrderedDict, zip | ||
from .variable import (as_variable, Variable, as_compatible_data, IndexVariable, | ||
default_index_coordinate) | ||
default_index_coordinate, | ||
assert_unique_multiindex_level_names) | ||
from .formatting import format_item | ||
|
||
|
||
|
@@ -82,6 +84,8 @@ def _infer_coords_and_dims(shape, coords, dims): | |
'length %s on the data but length %s on ' | ||
'coordinate %r' % (d, sizes[d], s, k)) | ||
|
||
assert_unique_multiindex_level_names(new_coords) | ||
|
||
return new_coords, dims | ||
|
||
|
||
|
@@ -417,14 +421,29 @@ def _item_key_to_dict(self, key): | |
key = indexing.expanded_indexer(key, self.ndim) | ||
return dict(zip(self.dims, key)) | ||
|
||
@property | ||
def _level_coords(self): | ||
"""Return a mapping of all MultiIndex levels and their corresponding | ||
coordinate name. | ||
""" | ||
level_coords = OrderedDict() | ||
for cname, var in self._coords.items(): | ||
if var.ndim == 1: | ||
level_names = var.to_index_variable().level_names | ||
if level_names is not None: | ||
dim = var.dims[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use tuple unpacking instead: `(dim,) = var.dims` |
||
level_coords.update({lname: dim for lname in level_names}) | ||
return level_coords | ||
|
||
def __getitem__(self, key): | ||
if isinstance(key, basestring): | ||
from .dataset import _get_virtual_variable | ||
|
||
try: | ||
var = self._coords[key] | ||
except KeyError: | ||
_, key, var = _get_virtual_variable(self._coords, key) | ||
_, key, var = _get_virtual_variable( | ||
self._coords, key, self._level_coords) | ||
|
||
return self._replace_maybe_drop_dims(var, name=key) | ||
else: | ||
|
@@ -444,7 +463,7 @@ def __delitem__(self, key): | |
@property | ||
def _attr_sources(self): | ||
"""List of places to look-up items for attribute-style access""" | ||
return [self.coords, self.attrs] | ||
return [self.coords, DataArrayLevelCoordinates(self), self.attrs] | ||
|
||
def __contains__(self, key): | ||
return key in self._coords | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,34 +33,48 @@ | |
'quarter'] | ||
|
||
|
||
def _get_virtual_variable(variables, key): | ||
"""Get a virtual variable (e.g., 'time.year') from a dict of | ||
xarray.Variable objects (if possible) | ||
def _get_virtual_variable(variables, key, level_vars={}): | ||
"""Get a virtual variable (e.g., 'time.year' or a MultiIndex level) | ||
from a dict of xarray.Variable objects (if possible) | ||
""" | ||
if not isinstance(key, basestring): | ||
raise KeyError(key) | ||
|
||
split_key = key.split('.', 1) | ||
if len(split_key) != 2: | ||
if len(split_key) == 2: | ||
ref_name, var_name = split_key | ||
elif len(split_key) == 1: | ||
ref_name, var_name = key, None | ||
else: | ||
raise KeyError(key) | ||
|
||
ref_name, var_name = split_key | ||
ref_var = variables[ref_name] | ||
if ref_var.ndim == 1: | ||
date = ref_var.to_index() | ||
elif ref_var.ndim == 0: | ||
date = pd.Timestamp(ref_var.values) | ||
if ref_name in level_vars: | ||
dim_var = variables[level_vars[ref_name]] | ||
ref_var = dim_var.to_index_variable().get_level_variable(ref_name) | ||
else: | ||
raise KeyError(key) | ||
ref_var = variables[ref_name] | ||
|
||
if var_name == 'season': | ||
# TODO: move 'season' into pandas itself | ||
seasons = np.array(['DJF', 'MAM', 'JJA', 'SON']) | ||
month = date.month | ||
data = seasons[(month // 3) % 4] | ||
if var_name is None: | ||
virtual_var = ref_var | ||
var_name = key | ||
else: | ||
data = getattr(date, var_name) | ||
return ref_name, var_name, Variable(ref_var.dims, data) | ||
if ref_var.ndim == 1: | ||
date = ref_var.to_index() | ||
elif ref_var.ndim == 0: | ||
date = pd.Timestamp(ref_var.values) | ||
else: | ||
raise KeyError(key) | ||
|
||
if var_name == 'season': | ||
# TODO: move 'season' into pandas itself | ||
seasons = np.array(['DJF', 'MAM', 'JJA', 'SON']) | ||
month = date.month | ||
data = seasons[(month // 3) % 4] | ||
else: | ||
data = getattr(date, var_name) | ||
virtual_var = Variable(ref_var.dims, data) | ||
|
||
return ref_name, var_name, virtual_var | ||
|
||
|
||
def calculate_dimensions(variables): | ||
|
@@ -424,6 +438,21 @@ def _subset_with_all_valid_coords(self, variables, coord_names, attrs): | |
|
||
return self._construct_direct(variables, coord_names, dims, attrs) | ||
|
||
@property | ||
def _level_coords(self): | ||
"""Return a mapping of all MultiIndex levels and their corresponding | ||
coordinate name. | ||
""" | ||
level_coords = OrderedDict() | ||
for cname in self._coord_names: | ||
var = self.variables[cname] | ||
if var.ndim == 1: | ||
level_names = var.to_index_variable().level_names | ||
if level_names is not None: | ||
dim = var.dims[0] | ||
level_coords.update({lname: dim for lname in level_names}) | ||
return level_coords | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Am I missing something here? Wouldn't this also work without the [inline code lost in extraction]?
    for name in self._coord_names:
        var = self.variables[name]
        if name == var.dims[0]:
            level_coords.update(var.to_coord().get_level_coords())
    return level_coords |
||
|
||
def _copy_listed(self, names): | ||
"""Create a new Dataset with the listed variables from this dataset and | ||
the all relevant coordinates. Skips all validation. | ||
|
@@ -436,7 +465,7 @@ def _copy_listed(self, names): | |
variables[name] = self._variables[name] | ||
except KeyError: | ||
ref_name, var_name, var = _get_virtual_variable( | ||
self._variables, name) | ||
self._variables, name, self._level_coords) | ||
variables[var_name] = var | ||
if ref_name in self._coord_names: | ||
coord_names.add(var_name) | ||
|
@@ -452,7 +481,8 @@ def _construct_dataarray(self, name): | |
try: | ||
variable = self._variables[name] | ||
except KeyError: | ||
_, name, variable = _get_virtual_variable(self._variables, name) | ||
_, name, variable = _get_virtual_variable( | ||
self._variables, name, self._level_coords) | ||
|
||
coords = OrderedDict() | ||
needed_dims = set(variable.dims) | ||
|
@@ -521,6 +551,12 @@ def __setitem__(self, key, value): | |
if utils.is_dict_like(key): | ||
raise NotImplementedError('cannot yet use a dictionary as a key ' | ||
'to set Dataset values') | ||
|
||
if key in self._level_coords: | ||
raise ValueError("%r is already a MultiIndex level of " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is already checked by [code reference lost in extraction]. But this is perhaps worth keeping anyways for the better error message? If so, please add a comment. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just checked and confirm that this is already checked by [code reference lost in extraction]. I'd be OK to remove this. Maybe it is a good thing not to have too many different messages for similar errors... |
||
"coordinate %r" | ||
% (key, self._level_coords[key])) | ||
|
||
self.update({key: value}) | ||
|
||
def __delitem__(self, key): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just define this method once on `AbstractCoordinates` instead of repeating it twice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I think these should also be caught by the checks in `merge.py`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep! So I can remove this.