Commit

Saver supports split-attributes saving (no tests yet).
pp-mo committed Aug 2, 2023
1 parent 8ddc88d commit 41e5523
Showing 1 changed file with 155 additions and 41 deletions.
196 changes: 155 additions & 41 deletions lib/iris/fileformats/netcdf/saver.py
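
For orientation before the diff, here is a minimal sketch (not part of the commit) of how the new split-attributes saving might be exercised. It assumes the iris.FUTURE.save_split_attrs flag and the split cube.attributes.globals / cube.attributes.locals interface referenced in the changes below, reached through the public iris.save entry point; at the time of this commit the feature is still untested.

import numpy as np

import iris
from iris.cube import Cube

# Opt in to the new split-attributes behaviour (flag name as referenced in the diff).
iris.FUTURE.save_split_attrs = True

cube = Cube(np.zeros((2, 3)), var_name="air_temperature", units="K")
# "Global" cube attributes are candidates for file-global attributes ...
cube.attributes.globals["history"] = "created for demonstration"
# ... while "local" attributes are written onto the data variable itself.
cube.attributes.locals["processing_note"] = "demo local attribute"

iris.save(cube, "split_attrs_example.nc")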
@@ -540,6 +540,8 @@ def write(
An iterable of cube attribute keys. Any cube attributes with
matching keys will become attributes on the data variable rather
than global attributes.
.. note::
Has no effect if :attr:`iris.FUTURE.save_split_attrs` is ``True``.
* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
@@ -709,20 +711,27 @@ def write(
# aux factory in the cube.
self._add_aux_factories(cube, cf_var_cube, cube_dimensions)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
global_attributes = {
k: v
for k, v in cube.attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)
if not iris.FUTURE.save_split_attrs:
# In the "old" way, we update global attributes as we go.
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
cube_attributes = cube.attributes
if iris.FUTURE.save_split_attrs:
# In this case, do *not* promote any 'local' attributes to global ones,
# only "global" cube attrs may be written as global file attributes
cube_attributes = cube_attributes.globals
global_attributes = {
k: v
for k, v in cube_attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)

if cf_profile_available:
cf_patch = iris.site_configuration.get("cf_patch")
@@ -778,6 +787,9 @@ def update_global_attributes(self, attributes=None, **kwargs):
CF global attributes to be updated.
"""
# TODO: when we no longer support combined attribute saving, this routine will
# only be called once: it can reasonably be renamed "_set_global_attributes",
# and the 'kwargs' argument can be removed.
if attributes is not None:
# Handle sequence e.g. [('fruit', 'apple'), ...].
if not hasattr(attributes, "keys"):
@@ -2219,6 +2231,8 @@ def _create_cf_data_variable(
The newly created CF-netCDF data variable.
"""
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)

@@ -2307,16 +2321,20 @@ def set_packing_ncattrs(cfvar):
if cube.units.calendar:
_setncattr(cf_var, "calendar", cube.units.calendar)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
if iris.FUTURE.save_split_attrs:
attr_names = cube.attributes.locals.keys()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)
for attr_name in sorted(attr_names):
# Do not output 'conventions' attribute.
if attr_name.lower() == "conventions":
@@ -2781,26 +2799,117 @@ def save(
else:
cubes = cube

if local_keys is None:
if iris.FUTURE.save_split_attrs:
# We don't actually use 'local_keys' in this case.
# TODO: can remove this when iris.FUTURE.save_split_attrs is removed.
local_keys = set()

# Find any collisions in the cube global attributes and "demote" all those to
# local attributes (where possible, else warn they are lost).
# N.B. "collision" includes when not all cubes *have* that attribute.
global_names = set()
for cube in cubes:
global_names |= set(cube.attributes.globals.keys())

# Find any global attributes which are not the same on *all* cubes.
def attr_values_equal(val1, val2):
# An equality test which also works when some values are numpy arrays (!)
# As done in :meth:`iris.common.mixin.LimitedAttributeDict.__eq__`.
match = val1 == val2
try:
match = bool(match)
except ValueError:
match = match.all()
return match

cube0 = cubes[0]
invalid_globals = [
attrname
for attrname in global_names
if not all(
attr_values_equal(
cube.attributes[attrname], cube0.attributes[attrname]
)
for cube in cubes[1:]
)
]

# Establish all the global attributes which we will write to the file (at end).
global_attributes = {
attr: cube0.attributes.globals[attr]
for attr in global_names
if attr not in invalid_globals
}
if invalid_globals:
# Some cubes have different global attributes: modify cubes as required.
warnings.warn(
f"Saving the cube global attributes {invalid_globals} as local"
"(i.e. data-variable) attributes, where possible, since they are not '"
"the same on all input cubes."
)
cubes = list(cubes)  # avoid modifying the actual input arg.
for i_cube in range(len(cubes)):
# We iterate over cube *index*, so we can replace the list entries
# with cube *copies* -- just to avoid changing our call args.
cube = cubes[i_cube]
demote_attrs = [
attr
for attr in cube.attributes.globals
if attr in invalid_globals
]
if any(demote_attrs):
# This cube contains some 'demoted' global attributes.
# Replace the input cube with a copy, so we can modify attributes.
cube = cube.copy()
cubes[i_cube] = cube
# Catch any demoted attrs where there is already a local version
blocked_attrs = [
attrname
for attrname in demote_attrs
if attrname in cube.attributes.locals
]
if blocked_attrs:
warnings.warn(
f"Global cube attributes {blocked_attrs} "
f'of cube "{cube.name()}" have been lost, overlaid '
"by existing local attributes with the same names."
)
for attr in demote_attrs:
if attr not in blocked_attrs:
cube.attributes.locals[
attr
] = cube.attributes.globals[attr]
cube.attributes.globals.pop(attr)

else:
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)
# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
# NOTE: in 'legacy' mode, this code derives a common value for 'local_keys', which
# is employed in saving each cube.
# However, in `split_attrs` mode, this considers ONLY global attributes, and the
# resulting 'common_keys' is the fixed result: each cube is then saved like ...
# "sman.write(... local_keys=list(cube.attributes) - common_keys, ...)"
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)

common_attr_values = None
for cube in cubes:
cube_attributes = cube.attributes
keys = set(cube_attributes)
if common_attr_values is None:
common_attr_values = cube_attributes.copy()
common_keys = keys.copy()
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(common_attr_values[key] != cube_attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)

def is_valid_packspec(p):
"""Only checks that the datatype is valid."""
@@ -2902,7 +3011,12 @@ def is_valid_packspec(p):
warnings.warn(msg)

# Add conventions attribute.
sman.update_global_attributes(Conventions=conventions)
if iris.FUTURE.save_split_attrs:
# In the "new way", we just create all the global attributes at once.
global_attributes["Conventions"] = conventions
sman.update_global_attributes(global_attributes)
else:
sman.update_global_attributes(Conventions=conventions)

if compute:
# No more to do, since we used Saver(compute=True).
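As a usage note (again, not part of the commit), the collision handling added to save() above suggests behaviour along these lines when two cubes disagree on a "global" attribute; the same flag and split-attributes interface are assumed.

import warnings

import numpy as np

import iris
from iris.cube import Cube

iris.FUTURE.save_split_attrs = True

cube_a = Cube(np.zeros(3), var_name="a")
cube_b = Cube(np.zeros(3), var_name="b")
# The two cubes disagree on the same "global" attribute ...
cube_a.attributes.globals["source"] = "model run 1"
cube_b.attributes.globals["source"] = "model run 2"

# ... so, per the collision handling above, "source" should be demoted to a
# per-variable (local) attribute on each data variable, with a warning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    iris.save([cube_a, cube_b], "collision_example.nc")

print([str(w.message) for w in caught])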
