Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract forecast reference time #118

Merged
merged 19 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
2a02719
Move fix_forecast_reference_time() to function.
truth-quark Sep 27, 2024
8b26728
Add forecast reference time constants.
truth-quark Sep 27, 2024
a874f0e
Modify DummyCube to allow coordinate updates post construction.
truth-quark Sep 27, 2024
e93b1e0
Add initial tests for forecast reference time fixes.
truth-quark Sep 27, 2024
48d383c
Move convert_proleptic() to gather time functionality.
truth-quark Sep 27, 2024
9215c89
Add basic reference time assertions.
truth-quark Sep 27, 2024
2240635
Update reference time test fixtures.
truth-quark Sep 27, 2024
5a69457
Update reference time constants.
truth-quark Sep 27, 2024
fd664c7
Add skeleton of forecast reference time testing.
truth-quark Sep 27, 2024
d711729
Improve forecast reference time test fixtures & comments.
truth-quark Oct 2, 2024
7ee59a2
Remove duplicate calendar constants & use cf_units.
truth-quark Oct 2, 2024
16a3768
Use cf_units calendar constants.
truth-quark Oct 2, 2024
2eaa205
Fix conflicts & merge.
truth-quark Oct 9, 2024
b3c0cf1
Add try/except refactoring comments from PR ideas.
truth-quark Oct 10, 2024
6e8b5e2
Add comments for forecast reference time fixes.
truth-quark Oct 10, 2024
0925960
Add forecast ref time fixture & use in test.
truth-quark Oct 10, 2024
43e3ba1
Flag tasks for forecast ref time fixes.
truth-quark Oct 10, 2024
d3164e3
Refactor DummyCube to delineate between the iris API & auxiliary help…
truth-quark Oct 10, 2024
3820528
Flag test fixtures for repair in future work.
truth-quark Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions test/test_um2netcdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import unittest.mock as mock
import warnings
from dataclasses import dataclass
from collections import namedtuple

import cf_units
from iris.exceptions import CoordinateNotFoundError
import operator

Expand Down Expand Up @@ -79,6 +82,7 @@ class DummyStash:
item: int


# TODO: would making this a dataclass provide any benefit?
class DummyCube:
"""
Imitation iris Cube for unit testing.
Expand Down Expand Up @@ -116,6 +120,20 @@ def coord(self, _name):
msg = f"{self.__class__}[{self.var_name}]: lacks coord for '{_name}'"
raise CoordinateNotFoundError(msg)

def remove_coord(self, coord):
del self._coordinates[coord]

# Auxiliary methods
# These methods DO NOT exist in the iris cube API, these are helper methods
# to configure DummyCubes for testing.
#
# All methods should see an `aux_` prefix

def aux_update_coords(self, coords):
# Mimic a coordinate dictionary keys for iris coordinate names. This
blimlim marked this conversation as resolved.
Show resolved Hide resolved
# ensures the access key for coord() matches the coordinate's name
self._coordinates = {c.name(): c for c in coords} if coords else {}


# NB: these cube fixtures have been chosen to mimic cubes for testing key parts
# of the process() workflow. Some cubes require pressure level masking with the
Expand Down Expand Up @@ -1107,3 +1125,109 @@ def test_convert_32_bit_with_float64(ua_plev_cube):
ua_plev_cube.data = array
um2nc.convert_32_bit(ua_plev_cube)
assert ua_plev_cube.data.dtype == np.float32


# fix forecast reference time tests
@pytest.fixture
def forecast_cube():
# NB: using a non-existent item code for fake forecast cube
return DummyCube(item_code=999)


@pytest.fixture
def time_points():
"""Use for cube.coord('time').points attribute."""
return [-16382964.]


@pytest.fixture
def forecast_ref_time_points():
"""Use for cube.coord('forecast_reference_time').points attribute."""
return [-16383336.]


# FIXME: unit.calendar needs updating as per:
# https://github.com/ACCESS-NRI/um2nc-standalone/pull/118#issuecomment-2404034473
@pytest.fixture
def forecast_ref_time_coord(forecast_ref_time_points):
# units & point data ripped from aiihca.paa1jan data file:
truth-quark marked this conversation as resolved.
Show resolved Hide resolved
# cubes = iris.load("aiihca.paa1jan")
# cubes[0].long_name --> 'atmosphere_optical_thickness_due_to_sulphate_ambient_aerosol'
# cubes[0].coord("time").points --> array([-16382964.])
unit = cf_units.Unit(unit="hours since 1970-01-01 00:00:00")
assert unit.calendar == cf_units.CALENDAR_STANDARD
blimlim marked this conversation as resolved.
Show resolved Hide resolved

return iris.coords.DimCoord(forecast_ref_time_points,
standard_name=um2nc.FORECAST_REFERENCE_TIME,
units=unit)


# FIXME: unit.calendar needs updating as per:
# https://github.com/ACCESS-NRI/um2nc-standalone/pull/118#issuecomment-2404034473
@pytest.fixture
def time_coord(time_points):
# units data ripped from aiihca data file
unit = cf_units.Unit(unit="hours since 1970-01-01 00:00:00",
calendar=cf_units.CALENDAR_GREGORIAN)
assert unit.calendar == cf_units.CALENDAR_STANDARD

return iris.coords.DimCoord(time_points,
standard_name=um2nc.TIME,
units=unit)


def test_fix_forecast_reference_time_exit_on_missing_ref_time(forecast_cube):
# verify fix_forecast_ref_time() exits early if the coord is missing
with pytest.raises(iris.exceptions.CoordinateNotFoundError):
forecast_cube.coord(um2nc.FORECAST_REFERENCE_TIME)

# TODO: fails to assert the cube is unmodified
# TODO: is this test even needed?
assert um2nc.fix_forecast_reference_time(forecast_cube) is None
truth-quark marked this conversation as resolved.
Show resolved Hide resolved


def test_fix_forecast_reference_time_exit_on_missing_time(forecast_cube,
forecast_ref_time_coord):
truth-quark marked this conversation as resolved.
Show resolved Hide resolved
# verify fix_forecast_ref_time() exits early if the coord is missing
forecast_cube.aux_update_coords([forecast_ref_time_coord])

with pytest.raises(iris.exceptions.CoordinateNotFoundError):
forecast_cube.coord(um2nc.TIME)

# TODO: fails to assert the cube is unmodified
# TODO: is this test even needed?
assert um2nc.fix_forecast_reference_time(forecast_cube) is None


def test_fix_forecast_reference_time_standard(forecast_cube,
forecast_ref_time_coord,
time_coord):
# TODO: executes part of the ref time fix code
# TODO: needs to assert the changes!
forecast_period = iris.coords.DimCoord([372.0],
standard_name=um2nc.FORECAST_PERIOD)

forecast_cube.aux_update_coords([forecast_ref_time_coord,
time_coord,
forecast_period])

assert um2nc.fix_forecast_reference_time(forecast_cube) is None

# TODO: add assertions here
warnings.warn("test_fix_forecast_reference_time_standard asserts nothing")


@pytest.mark.skip
def test_fix_forecast_reference_time_gregorian(forecast_cube,
forecast_ref_time_coord,
time_coord):
msg = "Is time.units.calendar == 'gregorian' branch & testing required?"
raise NotImplementedError(msg)


@pytest.mark.skip
def test_fix_forecast_reference_time_proleptic_gregorian(forecast_cube,
forecast_ref_time_coord,
time_coord):
msg = "Is time.units.calendar == 'proleptic_gregorian' branch & testing required?"
raise NotImplementedError(msg)
145 changes: 90 additions & 55 deletions umpost/um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@
SIGMA_THETA = "sigma_theta"
SIGMA_RHO = "sigma_rho"

# forecast reference time constants
FORECAST_REFERENCE_TIME = "forecast_reference_time"
FORECAST_PERIOD = "forecast_period"
TIME = "time"


blimlim marked this conversation as resolved.
Show resolved Hide resolved
class PostProcessingError(Exception):
"""Generic class for um2nc specific errors."""
Expand Down Expand Up @@ -110,34 +115,6 @@ def pg_calendar(self):
PPField.calendar = pg_calendar


# TODO: rename time to avoid clash with builtin time module
def convert_proleptic(time):
# Convert units from hours to days and shift origin from 1970 to 0001
newunits = cf_units.Unit("days since 0001-01-01 00:00", calendar='proleptic_gregorian')
tvals = np.array(time.points) # Need a copy because can't assign to time.points[i]
tbnds = np.array(time.bounds) if time.bounds is not None else None

for i in range(len(time.points)):
date = time.units.num2date(tvals[i])
newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day,
date.hour, date.minute, date.second)
tvals[i] = newunits.date2num(newdate)

if tbnds is not None: # Fields with instantaneous data don't have bounds
for j in range(2):
date = time.units.num2date(tbnds[i][j])
newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day,
date.hour, date.minute, date.second)
tbnds[i][j] = newunits.date2num(newdate)

time.points = tvals

if tbnds is not None:
time.bounds = tbnds

time.units = newunits


def fix_lat_coord_name(lat_coordinate, grid_type, dlat):
"""
Add a 'var_name' attribute to a latitude coordinate object
Expand Down Expand Up @@ -346,33 +323,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):

cube.attributes['missing_value'] = np.array([fill_value], cube.data.dtype)

# If reference date is before 1600 use proleptic gregorian
# calendar and change units from hours to days
try:
reftime = cube.coord('forecast_reference_time')
time = cube.coord('time')
refdate = reftime.units.num2date(reftime.points[0])
assert time.units.origin == 'hours since 1970-01-01 00:00:00'

if time.units.calendar == 'proleptic_gregorian' and refdate.year < 1600:
convert_proleptic(time)
else:
if time.units.calendar == 'gregorian':
new_calendar = 'proleptic_gregorian'
else:
new_calendar = time.units.calendar

time.units = cf_units.Unit("days since 1970-01-01 00:00", calendar=new_calendar)
time.points = time.points/24.

if time.bounds is not None:
time.bounds = time.bounds/24.

cube.remove_coord('forecast_period')
cube.remove_coord('forecast_reference_time')
except iris.exceptions.CoordinateNotFoundError:
# Dump files don't have forecast_reference_time
pass
fix_forecast_reference_time(cube)

# Check whether any of the coordinates is a pseudo-dimension with integer values and
# if so, reset to int32 to prevent problems with possible later conversion to netCDF3
Expand Down Expand Up @@ -414,6 +365,90 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
sman.write(cube, zlib=True, complevel=compression, fill_value=fill_value)


aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved
# TODO: this review https://github.com/ACCESS-NRI/um2nc-standalone/pull/118
# indicates these time functions can be simplified. Suggestion: modularise
# functionality to simplify logic:
# * Extract origin time shift to a function?
# * Extract hours to days conversion to a function?
def fix_forecast_reference_time(cube):
# If reference date is before 1600 use proleptic gregorian
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved
# calendar and change units from hours to days

# TODO: constrain the exception handler to the first 2 coord lookups?
# TODO: detect dump files & exit early (look up all coords early)
try:
reftime = cube.coord(FORECAST_REFERENCE_TIME)
time = cube.coord(TIME)
refdate = reftime.units.num2date(reftime.points[0])

# TODO: replace with `if` check to prevent assert vanishing in optimised Python mode
assert time.units.origin == 'hours since 1970-01-01 00:00:00'

# TODO: add reference year as a configurable arg?
# TODO: determine if the start year should be changed from 1600
# see https://github.com/ACCESS-NRI/um2nc-standalone/pull/118/files#r1792886613
if time.units.calendar == cf_units.CALENDAR_PROLEPTIC_GREGORIAN and refdate.year < 1600:
convert_proleptic(time)
else:
if time.units.calendar == cf_units.CALENDAR_GREGORIAN:
new_calendar = cf_units.CALENDAR_PROLEPTIC_GREGORIAN
else:
blimlim marked this conversation as resolved.
Show resolved Hide resolved
new_calendar = time.units.calendar

time.units = cf_units.Unit("days since 1970-01-01 00:00", calendar=new_calendar)
time.points = time.points / 24.

if time.bounds is not None:
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved
time.bounds = time.bounds / 24.

# TODO: remove_coord() calls the coord() lookup, which raises
# CoordinateNotFoundError if the forecast period is missing, this
# ties remove_coords() to the exception handler below
#
# TODO: if removing the forecast period fails, forecast reference time is
# NOT removed. What is the desired behaviour?
cube.remove_coord(FORECAST_PERIOD)
aidanheerdegen marked this conversation as resolved.
Show resolved Hide resolved
cube.remove_coord(FORECAST_REFERENCE_TIME)
except iris.exceptions.CoordinateNotFoundError:
# Dump files don't have forecast_reference_time
pass


# TODO: rename time arg to pre-emptively avoid a clash with builtin time module
# TODO: refactor to add a calendar arg here, see
# https://github.com/ACCESS-NRI/um2nc-standalone/pull/118/files#r1794321677
def convert_proleptic(time):
# Convert units from hours to days and shift origin from 1970 to 0001
newunits = cf_units.Unit("days since 0001-01-01 00:00",
calendar=cf_units.CALENDAR_PROLEPTIC_GREGORIAN)
tvals = np.array(time.points) # Need a copy because can't assign to time.points[i]
tbnds = np.array(time.bounds) if time.bounds is not None else None

# TODO: refactor manual looping with pythonic iteration
#
# TODO: limit the looping as per this review suggestion:
# https://github.com/ACCESS-NRI/um2nc-standalone/pull/118/files#r1794315128
for i in range(len(time.points)):
date = time.units.num2date(tvals[i])
newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day,
date.hour, date.minute, date.second)
tvals[i] = newunits.date2num(newdate)

if tbnds is not None: # Fields with instantaneous data don't have bounds
for j in range(2):
date = time.units.num2date(tbnds[i][j])
newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day,
date.hour, date.minute, date.second)
tbnds[i][j] = newunits.date2num(newdate)

time.points = tvals

if tbnds is not None:
time.bounds = tbnds

time.units = newunits


def fix_cell_methods(cell_methods):
"""
Removes misleading 'hour' from interval naming, leaving other names intact.
Expand Down
Loading