Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/multi dates match #64

Merged
merged 16 commits into from
Oct 3, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ repos:
hooks:
- id: ruff
# Next line is for documentation code snippets
exclude: '^[^_].*_\.py$'
exclude: '.*/[^_].*_\.py$'
args:
- --line-length=120
- --fix
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Keep it human-readable, your future self will thank you!
### Added

- Adding the user recipe in the dataset PR #59.
- Add `multi_dates_match` action in create.

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/building/sources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The following `sources` are currently available:
sources/netcdf
sources/opendap
sources/recentre
sources/repeated_dates
sources/xarray-kerchunk
sources/xarray-zarr
sources/zenodo
25 changes: 25 additions & 0 deletions docs/building/sources/repeated_dates.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
################
repeated_dates
################

The general format of the `repeated_dates` source is:

.. literalinclude:: yaml/repeated_dates1.yaml

**********
constant
**********

.. literalinclude:: yaml/repeated_dates2.yaml

*************
climatology
*************

.. literalinclude:: yaml/repeated_dates3.yaml

*********
closest
*********

.. literalinclude:: yaml/repeated_dates4.yaml
6 changes: 6 additions & 0 deletions docs/building/sources/yaml/repeated_dates1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

repeated_dates:
mode: mode
# ... parameters related to the mode ...
source:
# ... a source definition ...
6 changes: 6 additions & 0 deletions docs/building/sources/yaml/repeated_dates2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
repeated_dates:
mode: constant
source:
xarray-zarr:
url: dem.zarr
variable: dem
8 changes: 8 additions & 0 deletions docs/building/sources/yaml/repeated_dates3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
repeated_dates:
mode: climatology
year: 2019
day: 15
source:
grib:
path: some/path/to/data.grib
param: [some_param]
9 changes: 9 additions & 0 deletions docs/building/sources/yaml/repeated_dates4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
repeated_dates:
mode: closest
frequency: 24h
maximum: 30d
skip_all_nans: true
source:
grib:
path: path/to/data.grib
param: [some_param]
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
.. _index-page:

####################################
Welcome to Anemoi's documentation!
####################################
#############################################
Welcome to `anemoi-datasets` documentation!
#############################################

.. warning::

Expand Down
6 changes: 3 additions & 3 deletions src/anemoi/datasets/create/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
from anemoi.utils.dates import frequency_to_timedelta
from anemoi.utils.humanize import compress_dates
from anemoi.utils.humanize import seconds_to_human
from earthkit.data.core.order import build_remapping

from anemoi.datasets import MissingDateError
from anemoi.datasets import open_dataset
from anemoi.datasets.create.input.trace import enable_trace
from anemoi.datasets.create.persistent import build_storage
from anemoi.datasets.data.misc import as_first_date
from anemoi.datasets.data.misc import as_last_date
Expand Down Expand Up @@ -309,7 +311,6 @@ def create_elements(self, config):


def build_input_(main_config, output_config):
from earthkit.data.core.order import build_remapping

builder = build_input(
main_config.input,
Expand Down Expand Up @@ -563,7 +564,7 @@ def _run(self):
# assert isinstance(group[0], datetime.datetime), type(group[0])
LOG.debug(f"Building data for group {igroup}/{self.n_groups}")

result = self.input.select(dates=group)
result = self.input.select(group_of_dates=group)
assert result.group_of_dates == group, (len(result.group_of_dates), len(group), group)

# There are several groups.
Expand Down Expand Up @@ -1031,7 +1032,6 @@ def run(self):

def creator_factory(name, trace=None, **kwargs):
if trace:
from anemoi.datasets.create.trace import enable_trace

enable_trace(trace)

Expand Down
6 changes: 6 additions & 0 deletions src/anemoi/datasets/create/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,15 @@ class StatisticsValueError(ValueError):

def check_data_values(arr, *, name: str, log=[], allow_nans=False):

shape = arr.shape

if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
arr = arr[~np.isnan(arr)]

if arr.size == 0:
warnings.warn(f"Empty array for {name} ({shape})")
return

assert arr.size > 0, (name, *log)

min, max = arr.min(), arr.max()
Expand Down
8 changes: 7 additions & 1 deletion src/anemoi/datasets/create/functions/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@


def _expand(paths):

if not isinstance(paths, list):
paths = [paths]

for path in paths:
if path.startswith("file://"):
path = path[7:]
Expand All @@ -40,8 +44,10 @@ def iterate_patterns(path, dates, **kwargs):
given_paths = path if isinstance(path, list) else [path]

dates = [d.isoformat() for d in dates]
if len(dates) > 0:
kwargs["date"] = dates

for path in given_paths:
paths = Pattern(path, ignore_missing_keys=True).substitute(date=dates, **kwargs)
paths = Pattern(path, ignore_missing_keys=True).substitute(**kwargs)
for path in _expand(paths):
yield path, dates
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def accumulations(context, dates, **request):
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
("rr", "oper"): dict(data_accumulation_period=3, base_times=(0, 3, 6, 9, 12, 15, 18, 21)),
("l5", "oper"): dict(data_accumulation_period=1, base_times=(0,)),
}

kwargs = KWARGS.get((class_, stream), {})
Expand Down
2 changes: 1 addition & 1 deletion src/anemoi/datasets/create/functions/sources/grib.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def execute(context, dates, path, latitudes=None, longitudes=None, *args, **kwar
s = s.sel(valid_datetime=dates, **kwargs)
ds = ds + s

if kwargs:
if kwargs and not context.partial_ok:
check(ds, given_paths, valid_datetime=dates, **kwargs)

if geography is not None:
Expand Down
13 changes: 4 additions & 9 deletions src/anemoi/datasets/create/functions/sources/xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
import logging

from earthkit.data.core.fieldlist import MultiFieldList
from earthkit.data.indexing.fieldlist import FieldArray

from anemoi.datasets.data.stores import name_to_zarr_store
from anemoi.datasets.utils.fields import NewMetadataField
from anemoi.datasets.utils.fields import NewMetadataField as NewMetadataField

from .. import iterate_patterns
from .fieldlist import XarrayFieldList
Expand All @@ -31,7 +30,7 @@ def check(what, ds, paths, **kwargs):
raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")


def load_one(emoji, context, dates, dataset, options={}, match_all_dates=False, flavour=None, **kwargs):
def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs):
import xarray as xr

"""
Expand All @@ -52,12 +51,8 @@ def load_one(emoji, context, dates, dataset, options={}, match_all_dates=False,

fs = XarrayFieldList.from_xarray(data, flavour)

if match_all_dates:
match = fs.sel(**kwargs)
result = []
for date in dates:
result.append(FieldArray([NewMetadataField(f, valid_datetime=date) for f in match]))
result = MultiFieldList(result)
if len(dates) == 0:
return fs.sel(**kwargs)
else:
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])

Expand Down
Loading
Loading