Skip to content

Commit

Permalink
publish dataset (#59)
Browse files Browse the repository at this point in the history
* added "anemoi-datasets publish"

* save user recipe

* changelog

* fix serialisation error

---------

Co-authored-by: b8raoult <53792887+b8raoult@users.noreply.github.com>
  • Loading branch information
floriankrb and b8raoult authored Oct 1, 2024
1 parent 3130d73 commit 9049023
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ Keep it human-readable, your future self will thank you!

## [Unreleased](https://github.com/ecmwf/anemoi-datasets/compare/0.5.0...HEAD)

### Added

- Added the user recipe to the dataset metadata (PR #59).

### Changed

- Bug fix in create/rename
Expand Down
30 changes: 30 additions & 0 deletions src/anemoi/datasets/commands/publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging

from . import Command

LOG = logging.getLogger(__name__)


class Publish(Command):
    """Command that publishes a dataset via the optional ``anemoi-registry`` package."""

    # Class-level flags; their meaning is defined by the command framework
    # outside this file (NOTE(review): confirm against ``Command``).
    timestamp = True
    internal = True

    def add_arguments(self, parser):
        """Register the single positional ``path`` argument on ``parser``."""
        parser.add_argument("path", help="Path of the dataset to publish.")

    def run(self, args):
        """Publish the dataset located at ``args.path``.

        The registry client is imported lazily; when it cannot be imported
        an error is logged and nothing is published.
        """
        try:
            from anemoi.registry import publish_dataset
        except ImportError:
            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
        else:
            publish_dataset(args.path)


# Module-level alias for the command class; presumably the name the CLI
# loader looks up in each command module — TODO confirm against the loader.
command = Publish
39 changes: 39 additions & 0 deletions src/anemoi/datasets/create/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import time
import uuid
import warnings
from copy import deepcopy
from functools import cached_property

import numpy as np
Expand Down Expand Up @@ -323,6 +324,43 @@ def build_input_(main_config, output_config):
return builder


# Placeholder substituted for every value considered sensitive.
_REDACTED = "*** REMOVED FOR SECURITY ***"

# A string starting with any of these prefixes is redacted. The match is a
# plain prefix test (e.g. "http" also covers "https://"), which deliberately
# errs on the side of removing too much.
_SENSITIVE_PREFIXES = (
    "_",
    "s3://",
    "gs://",
    "http",
    "ftp",
    "file",
    "ssh",
    "scp",
    "rsync",
    "/",
)


def _tidy_in_place(node):
    """Redact ``node`` recursively and return the tidied value.

    Mappings are modified in place, so this must only be called on data
    that has already been copied by ``tidy_recipe``.
    """
    if isinstance(node, str):
        if node.startswith(_SENSITIVE_PREFIXES) or "@" in node:
            return _REDACTED
        return node

    if isinstance(node, (tuple, list)):
        # Sequences (tuples included) are always returned as new lists.
        return [_tidy_in_place(item) for item in node]

    # Plain dicts are the common case and are tested first; DotDict is
    # checked separately only when that test fails.
    if isinstance(node, dict) or isinstance(node, DotDict):
        # Snapshot the items so re-assigning values cannot disturb iteration.
        for key, value in list(node.items()):
            # Keys need not be strings (e.g. integer keys from YAML); only
            # string keys can carry the private "_" marker.
            if isinstance(key, str) and key.startswith("_"):
                node[key] = _REDACTED
            else:
                node[key] = _tidy_in_place(value)

    return node


def tidy_recipe(config: object):
    """Remove potentially private information in the config.

    Returns a tidied deep copy of ``config``; the argument itself is never
    modified. In the copy:

    * every mapping value whose key is a string starting with ``"_"`` is
      replaced by a placeholder;
    * every string that starts with ``"_"``, ``"/"``, or one of the remote
      location prefixes (``s3://``, ``gs://``, ``http``, ``ftp``, ``file``,
      ``ssh``, ``scp``, ``rsync``), or that contains ``"@"``, is replaced by
      the same placeholder;
    * tuples and lists become lists of tidied items;
    * any other value is kept unchanged.
    """
    # Copy once here; the recursive helper then works on the copy without
    # re-copying every nested level.
    return _tidy_in_place(deepcopy(config))


class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
dataset_class = NewDataset
def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
Expand Down Expand Up @@ -409,6 +447,7 @@ def _run(self):
metadata.update(self.main_config.get("add_metadata", {}))

metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())

metadata["description"] = self.main_config.description
metadata["licence"] = self.main_config["licence"]
Expand Down

0 comments on commit 9049023

Please sign in to comment.