diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4dc3e7c4..67db120c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@ Keep it human-readable, your future self will thank you!
 
 ## [Unreleased](https://github.com/ecmwf/anemoi-datasets/compare/0.5.0...HEAD)
 
+### Added
+
+- Adding the user recipe in the dataset PR #59.
+
 ### Changed
 
 - Bug fix in create/rename
diff --git a/src/anemoi/datasets/commands/publish.py b/src/anemoi/datasets/commands/publish.py
new file mode 100644
index 00000000..ff407c64
--- /dev/null
+++ b/src/anemoi/datasets/commands/publish.py
@@ -0,0 +1,33 @@
+import logging
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+
+class Publish(Command):
+    """Publish a dataset."""
+
+    # This command publishes a dataset through the optional
+    # anemoi-registry package; it is a class inheriting from Command.
+
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, parser):
+        """Declare the command-line arguments of the command."""
+        parser.add_argument("path", help="Path of the dataset to publish.")
+
+    def run(self, args):
+        """Publish the dataset found at ``args.path``."""
+        # anemoi-registry is imported here so that it is only needed when publishing.
+        try:
+            from anemoi.registry import publish_dataset
+        except ImportError:
+            LOG.error("anemoi-registry is not installed. Please install it to use this command.")
+            return
+
+        publish_dataset(args.path)
+
+
+command = Publish
diff --git a/src/anemoi/datasets/create/__init__.py b/src/anemoi/datasets/create/__init__.py
index 461aad73..d1026de3 100644
--- a/src/anemoi/datasets/create/__init__.py
+++ b/src/anemoi/datasets/create/__init__.py
@@ -14,6 +14,7 @@
 import time
 import uuid
 import warnings
+from copy import deepcopy
 from functools import cached_property
 
 import numpy as np
@@ -323,6 +324,47 @@ def build_input_(main_config, output_config):
     return builder
 
 
+_REMOVED = "*** REMOVED FOR SECURITY ***"
+
+# Strings starting with any of these prefixes (private names, URLs, remote
+# locations, absolute paths) are replaced by the placeholder above.
+_PRIVATE_PREFIXES = ("_", "s3://", "gs://", "http", "ftp", "file", "ssh", "scp", "rsync", "/")
+
+
+def _tidy_in_place(config):
+    """Redact private information from ``config``, modifying containers in place."""
+    if isinstance(config, str):
+        if config.startswith(_PRIVATE_PREFIXES) or "@" in config:
+            return _REMOVED
+        return config
+
+    if isinstance(config, (tuple, list)):
+        return [_tidy_in_place(item) for item in config]
+
+    if isinstance(config, (dict, DotDict)):
+        # Snapshot the items so that assigning to the mapping cannot disturb the iteration.
+        for key, value in list(config.items()):
+            # Keys are not always strings (e.g. numbers in YAML), only string keys can be private.
+            if isinstance(key, str) and key.startswith("_"):
+                config[key] = _REMOVED
+            else:
+                config[key] = _tidy_in_place(value)
+
+    return config
+
+
+def tidy_recipe(config: object):
+    """Return a copy of ``config`` with potentially private information removed.
+
+    The value of any key starting with an underscore is redacted, as is any
+    string that starts with an underscore, looks like a URL, a remote location
+    or an absolute path, or contains an ``@``. Lists and tuples are returned
+    as lists. The object given as argument is left untouched.
+    """
+    # Copy once here rather than at every level of the recursion.
+    return _tidy_in_place(deepcopy(config))
+
+
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
     dataset_class = NewDataset
     def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs):  # fmt: skip
@@ -409,6 +451,7 @@ def _run(self):
         metadata.update(self.main_config.get("add_metadata", {}))
 
         metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
+        metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
 
         metadata["description"] = self.main_config.description
         metadata["licence"] = self.main_config["licence"]