diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index b3e188c162..e1726d472b 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -248,6 +248,20 @@ available tables of the specified project. a given dataset) fx files are found in more than one table, ``mip`` needs to be specified, otherwise an error is raised. +Additionally, it is possible to search across all ensembles and experiments (or +any other keys) when specifying the fx variable, by using the ``*`` character, +which is useful for some projects where the location of the fx files is not +consistent. +This makes it possible to search for fx files under multiple ensemble members +or experiments, for example, ``ensemble: '*'`` or ``exp: '*'``. +Note that the ``*`` character must be quoted since ``*`` is a special character +in YAML. +This functionality is only supported for time-invariant fx variables (i.e. +frequency ``fx``). +Note also that if multiple directories of matching fx files are found, +ESMValTool will prioritize the ensemble ``r0i0p0`` (if it exists), otherwise +the first directory that is found will be used. + .. note:: To explicitly **not** use any fx variables in a preprocessor, use ``fx_variables: null``. 
While some of the preprocessors mentioned above do diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index ee9b790571..f2b5e983f1 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -119,7 +119,7 @@ def get_start_end_date(filename): if start_date is None or end_date is None: raise ValueError(f'File {filename} dates do not match a recognized' - 'pattern and time can not be read from the file') + f'pattern and time can not be read from the file') return start_date, end_date @@ -183,7 +183,7 @@ def get_start_end_year(filename): if start_year is None or end_year is None: raise ValueError(f'File {filename} dates do not match a recognized' - 'pattern and time can not be read from the file') + f'pattern and time can not be read from the file') return int(start_year), int(end_year) @@ -364,6 +364,52 @@ def _resolve_latestversion(dirname_template): return None +def _resolve_wildcards_and_version(dirname, basepath, project, drs): + """Resolve wildcards and latestversion tag.""" + if "{latestversion}" in dirname: + dirname_version_wildcard = dirname.replace('{latestversion}', '*') + + # Find all directories that match the template + all_dirs = sorted(glob.glob(dirname_version_wildcard)) + + # Sort directories by version + all_dirs_dict = {} + for directory in all_dirs: + version = dir_to_var( + directory, basepath, project, drs)['latestversion'] + all_dirs_dict.setdefault(version, []) + all_dirs_dict[version].append(directory) + + # Select latest version + if not all_dirs_dict: + dirnames = [] + elif 'latest' in all_dirs_dict: + dirnames = all_dirs_dict['latest'] + else: + all_versions = sorted(list(all_dirs_dict)) + dirnames = all_dirs_dict[all_versions[-1]] + + # No {latestversion} tag + else: + dirnames = sorted(glob.glob(dirname)) + + # No directories found + if not dirnames: + logger.debug("Unable to resolve %s", dirname) + return dirname + + # Exactly one directory found + if len(dirnames) == 1: + return dirnames[0] + + # Warn 
if multiple directories have been found and prioritize r0i0p0 + logger.warning("Multiple directories for fx variables found: %s", dirnames) + r0i0p0_matches = [d for d in dirnames if 'r0i0p0' in d] + if r0i0p0_matches: + return r0i0p0_matches[0] + return dirnames[0] + + def _select_drs(input_type, drs, project): """Select the directory structure of input path.""" cfg = get_project_config(project) @@ -376,8 +422,8 @@ def _select_drs(input_type, drs, project): return input_path[structure] raise KeyError( - 'drs {} for {} project not specified in config-developer file'.format( - structure, project)) + f"DRS {structure} for project {project} not specified in " + f"config-developer file") ROOTPATH_WARNED = set() @@ -409,7 +455,15 @@ def _find_input_dirs(variable, rootpath, drs): for dirname_template in _replace_tags(path_template, variable): for base_path in root: dirname = os.path.join(base_path, dirname_template) - dirname = _resolve_latestversion(dirname) + if variable['frequency'] == 'fx' and '*' in dirname: + dirname = _resolve_wildcards_and_version(dirname, base_path, + project, drs) + var_from_dir = dir_to_var(dirname, base_path, project, drs) + for (key, val) in variable.items(): + if val == '*': + variable[key] = var_from_dir.get(key, '*') + else: + dirname = _resolve_latestversion(dirname) if dirname is None: continue matches = glob.glob(dirname) @@ -449,9 +503,13 @@ def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" # change ensemble to fixed r0i0p0 for fx variables # this is needed and is not a duplicate effort - if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': + if all([ + variable['project'] == 'CMIP5', variable['frequency'] == 'fx', + variable.get('ensemble') != '*' + ]): variable['ensemble'] = 'r0i0p0' (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) + # do time gating only for non-fx variables if variable['frequency'] != 'fx': files = select_files( @@ -513,3 +571,38 @@ 
def get_multiproduct_filename(attributes, preproc_dir): ) return outfile + + +def dir_to_var(dirname, basepath, project, drs): + """Convert directory path to variable :obj:`dict`.""" + if dirname != os.sep: + dirname = dirname.rstrip(os.sep) + if basepath != os.sep: + basepath = basepath.rstrip(os.sep) + path_template = _select_drs('input_dir', drs, project).rstrip(os.sep) + rel_dir = os.path.relpath(dirname, basepath) + keys = path_template.split(os.sep) + vals = rel_dir.split(os.sep) + if len(keys) != len(vals): + raise ValueError( + f"Cannot extract tags '{path_template}' from directory " + f"'{rel_dir}' (root: '{basepath}') with different numbers of " + f"elements") + variable = {} + for (idx, full_key) in enumerate(keys): + matches = re.findall(r'.*\{(.*)\}.*', full_key) + if len(matches) != 1: + continue + key = matches[0] + regex = rf"{full_key.replace(key, '(.*)')}" + regex = regex.replace('{', '').replace('}', '') + matches = re.findall(regex, vals[idx]) + while '' in matches: + matches.remove('') + if len(matches) != 1: + raise ValueError( + f"Regex pattern '{regex}' for '{full_key}' cannot be " + f"(uniquely) matched to element '{vals[idx]}' in directory " + f"'{dirname}'") + variable[key] = matches[0] + return variable diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f362b74b65..37d42b0af6 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -341,7 +341,8 @@ def _add_fxvar_keys(fx_info, variable, extra_facets_dir): fx_variable['variable_group'] = fx_info['short_name'] # add special ensemble for CMIP5 only - if fx_variable['project'] == 'CMIP5': + if (fx_variable['project'] == 'CMIP5' and + fx_variable.get('ensemble') != '*'): fx_variable['ensemble'] = 'r0i0p0' # add missing cmor info @@ -932,6 +933,11 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, else: missing_vars.add(ex.message) continue + + # Update output filename in case wildcards have been resolved + if '*' in variable['filename']: + 
variable['filename'] = get_output_file(variable, + config_user['preproc_dir']) product = PreprocessorFile( attributes=variable, settings=settings, diff --git a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py index 38f96526e1..607ccae36b 100644 --- a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py +++ b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py @@ -1,5 +1,6 @@ """Fixes for BCC-CSM2-MR model.""" from ..common import ClFixHybridPressureCoord, OceanFixGrid +from ..fix import Fix Cl = ClFixHybridPressureCoord @@ -15,5 +16,36 @@ Siconc = OceanFixGrid +uo = OceanFixGrid + +#class Omon(Fix): +# """Fixes for ocean variables.""" +# +# def fix_metadata(self, cubes): +# """Fix ocean depth coordinate. +# +# Parameters +# ---------- +# cubes: iris CubeList +# List of cubes to fix +# +# Returns +# ------- +# iris.cube.CubeList +# +# """ +# cubes = OceanFixGrid.fix_metadata(cubes) +# +# for cube in cubes: +# if cube.coords('latitude'): +# cube.coord('latitude').var_name = 'lat' +# if cube.coords('longitude'): +# cube.coord('longitude').var_name = 'lon' +# +# if cube.coords(axis='Z'): +# z_coord = cube.coord(axis='Z') +# if z_coord.var_name == 'olevel': +# fix_ocean_depth_coord(cube) +# return cubes Sos = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py index 80f2e58849..f85b1a509a 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py @@ -2,6 +2,10 @@ from .cesm2 import Cl as BaseCl from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas +from ..fix import Fix +from ..shared import fix_ocean_depth_coord +import numpy as np +import cf_units from ..common import SiconcFixScalarCoord @@ -21,3 +25,42 @@ Tas = BaseTas + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)>10000.: + z_coord.units = cf_units.Unit('m') + z_coord.points = z_coord.points /100. + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)<10000.: + z_coord.units = cf_units.Unit('m') + #z_coord.points = z_coord.points /100. + + + #z_coord = cube.coord(axis='Z') + #if z_coord.var_name == 'olevel': + fix_ocean_depth_coord(cube) + return cubes + diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index d0014f308a..ad486687ac 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -4,6 +4,10 @@ from .cesm2 import Cl as BaseCl from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas +from ..fix import Fix +from ..shared import fix_ocean_depth_coord +import numpy as np +import cf_units from ..common import SiconcFixScalarCoord @@ -56,3 +60,39 @@ def fix_file(self, filepath, output_dir): Tas = BaseTas + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)>10000.: + z_coord.units = cf_units.Unit('m') + z_coord.points = z_coord.points /100. + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)<10000.: + z_coord.units = cf_units.Unit('m') + + fix_ocean_depth_coord(cube) + return cubes + + diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py index bc8068af8a..757f41cb27 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py @@ -1,3 +1,5 @@ +"""Fixes for cesm2-waccm-fv2.""" +from iris.cube import CubeList """Fixes for CESM2-WACCM-FV2 model.""" from .cesm2 import Tas as BaseTas from .cesm2 import Fgco2 as BaseFgco2 @@ -6,15 +8,65 @@ from .cesm2_waccm import Clw as BaseClw from ..common import SiconcFixScalarCoord +from ..fix import Fix +from ..shared import fix_ocean_depth_coord -Cl = BaseCl +import numpy as np +import cf_units +class AllVars(Fix): + """Fixes for thetao.""" -Cli = BaseCli + def fix_metadata(self, cubes): + """ + Fix cell_area coordinate. + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix -Clw = BaseClw + Returns + ------- + iris.cube.CubeList + """ + cube = self.get_cube_from_list(cubes) + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + return CubeList([cube]) + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)>10000.: + z_coord.units = cf_units.Unit('m') + z_coord.points = z_coord.points /100. + if str(z_coord.units).lower() in ['cm', 'centimeters'] and np.max(z_coord.points)<10000.: + z_coord.units = cf_units.Unit('m') +# z_coord.points = z_coord.points /100. + + fix_ocean_depth_coord(cube) + return cubes Fgco2 = BaseFgco2 diff --git a/esmvalcore/cmor/_fixes/cmip6/cnrm_cm6_1.py b/esmvalcore/cmor/_fixes/cmip6/cnrm_cm6_1.py index 1e2762bdd4..ef03d017f1 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cnrm_cm6_1.py +++ b/esmvalcore/cmor/_fixes/cmip6/cnrm_cm6_1.py @@ -3,8 +3,7 @@ from ..common import ClFixHybridPressureCoord from ..fix import Fix -from ..shared import add_aux_coords_from_cubes, get_bounds_cube - +from ..shared import add_aux_coords_from_cubes, get_bounds_cube, fix_ocean_depth_coord class Cl(ClFixHybridPressureCoord): """Fixes for ``cl``.""" @@ -77,3 +76,28 @@ def fix_metadata(self, cubes): Clw = Cl + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.var_name in ['olevel', 'lev']: + fix_ocean_depth_coord(cube) + return cubes + + diff --git a/esmvalcore/cmor/_fixes/cmip6/fgoals_f3_l.py b/esmvalcore/cmor/_fixes/cmip6/fgoals_f3_l.py index 247ce44f24..ffa29a0f94 100644 --- a/esmvalcore/cmor/_fixes/cmip6/fgoals_f3_l.py +++ b/esmvalcore/cmor/_fixes/cmip6/fgoals_f3_l.py @@ -66,3 +66,39 @@ def fix_data(self, cube): if cube.units == "%" and da.max(cube.core_data()).compute() <= 1.: cube.data = cube.core_data() * 100. return cube + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. + + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if not cube.coord('latitude').bounds: + cube.coord('latitude').guess_bounds() + if not cube.coord('longitude').bounds: + cube.coord('longitude').guess_bounds() + + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if str(z_coords.units) == 'cm' and np.max(z_points)>10000.: + z_coord.units = cf_units.Unit('m') + fix_ocean_depth_coord(cube) + return cubes + + diff --git a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm5a2_inca.py b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm5a2_inca.py new file mode 100644 index 0000000000..d67928c6bc --- /dev/null +++ b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm5a2_inca.py @@ -0,0 +1,78 @@ +"""Fixes for IPSL-CM5A2-INCA model.""" +from iris.cube import CubeList + +from ..fix import Fix +from ..shared import fix_ocean_depth_coord + + +class AllVars(Fix): + """Fixes for thetao.""" + + def fix_metadata(self, cubes): + """ + Fix cell_area coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + return CubeList([cube]) + + +class Clcalipso(Fix): + """Fixes for ``clcalipso``.""" + + def fix_metadata(self, cubes): + """Fix ``alt40`` coordinate. + + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes + + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + alt_40_coord = cube.coord('height') + alt_40_coord.long_name = 'altitude' + alt_40_coord.standard_name = 'altitude' + alt_40_coord.var_name = 'alt40' + return CubeList([cube]) + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. + + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + # z_coord = cube.coord(axis='Z') + # if z_coord.var_name == 'olevel': + fix_ocean_depth_coord(cube) + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr_inca.py b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr_inca.py new file mode 100644 index 0000000000..bfc99c923c --- /dev/null +++ b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr_inca.py @@ -0,0 +1,78 @@ +"""Fixes for IPSL-CM6A-LR-INCA model.""" +from iris.cube import CubeList + +from ..fix import Fix +from ..shared import fix_ocean_depth_coord + + +class AllVars(Fix): + """Fixes for thetao.""" + + def fix_metadata(self, cubes): + """ + Fix cell_area coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + if cube.coords('latitude'): + cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): + cube.coord('longitude').var_name = 'lon' + return CubeList([cube]) + + +class Clcalipso(Fix): + """Fixes for ``clcalipso``.""" + + def fix_metadata(self, cubes): + """Fix ``alt40`` coordinate. + + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes + + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + alt_40_coord = cube.coord('height') + alt_40_coord.long_name = 'altitude' + alt_40_coord.standard_name = 'altitude' + alt_40_coord.var_name = 'alt40' + return CubeList([cube]) + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. + + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.var_name == 'olevel': + fix_ocean_depth_coord(cube) + return cubes diff --git a/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx b/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx index 30ecaf0997..0b7a026048 100644 --- a/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx +++ b/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx @@ -223,3 +223,26 @@ valid_max: 30.0 !---------------------------------- ! +!============ +variable_entry: sftof +!============ +modeling_realm: ocean +!---------------------------------- +! Variable attributes: +!---------------------------------- +standard_name: sea_area_fraction +units: % +cell_measures: area: areacello +long_name: Sea Area Fraction +comment: This is the area fraction at the ocean surface. +!---------------------------------- +! 
Additional variable information: +!---------------------------------- +dimensions: longitude latitude +out_name: sftof +type: real +valid_min: 0.0 +valid_max: 100.0 +!---------------------------------- +! + diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index a47cbbba39..90351fa952 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -472,8 +472,98 @@ get_input_filelist: ensemble: r1i1p1 diagnostic: test_diag preprocessor: test_preproc + available_files: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc + dirs: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf + file_patterns: + - sftlf_fx_HadGEM2-ES_historical_r0i0p0*.nc + found_files: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + + # CMIP6 wildcard specification for fx variable + + - drs: BADC + variable: + variable_group: test + short_name: sftlf + original_short_name: sftlf + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: '*' + grid: gn + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/fx/sftlf/gn/v2020/sftlf_fx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + - CMIP/MOHC/HadGEM3-GC31-LL/piControl/r1i1p1f1/fx/sftlf/gn/latest/sftlf_fx_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn.nc + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/piControl/r1i1p1f1/fx/sftlf/gn/latest + file_patterns: + - sftlf_fx_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn*.nc + found_files: + - CMIP/MOHC/HadGEM3-GC31-LL/piControl/r1i1p1f1/fx/sftlf/gn/latest/sftlf_fx_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn.nc + + # CORDEX wildcard fx specification + + - drs: BADC + 
variable: + variable_group: test + short_name: sftlf + original_short_name: sftlf + dataset: RCA4 + driver: NCC-NorESM1-M + rcm_version: v1 + project: CORDEX + cmor_table: cordex + institute: [SMHI] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: '*' + domain: EUR-11 + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - EUR-11/SMHI/NCC-NorESM1-M/historical/r0i0p0/RCA4/v1/fx/sftlf/v20100101/sftlf_EUR-11_NCC-NorESM1-M_historical_r0i0p0_RCA4_v1_fx.nc + - EUR-11/SMHI/NCC-NorESM1-M/historical/r0i0p0/RCA4/v1/fx/sftlf/v20180820/sftlf_EUR-11_NCC-NorESM1-M_historical_r0i0p0_RCA4_v1_fx.nc + - EUR-11/SMHI/NCC-NorESM1-M/rcp85/r0i0p0/RCA4/v1/fx/sftlf/v20100101/sftlf_EUR-11_NCC-NorESM1-M_rcp85_r0i0p0_RCA4_v1_fx.nc + - EUR-11/SMHI/NCC-NorESM1-M/rcp85/r0i0p0/RCA4/v1/fx/sftlf/v20180820/sftlf_EUR-11_NCC-NorESM1-M_rcp85_r0i0p0_RCA4_v1_fx.nc + dirs: + - EUR-11/SMHI/NCC-NorESM1-M/historical/r0i0p0/RCA4/v1/fx/sftlf/v20180820 + file_patterns: + - sftlf_EUR-11_NCC-NorESM1-M_historical_r0i0p0_RCA4_v1_fx*.nc + found_files: + - EUR-11/SMHI/NCC-NorESM1-M/historical/r0i0p0/RCA4/v1/fx/sftlf/v20180820/sftlf_EUR-11_NCC-NorESM1-M_historical_r0i0p0_RCA4_v1_fx.nc + + - drs: DKRZ + variable: + variable_group: test + short_name: sftlf + original_short_name: sftlf + dataset: HadGEM2-ES + project: CMIP5 + cmor_table: CMIP5 + institute: [INPE, MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc available_files: - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r2i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r2i1p1.nc - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc dirs: - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf @@ -482,6 +572,140 @@ get_input_filelist: found_files: - 
MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - drs: DKRZ + variable: + variable_group: test + short_name: sftlf + original_short_name: sftlf + dataset: HadGEM2-ES + project: CMIP5 + cmor_table: CMIP5 + institute: [INPE, MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r2i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r2i1p1.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r3i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r3i1p1.nc + dirs: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf + file_patterns: + - sftlf_fx_HadGEM2-ES_historical_r1i1p1*.nc + found_files: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc + + - drs: DKRZ + variable: + variable_group: test + short_name: areacella + original_short_name: areacella + dataset: CanESM2 + project: CMIP5 + cmor_table: CMIP5 + institute: [CCCma] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CCCma/CanESM2/historical/fx/atmos/fx/r0i0p0/v20120410/sftlf/sftlf_fx_CanESM2_historical_r0i0p0.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r1i1p1/v20000101/areacella/areacella_fx_CanESM2_historical_r1i1p1.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r1i1p1/v20120410/areacella/areacella_fx_CanESM2_historical_r1i1p1.nc + dirs: + - CCCma/CanESM2/historical/fx/atmos/fx/r1i1p1/v20120410/areacella + file_patterns: + - areacella_fx_CanESM2_historical_r1i1p1*.nc + found_files: + - CCCma/CanESM2/historical/fx/atmos/fx/r1i1p1/v20120410/areacella/areacella_fx_CanESM2_historical_r1i1p1.nc + + - drs: DKRZ + variable: + variable_group: 
test + short_name: areacella + original_short_name: areacella + dataset: CanESM2 + project: CMIP5 + cmor_table: CMIP5 + institute: [CCCma] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: [] + dirs: [] + file_patterns: + - areacella_fx_CanESM2_historical_**.nc + found_files: [] + + - drs: DKRZ + variable: + variable_group: test + short_name: areacella + original_short_name: areacella + dataset: CanESM2 + project: CMIP5 + cmor_table: CMIP5 + institute: [CCCma] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CCCma/CanESM2/historical/fx/atmos/fx/r0i0p0/v20120410/sftlf/sftlf_fx_CanESM2_historical_r0i0p0.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r0i0p0/v20200101/sftlf/sftlf_fx_CanESM2_historical_r0i0p0.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r1i1p1/v20120410/areacella/areacella_fx_CanESM2_historical_r1i1p1.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r2i1p1/v20200101/areacella/areacella_fx_CanESM2_historical_r2i1p1.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r2i1p1/latest/areacella/areacella_fx_CanESM2_historical_r2i1p1.nc + - CCCma/CanESM2/historical/fx/atmos/fx/r2i1p2/v20300101/areacella/areacella_fx_CanESM2_historical_r2i1p2.nc + dirs: + - CCCma/CanESM2/historical/fx/atmos/fx/r2i1p1/latest/areacella + file_patterns: + - areacella_fx_CanESM2_historical_r2i1p1*.nc + found_files: + - CCCma/CanESM2/historical/fx/atmos/fx/r2i1p1/latest/areacella/areacella_fx_CanESM2_historical_r2i1p1.nc + + - drs: ETHZ + variable: + variable_group: test + short_name: areacella + original_short_name: areacella + dataset: CanESM2 + project: CMIP5 + cmor_table: CMIP5 + institute: [CCCma] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: '*' + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - 
historical/fx/sftlf/CanESM2/r0i0p0/sftlf_fx_CanESM2_historical_r0i0p0.nc + - historical/fx/sftlf/CanESM2/r1i1p1/sftlf_fx_CanESM2_historical_r1i1p1.nc + - historical/fx/areacella/CanESM2/r2i2p2/areacella_fx_CanESM2_historical_r2i2p2.nc + - historical/fx/areacella/CanESM2/r1i1p1/areacella_fx_CanESM2_historical_r1i1p1.nc + dirs: + - historical/fx/areacella/CanESM2/r1i1p1 + file_patterns: + - areacella_fx_CanESM2_historical_r1i1p1*.nc + found_files: + - historical/fx/areacella/CanESM2/r1i1p1/areacella_fx_CanESM2_historical_r1i1p1.nc + - drs: DKRZ variable: variable_group: test @@ -499,8 +723,8 @@ get_input_filelist: diagnostic: test_diag preprocessor: test_preproc available_files: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc dirs: [] file_patterns: - orog_fx_HadGEM2-ES_historical_r0i0p0*.nc @@ -729,3 +953,79 @@ get_input_filelist: file_patterns: - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc found_files: [] + + +dir_to_var: + - dirname: /this/is/root/ + basepath: /this/is/root/ + project: CMIP5 + drs: default + variable: {} + - dirname: / + basepath: / + project: CMIP5 + drs: default + variable: {} + - dirname: /this/is/root/TYPE/PROJECT/EXP/data + basepath: /this/is/root/ + project: CMIP5 + drs: BSC + variable: + type: TYPE + project: PROJECT + exp: EXP + dataset.lower: data + - dirname: /this/is/root/TEST/NAME + basepath: /this/is/root/ + project: CMIP5 + drs: default + variable: null + - dirname: /this/is/root/INST/DATA/EXP/FREQ/REALM/MIP/ENS/VERSION/NAME + basepath: /this/is/root + project: CMIP5 + drs: DKRZ + variable: + institute: INST + dataset: DATA + exp: EXP + 
frequency: FREQ + modeling_realm: REALM + mip: MIP + ensemble: ENS + latestversion: VERSION + short_name: NAME + - dirname: /this/is/root/EXP/MIP/NAME/DATA/ENS/GRID/ + basepath: /this/is/root/ + project: CMIP6 + drs: ETHZ + variable: + dataset: DATA + exp: EXP + mip: MIP + ensemble: ENS + short_name: NAME + grid: GRID + - dirname: /this/is/root/THIS/FAILS + basepath: /this/is/root/ + project: CMIP6 + drs: ETHZ + variable: null + - dirname: /this/is/root/Tier2/DATA + basepath: /this/is/root/ + project: OBS + drs: default + variable: + tier: '2' + dataset: DATA + - dirname: /Tier1/DATA + basepath: / + project: OBS + drs: default + variable: + tier: '1' + dataset: DATA + - dirname: /this/is/root/ThisFails/DATA + basepath: /this/is/root/ + project: OBS + drs: default + variable: null diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 07028b2610..5225a4f239 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -9,6 +9,7 @@ import esmvalcore._config from esmvalcore._data_finder import ( _find_input_files, + dir_to_var, get_input_filelist, get_output_file, ) @@ -112,3 +113,15 @@ def test_get_input_filelist(root, cfg): assert sorted(input_filelist) == sorted(ref_files) assert sorted(dirnames) == sorted(ref_dirs) assert sorted(filenames) == sorted(ref_patterns) + + +@pytest.mark.parametrize('cfg', CONFIG['dir_to_var']) +def test_dir_to_var(cfg): + """Test converting directory path to variable :obj:`dict`.""" + drs = {cfg['project']: cfg['drs']} + if cfg['variable'] is None: + with pytest.raises(ValueError): + dir_to_var(cfg['dirname'], cfg['basepath'], cfg['project'], drs) + return + output = dir_to_var(cfg['dirname'], cfg['basepath'], cfg['project'], drs) + assert output == cfg['variable']