ESMValGroup · thomascrocker · Apr 26, 2021 · Apr 26, 2021 · Apr 26, 2021 · May 5, 2021
diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
@@ -38,19 +38,19 @@ def get_start_end_year(filename):
     start_year = end_year = None
 
     # First check for a block of two potential dates separated by _ or -
-    daterange = re.findall(r'([0-9]{4,12}[-_][0-9]{4,12})', stem)
+    daterange = re.findall(r"([0-9]{4,12}[-_][0-9]{4,12})", stem)
     if daterange:
-        start_date, end_date = re.findall(r'([0-9]{4,12})', daterange[0])
+        start_date, end_date = re.findall(r"([0-9]{4,12})", daterange[0])
         start_year = start_date[:4]
         end_year = end_date[:4]
     else:
         # Check for single dates in the filename
-        dates = re.findall(r'([0-9]{4,12})', stem)
+        dates = re.findall(r"([0-9]{4,12})", stem)
         if len(dates) == 1:
             start_year = end_year = dates[0][:4]
         elif len(dates) > 1:
             # Check for dates at start or end of filename
-            outerdates = re.findall(r'^[0-9]{4,12}|[0-9]{4,12}$', stem)
+            outerdates = re.findall(r"^[0-9]{4,12}|[0-9]{4,12}$", stem)
             if len(outerdates) == 1:
                 start_year = end_year = outerdates[0][:4]
 
@@ -61,16 +61,18 @@ def get_start_end_year(filename):
         for cube in cubes:
             logger.debug(cube)
             try:
-                time = cube.coord('time')
+                time = cube.coord("time")
             except iris.exceptions.CoordinateNotFoundError:
                 continue
             start_year = time.cell(0).point.year
             end_year = time.cell(-1).point.year
             break
 
     if start_year is None or end_year is None:
-        raise ValueError(f'File {filename} dates do not match a recognized'
-                         'pattern and time can not be read from the file')
+        raise ValueError(
+            f"File {filename} dates do not match a recognized "
+            "pattern and time can not be read from the file"
+        )
 
     logger.debug("Found start_year %s and end_year %s", start_year, end_year)
     return int(start_year), int(end_year)
@@ -92,26 +94,28 @@ def select_files(filenames, start_year, end_year):
 def _replace_tags(paths, variable):
     """Replace tags in the config-developer's file with actual values."""
     if isinstance(paths, str):
-        paths = (paths.strip('/'), )
+        paths = (paths.strip("/"),)
     else:
-        paths = [path.strip('/') for path in paths]
+        paths = [path.strip("/") for path in paths]
     tlist = set()
 
     for path in paths:
-        tlist = tlist.union(re.findall(r'{([^}]*)}', path))
+        tlist = tlist.union(re.findall(r"{([^}]*)}", path))
     logger.debug(tlist)
 
     for tag in tlist:
         original_tag = tag
         tag, _, _ = _get_caps_options(tag)
 
-        if tag == 'latestversion':  # handled separately later
+        if tag == "latestversion":  # handled separately later
             continue
         if tag in variable:
             replacewith = variable[tag]
         else:
-            raise KeyError("Dataset key {} must be specified for {}, check "
-                           "your recipe entry".format(tag, variable))
+            raise KeyError(
+                "Dataset key {} must be specified for {}, check "
+                "your recipe entry".format(tag, variable)
+            )
 
         paths = _replace_tag(paths, original_tag, replacewith)
     return paths
@@ -126,17 +130,17 @@ def _replace_tag(paths, tag, replacewith):
             result.extend(_replace_tag(paths, tag, item))
     else:
         text = _apply_caps(str(replacewith), lower, upper)
-        result.extend(p.replace('{' + tag + '}', text) for p in paths)
+        result.extend(p.replace("{" + tag + "}", text) for p in paths)
     return result
 
 
 def _get_caps_options(tag):
     lower = False
     upper = False
-    if tag.endswith('.lower'):
+    if tag.endswith(".lower"):
         lower = True
         tag = tag[0:-6]
-    elif tag.endswith('.upper'):
+    elif tag.endswith(".upper"):
         upper = True
         tag = tag[0:-6]
     return tag, lower, upper
@@ -156,16 +160,26 @@ def _resolve_latestversion(dirname_template):
     This implementation avoid globbing on centralized clusters with very
     large data root dirs (i.e. ESGF nodes like Jasmin/DKRZ).
     """
-    if '{latestversion}' not in dirname_template:
+    if "{latestversion}" not in dirname_template:
         return dirname_template
 
     # Find latest version
-    part1, part2 = dirname_template.split('{latestversion}')
+    part1, part2 = dirname_template.split("{latestversion}")
+    # resolve any wildcards entered for fx variables
+    if "/fx/" in part1:
+        dirs = sorted(glob.glob(part1))
+        # glob order is arbitrary, so sort to pick the same match every run
+        if dirs:
+            part1 = dirs[0]
+        else:
+            # no directory matches the pattern: return the template unresolved
+            logger.debug("Unable to resolve %s", dirname_template)
+            return dirname_template
     part2 = part2.lstrip(os.sep)
     if os.path.exists(part1):
         versions = os.listdir(part1)
         versions.sort(reverse=True)
-        for version in ['latest'] + versions:
+        for version in ["latest"] + versions:
             dirname = os.path.join(part1, version, part2)
             if os.path.isdir(dirname):
                 return dirname
@@ -180,30 +194,32 @@ def _select_drs(input_type, drs, project):
     if isinstance(input_path, str):
         return input_path
 
-    structure = drs.get(project, 'default')
+    structure = drs.get(project, "default")
     if structure in input_path:
         return input_path[structure]
 
     raise KeyError(
-        'drs {} for {} project not specified in config-developer file'.format(
-            structure, project))
+        "drs {} for {} project not specified in config-developer file".format(
+            structure, project
+        )
+    )
 
 
 def get_rootpath(rootpath, project):
     """Select the rootpath."""
     if project in rootpath:
         return rootpath[project]
-    if 'default' in rootpath:
-        return rootpath['default']
-    raise KeyError('default rootpath must be specified in config-user file')
+    if "default" in rootpath:
+        return rootpath["default"]
+    raise KeyError("default rootpath must be specified in config-user file")
 
 
 def _find_input_dirs(variable, rootpath, drs):
     """Return a the full paths to input directories."""
-    project = variable['project']
+    project = variable["project"]
 
     root = get_rootpath(rootpath, project)
-    path_template = _select_drs('input_dir', drs, project)
+    path_template = _select_drs("input_dir", drs, project)
 
     dirnames = []
     for dirname_template in _replace_tags(path_template, variable):
@@ -224,73 +240,59 @@ def _find_input_dirs(variable, rootpath, drs):
 
 def _get_filenames_glob(variable, drs):
     """Return patterns that can be used to look for input files."""
-    path_template = _select_drs('input_file', drs, variable['project'])
+    path_template = _select_drs("input_file", drs, variable["project"])
     filenames_glob = _replace_tags(path_template, variable)
     return filenames_glob
 
 
 def _find_input_files(variable, rootpath, drs):
-    short_name = variable['short_name']
-    variable['short_name'] = variable['original_short_name']
+    short_name = variable["short_name"]
+    variable["short_name"] = variable["original_short_name"]
     input_dirs = _find_input_dirs(variable, rootpath, drs)
     filenames_glob = _get_filenames_glob(variable, drs)
     files = find_files(input_dirs, filenames_glob)
-    variable['short_name'] = short_name
+    variable["short_name"] = short_name
     return (files, input_dirs, filenames_glob)
 
 
 def get_input_filelist(variable, rootpath, drs):
     """Return the full path to input files."""
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
-    if (
-        variable['frequency'] == 'fx'
-        and variable['ensemble'] != 'r0i0p0'
-        and dirnames == []
-    ):
-        # fx files not found. Try to find fx files again with ensemble r0i0p0
-        logger.debug(
-            'fx data not found under %s, searching under r0i0p0',
-            variable['ensemble']
-        )
-        variable['ensemble'] = 'r0i0p0'
-        (files, dirnames, filenames) = _find_input_files(
-            variable, rootpath, drs
-            )
+
     # do time gating only for non-fx variables
-    if variable['frequency'] != 'fx':
-        files = select_files(files, variable['start_year'],
-                             variable['end_year'])
+    if variable["frequency"] != "fx":
+        files = select_files(files, variable["start_year"], variable["end_year"])
     return (files, dirnames, filenames)
 
 
 def get_output_file(variable, preproc_dir):
     """Return the full path to the output (preprocessed) file."""
-    cfg = get_project_config(variable['project'])
+    cfg = get_project_config(variable["project"])
 
     # Join different experiment names
-    if isinstance(variable.get('exp'), (list, tuple)):
+    if isinstance(variable.get("exp"), (list, tuple)):
         variable = dict(variable)
-        variable['exp'] = '-'.join(variable['exp'])
+        variable["exp"] = "-".join(variable["exp"])
 
     outfile = os.path.join(
         preproc_dir,
-        variable['diagnostic'],
-        variable['variable_group'],
-        _replace_tags(cfg['output_file'], variable)[0],
+        variable["diagnostic"],
+        variable["variable_group"],
+        _replace_tags(cfg["output_file"], variable)[0],
     )
-    if variable['frequency'] != 'fx':
-        outfile += '_{start_year}-{end_year}'.format(**variable)
-    outfile += '.nc'
+    if variable["frequency"] != "fx":
+        outfile += "_{start_year}-{end_year}".format(**variable)
+    outfile += ".nc"
     return outfile
 
 
 def get_statistic_output_file(variable, preproc_dir):
     """Get multi model statistic filename depending on settings."""
     template = os.path.join(
         preproc_dir,
-        '{diagnostic}',
-        '{variable_group}',
-        '{dataset}_{mip}_{short_name}_{start_year}-{end_year}.nc',
+        "{diagnostic}",
+        "{variable_group}",
+        "{dataset}_{mip}_{short_name}_{start_year}-{end_year}.nc",
     )
 
     outfile = template.format(**variable)

diff --git a/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx b/esmvalcore/cmor/tables/cordex/Tables/CORDEX_fx
@@ -223,3 +223,26 @@ valid_max:         30.0
 !----------------------------------
 !
 
+!============
+variable_entry:    sftof
+!============
+modeling_realm:    ocean
+!----------------------------------
+! Variable attributes:
+!----------------------------------
+standard_name:     sea_area_fraction
+units:             %
+cell_measures:     area: areacello
+long_name:         Sea Area Fraction
+comment:            This is the area fraction at the ocean surface.
+!----------------------------------
+! Additional variable information:
+!----------------------------------
+dimensions:        longitude latitude
+out_name:          sftof
+type:              real
+valid_min:         0.0
+valid_max:         100.0
+!----------------------------------
+!
+