From a98257730ad6037b39c1b044f1bec5d8d34a71ca Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Sun, 18 Dec 2022 21:22:22 +0000
Subject: [PATCH 01/22] Add ORCID

---
 esmvaltool/config-references.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml
index 3a8324045e..6285edd6f0 100644
--- a/esmvaltool/config-references.yml
+++ b/esmvaltool/config-references.yml
@@ -593,7 +593,7 @@ authors:
   sellar_alistair:
     name: Sellar, Alistair
     institute: MetOffice, UK
-    orcid:
+    orcid: 0000-0002-2955-7254
   wyser_klaus:
     name: Wyser, Klaus
     institute: SMHI, Sweden

From fb5cb1433e4f53bbcc6de159fa6349cc17b9e211 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Sun, 18 Dec 2022 21:32:32 +0000
Subject: [PATCH 02/22] Remove reference to climatology files

---
 .../recipes/recipe_autoassess_landsurface_soilmoisture.yml    | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index a5491cc0b9..e96e9998ee 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -4,9 +4,6 @@
 documentation:
   description: |
     Recipe that runs the Autoassess Land-surface assessment area diagnostic.
-    Climatological files are stored externally to avoid overloading the
-    ESMValTool source. See /gws/nopw/j04/esmeval/autoassess_specific_files
-    (on JASMIN).
 
   authors:
     - predoi_valeriu
@@ -48,7 +45,6 @@ diagnostics:
         obs_models: []
         start: 1993/12/01
         end: 2002/12/01
-        climfiles_root: '/gws/nopw/j04/esmeval/autoassess_specific_files/files'  # on JASMIN
 
   plot_standard:
     description: Wrapper to collect and plot previously calculated metrics

From 874bbb39fca3065c06507e8fbd795e9385cd14ea Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Sat, 14 Jan 2023 12:54:24 +0000
Subject: [PATCH 03/22] Try adding CCI obs

---
 .../recipes/recipe_autoassess_landsurface_soilmoisture.yml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index e96e9998ee..89d411db6a 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -22,10 +22,12 @@ documentation:
 datasets:
     - {dataset: ACCESS-CM2,  project: CMIP6, exp: historical,  grid: gn,  ensemble: r1i1p1f1,  start_year: 1992,  end_year: 2002}
     - {dataset: E3SM-1-0,  project: CMIP6, exp: historical,  grid: gr,  ensemble: r1i1p1f1,  start_year: 1992,  end_year: 2002}
+    - {dataset: ESACCI-SOILMOISTURE, project: OBS, type: sat, version: L3S-SSMV-COMBINED-v4.2, tier: 2, start_year: 1980}
+    #TODO: revise start_year, add end_year
 
 preprocessors:
   pp_aa_area:
-    regrid:  # NOT USED
+      regrid:  # NOT USED  TODO: really?
       target_grid: 0.15x0.15
       scheme: linear
 
@@ -35,8 +37,11 @@ diagnostics:
     variables:
       mrsos: # moisture_content_of_soil_layer
         mip: Lmon
+        preprocessor: seasonal
     scripts:
       autoassess_landsurf_soilmoisture: &autoassess_landsurf_soilmoisture_settings
+      #TODO: simplify to skip autoassess_area_base? simpler to write from
+      #scratch?
         script: autoassess/autoassess_area_base.py
         title: "Autoassess Land-Surface Soilmoisture Diagnostic"
         area: land_surface_soilmoisture

From 413311a0d6152e54baeaea2cc3b016241b65ccee Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Wed, 10 May 2023 17:46:08 +0100
Subject: [PATCH 04/22] Make CCI additional_dataset and add preprocessor

---
 ...ipe_autoassess_landsurface_soilmoisture.yml | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index 89d411db6a..0cb549ed31 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -22,14 +22,13 @@ documentation:
 datasets:
     - {dataset: ACCESS-CM2,  project: CMIP6, exp: historical,  grid: gn,  ensemble: r1i1p1f1,  start_year: 1992,  end_year: 2002}
     - {dataset: E3SM-1-0,  project: CMIP6, exp: historical,  grid: gr,  ensemble: r1i1p1f1,  start_year: 1992,  end_year: 2002}
-    - {dataset: ESACCI-SOILMOISTURE, project: OBS, type: sat, version: L3S-SSMV-COMBINED-v4.2, tier: 2, start_year: 1980}
-    #TODO: revise start_year, add end_year
 
 preprocessors:
-  pp_aa_area:
-      regrid:  # NOT USED  TODO: really?
-      target_grid: 0.15x0.15
-      scheme: linear
+  seasonal:
+    climate_statistics:
+      operator: mean
+      period: seasonal
+      seasons: ['DJF', 'MAM', 'JJA', 'SON']
 
 diagnostics:
   aa_landsurf_soilmoisture:
@@ -38,6 +37,13 @@ diagnostics:
       mrsos: # moisture_content_of_soil_layer
         mip: Lmon
         preprocessor: seasonal
+      sm: # Volumetric Moisture in Upper Portion of Soil Column
+        mip: Lmon
+        project: CMIP5
+        derive: true
+        preprocessor: seasonal
+        additional_datasets:
+            - {dataset: ESACCI-SOILMOISTURE, project: OBS, type: sat, version: L3S-SSMV-COMBINED-v4.2, tier: 2, start_year: 1999, end_year: 2008}
     scripts:
       autoassess_landsurf_soilmoisture: &autoassess_landsurf_soilmoisture_settings
       #TODO: simplify to skip autoassess_area_base? simpler to write from

From 50c4d61fe077e76756c6910e73778f34a3f929d0 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Wed, 10 May 2023 17:53:29 +0100
Subject: [PATCH 05/22] Decouple metrics script from autoassess wrapper

Required in order to access preprocessed obs via main esmvaltool config,
and in any case our strategy is now to port autoassess metrics as
"native" esmvaltool code.
Component changes:
- name diag_script directly in recipe
- add main() function to handle config and extract filenames
- update land_sm_top() to use these preprocessed files
---
 .../land_surface_soilmoisture/soilmoisture.py | 91 ++++++++++++++-----
 ...pe_autoassess_landsurface_soilmoisture.yml |  8 +-
 2 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 85d6a7dc02..4dd77b032f 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -6,7 +6,12 @@
 import iris
 from esmvalcore.preprocessor import regrid
 from esmvaltool.diag_scripts.shared._base import ProvenanceLogger
-from esmvaltool.diag_scripts.shared._supermeans import get_supermean
+from esmvaltool.diag_scripts.shared import group_metadata, run_diagnostic
+
+# Order of seasons must agree with preprocessor definition in recipe
+SEASONS = ("djf", "mam", "jja", "son")
+
+
 
 logger = logging.getLogger(__name__)
 
@@ -33,46 +38,36 @@ def get_provenance_record(caption, run):
     return record
 
 
-def land_sm_top(run):
+def land_sm_top(clim_file, model_file, work_dir):
     """
     Calculate median absolute errors for soil mosture against CCI data.
 
     Parameters
     ----------
-    run: dict
-        dictionary containing model run metadata
-        (see auto_assess/model_run.py for description)
+    clim_file : path to observation climatology file
+    model_file : path to model file
+    work_dir : directory for intermediate working files
 
     Returns
     -------
     metrics: dict
         a dictionary of metrics names and values
     """
-    supermean_data_dir = os.path.join(run['data_root'], run['runid'],
-                                      run['_area'] + '_supermeans')
-
-    seasons = ['djf', 'mam', 'jja', 'son']
 
     # Constant: density of water
     rhow = 1000.
 
     # Work through each season
     metrics = dict()
-    for season in seasons:
-        fname = 'ecv_soil_moisture_{}.nc'.format(season)
-        clim_file = os.path.join(run['climfiles_root'], fname)
-        ecv_clim = iris.load_cube(clim_file)
-        # correct invalid units
-        if (ecv_clim.units == 'unknown' and
-                'invalid_units' in ecv_clim.attributes):
-            if ecv_clim.attributes['invalid_units'] == 'm^3m^-3':
-                ecv_clim.units = 'm3 m-3'
+    for index, season in enumerate(SEASONS):
+
+        constr_season = iris.Constraint(season_number=index)
+        ecv_clim = iris.load_cube(clim_file, constr_season)
 
         # m01s08i223
         # CMOR name: mrsos (soil moisture in top model layer kg/m2)
-        mrsos = get_supermean('mass_content_of_water_in_soil_layer',
-                              season,
-                              supermean_data_dir)
+        mrsos_std_name = "mass_content_of_water_in_soil_layer"
+        mrsos = iris.load_cube(model_file, mrsos_std_name & constr_season)
 
         # Set soil moisture to missing data on ice points (i.e. no soil)
         np.ma.masked_where(mrsos.data == 0, mrsos.data, copy=False)
@@ -117,14 +112,59 @@ def land_sm_top(run):
         dff = vol_sm1_run - ecv_clim
 
         # save output and populate metric
-        iris.save(dff, os.path.join(run['dump_output'],
+        iris.save(dff, os.path.join(work_dir,
                                     'soilmoist_diff_{}.nc'.format(season)))
         name = 'soilmoisture MedAbsErr {}'.format(season)
         dffs = dff.data
         dffs = np.ma.abs(dffs)
         metrics[name] = float(np.ma.median(dffs))
 
-    # record provenance
+    return metrics
+
+
+def main(config):
+    """
+    Top-level function for soil moisture metrics.
+
+    Parameters
+    ----------
+    config : dict
+        The ESMValTool configuration.
+    """
+    logger = logging.getLogger(__name__)
+
+    input_data = config["input_data"]
+
+    # Separate OBS from model datasets
+    # (and check there is only one obs dataset)
+    obs = [v for v in input_data.values() if v["project"] == "OBS"]
+    if len(obs) != 1:
+        msg = "Expected exactly 1 OBS dataset: found {}".format(len(obs))
+        raise RuntimeError(msg)
+    clim_file = obs[0]["filename"]
+
+    models = group_metadata(
+        [v for v in input_data.values() if v["project"] != "OBS"],
+        "dataset")
+
+    for model_dataset, group in models.items():
+        # 'model_dataset' is the name of the model dataset.
+        # 'group' is a list of dictionaries containing metadata.
+        logger.info("Processing data for %s", model_dataset)
+        model_file = [item["filename"] for item in group]
+        metrics = land_sm_top(clim_file, model_file, config["work_dir"])
+
+        # Write metrics
+        metrics_dir = os.path.join(
+            config["plot_dir"],
+            "{}_vs_{}".format(config["exp_model"], config["control_model"]),
+            config["area"],
+            model_dataset,
+        )
+
+        write_metrics(metrics_dir, metrics)
+
+    # Record provenance
     plot_file = "Autoassess soilmoisture metrics"
     caption = 'Autoassess soilmoisture MedAbsErr for {}'.format(str(seasons))
     provenance_record = get_provenance_record(caption, run)
@@ -136,4 +176,7 @@ def land_sm_top(run):
         with ProvenanceLogger(cfg) as provenance_logger:
             provenance_logger.log(plot_file, provenance_record)
 
-    return metrics
+
+if __name__ == "__main__":
+    with run_diagnostic() as CONFIG:
+        main(CONFIG)
diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index 0cb549ed31..061a6a6dfa 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -46,16 +46,10 @@ diagnostics:
             - {dataset: ESACCI-SOILMOISTURE, project: OBS, type: sat, version: L3S-SSMV-COMBINED-v4.2, tier: 2, start_year: 1999, end_year: 2008}
     scripts:
       autoassess_landsurf_soilmoisture: &autoassess_landsurf_soilmoisture_settings
-      #TODO: simplify to skip autoassess_area_base? simpler to write from
-      #scratch?
-        script: autoassess/autoassess_area_base.py
-        title: "Autoassess Land-Surface Soilmoisture Diagnostic"
+        script: autoassess/land_surface_soilmoisture/soilmoisture.py
         area: land_surface_soilmoisture
         control_model: ACCESS-CM2
         exp_model: E3SM-1-0
-        obs_models: []
-        start: 1993/12/01
-        end: 2002/12/01
 
   plot_standard:
     description: Wrapper to collect and plot previously calculated metrics

From 04e7d3ea2f997b87a1ca1e6ced7f8e37bacbe40d Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Wed, 10 May 2023 17:59:37 +0100
Subject: [PATCH 06/22] New function to write metrics files

---
 .../land_surface_soilmoisture/soilmoisture.py | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 4dd77b032f..6205b72ce6 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -3,6 +3,7 @@
 import os
 import logging
 import numpy as np
+import csv
 import iris
 from esmvalcore.preprocessor import regrid
 from esmvaltool.diag_scripts.shared._base import ProvenanceLogger
@@ -38,6 +39,31 @@ def get_provenance_record(caption, run):
     return record
 
 
+def write_metrics(output_dir, metrics):
+    """Write metrics to CSV file.
+
+    The CSV file will have the name ``metrics.csv`` and can be
+    used for the normalised metric assessment plot.
+
+    Parameters
+    ----------
+    output_dir : string
+        The full path to the directory in which the CSV file will be written.
+    seasonal_data : dictionary of metric,value pairs
+        The seasonal data to write.
+    """
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    file_name = f"metrics.csv"
+    file_path = os.path.join(output_dir, file_name)
+
+    with open(file_path, "w", newline="") as csvfile:
+        csv_writer = csv.writer(csvfile)
+        for line in metrics.items():
+            csv_writer.writerow(line)
+
+
 def land_sm_top(clim_file, model_file, work_dir):
     """
     Calculate median absolute errors for soil mosture against CCI data.

From e93d2a3d4f4243b5f489f3c851c0bf9e5c43e69f Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Wed, 10 May 2023 18:00:40 +0100
Subject: [PATCH 07/22] Fix provenance

---
 esmvaltool/config-references.yml               |  4 ++++
 .../land_surface_soilmoisture/soilmoisture.py  | 18 ++++++++----------
 ...ipe_autoassess_landsurface_soilmoisture.yml |  2 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml
index 6285edd6f0..2667ab02a7 100644
--- a/esmvaltool/config-references.yml
+++ b/esmvaltool/config-references.yml
@@ -664,6 +664,10 @@ authors:
     institute:
     orcid:
     github: mcreader97
+  rumbold_heather:
+    name: Heather, Rumbold
+    institute: Met Office, UK
+    orcid:
   senftleben_daniel:
     name: Senftleben, Daniel
     institute: DLR, Germany
diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 6205b72ce6..839997f715 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -17,7 +17,7 @@
 logger = logging.getLogger(__name__)
 
 
-def get_provenance_record(caption, run):
+def get_provenance_record(caption, filenames):
     """Create a provenance record describing the diagnostic data and plot."""
     record = {
         'caption': caption,
@@ -33,7 +33,7 @@ def get_provenance_record(caption, run):
             'dorigo17rse',
             'gruber19essd',
         ],
-        'ancestors': run,
+        "ancestors": filenames,
     }
 
     return record
@@ -192,15 +192,13 @@ def main(config):
 
     # Record provenance
     plot_file = "Autoassess soilmoisture metrics"
-    caption = 'Autoassess soilmoisture MedAbsErr for {}'.format(str(seasons))
-    provenance_record = get_provenance_record(caption, run)
+    caption = "Autoassess soilmoisture MedAbsErr for {}".format(str(SEASONS))
+    filenames = [item["filename"] for item in input_data.values()]
+    provenance_record = get_provenance_record(caption, filenames)
     cfg = {}
-    cfg['run_dir'] = run['out_dir']
-    # avoid rewriting provenance when running the plot diag
-    if not os.path.isfile(os.path.join(cfg['run_dir'],
-                                       'diagnostic_provenance.yml')):
-        with ProvenanceLogger(cfg) as provenance_logger:
-            provenance_logger.log(plot_file, provenance_record)
+    cfg["run_dir"] = config["run_dir"]
+    with ProvenanceLogger(cfg) as provenance_logger:
+        provenance_logger.log(plot_file, provenance_record)
 
 
 if __name__ == "__main__":
diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index 061a6a6dfa..4f04cc6d8c 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -11,7 +11,7 @@ documentation:
 
   title: Land-surface diagnostic that computes soilmoisture indices (from Autoassess).
 
-  references:
+  observation_references:
     - esacci-soilmoisture
     - dorigo17rse
     - gruber19essd

From adb36fd322a9c3177021ad64ebd8f5fcbc20ca18 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 2 Jun 2023 11:21:55 +0100
Subject: [PATCH 08/22] Fix the easy Codacy errors

Last one requires a minor refactor
---
 .../land_surface_soilmoisture/soilmoisture.py | 41 +++++++++----------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 839997f715..aec7c065a1 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -2,8 +2,8 @@
 
 import os
 import logging
-import numpy as np
 import csv
+import numpy as np
 import iris
 from esmvalcore.preprocessor import regrid
 from esmvaltool.diag_scripts.shared._base import ProvenanceLogger
@@ -12,8 +12,6 @@
 # Order of seasons must agree with preprocessor definition in recipe
 SEASONS = ("djf", "mam", "jja", "son")
 
-
-
 logger = logging.getLogger(__name__)
 
 
@@ -49,16 +47,15 @@ def write_metrics(output_dir, metrics):
     ----------
     output_dir : string
         The full path to the directory in which the CSV file will be written.
-    seasonal_data : dictionary of metric,value pairs
+    metrics : dictionary of metric,value pairs
         The seasonal data to write.
     """
-
     os.makedirs(output_dir, exist_ok=True)
 
-    file_name = f"metrics.csv"
+    file_name = "metrics.csv"
     file_path = os.path.join(output_dir, file_name)
 
-    with open(file_path, "w", newline="") as csvfile:
+    with open(file_path, "w", newline="", encoding="utf-8") as csvfile:
         csv_writer = csv.writer(csvfile)
         for line in metrics.items():
             csv_writer.writerow(line)
@@ -70,21 +67,23 @@ def land_sm_top(clim_file, model_file, work_dir):
 
     Parameters
     ----------
-    clim_file : path to observation climatology file
-    model_file : path to model file
-    work_dir : directory for intermediate working files
+    clim_file : string
+        Path to observation climatology file
+    model_file : string
+        Path to model file
+    work_dir : string
+        Directory for intermediate working files
 
     Returns
     -------
     metrics: dict
         a dictionary of metrics names and values
     """
-
     # Constant: density of water
     rhow = 1000.
 
     # Work through each season
-    metrics = dict()
+    metrics = {}
     for index, season in enumerate(SEASONS):
 
         constr_season = iris.Constraint(season_number=index)
@@ -92,8 +91,10 @@ def land_sm_top(clim_file, model_file, work_dir):
 
         # m01s08i223
         # CMOR name: mrsos (soil moisture in top model layer kg/m2)
-        mrsos_std_name = "mass_content_of_water_in_soil_layer"
-        mrsos = iris.load_cube(model_file, mrsos_std_name & constr_season)
+        mrsos = iris.load_cube(
+            model_file,
+            "mass_content_of_water_in_soil_layer" & constr_season
+        )
 
         # Set soil moisture to missing data on ice points (i.e. no soil)
         np.ma.masked_where(mrsos.data == 0, mrsos.data, copy=False)
@@ -139,8 +140,8 @@ def land_sm_top(clim_file, model_file, work_dir):
 
         # save output and populate metric
         iris.save(dff, os.path.join(work_dir,
-                                    'soilmoist_diff_{}.nc'.format(season)))
-        name = 'soilmoisture MedAbsErr {}'.format(season)
+                                    f"soilmoist_diff_{season}.nc"))
+        name = f"soilmoisture MedAbsErr {season}"
         dffs = dff.data
         dffs = np.ma.abs(dffs)
         metrics[name] = float(np.ma.median(dffs))
@@ -157,15 +158,13 @@ def main(config):
     config : dict
         The ESMValTool configuration.
     """
-    logger = logging.getLogger(__name__)
-
     input_data = config["input_data"]
 
     # Separate OBS from model datasets
     # (and check there is only one obs dataset)
     obs = [v for v in input_data.values() if v["project"] == "OBS"]
     if len(obs) != 1:
-        msg = "Expected exactly 1 OBS dataset: found {}".format(len(obs))
+        msg = f"Expected exactly 1 OBS dataset: found {len(obs)}"
         raise RuntimeError(msg)
     clim_file = obs[0]["filename"]
 
@@ -183,7 +182,7 @@ def main(config):
         # Write metrics
         metrics_dir = os.path.join(
             config["plot_dir"],
-            "{}_vs_{}".format(config["exp_model"], config["control_model"]),
+            f"{config['exp_model']}_vs_{config['control_model']}",
             config["area"],
             model_dataset,
         )
@@ -192,7 +191,7 @@ def main(config):
 
     # Record provenance
     plot_file = "Autoassess soilmoisture metrics"
-    caption = "Autoassess soilmoisture MedAbsErr for {}".format(str(SEASONS))
+    caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
     filenames = [item["filename"] for item in input_data.values()]
     provenance_record = get_provenance_record(caption, filenames)
     cfg = {}

From 8ff4a0141e00c1a099ed8b6a1dc6dfecba9d626c Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 2 Jun 2023 11:35:14 +0100
Subject: [PATCH 09/22] Break provenance into own function for pylint

Fixes problem of too-many-locals
(Alternative approach would be to find another pub?)
---
 .../land_surface_soilmoisture/soilmoisture.py | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index aec7c065a1..d828218901 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -149,6 +149,18 @@ def land_sm_top(clim_file, model_file, work_dir):
     return metrics
 
 
+def record_provenance(input_data, config):
+    """Record provenance"""
+    plot_file = "Autoassess soilmoisture metrics"
+    caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
+    filenames = [item["filename"] for item in input_data.values()]
+    provenance_record = get_provenance_record(caption, filenames)
+    cfg = {}
+    cfg["run_dir"] = config["run_dir"]
+    with ProvenanceLogger(cfg) as provenance_logger:
+        provenance_logger.log(plot_file, provenance_record)
+
+
 def main(config):
     """
     Top-level function for soil moisture metrics.
@@ -189,15 +201,7 @@ def main(config):
 
         write_metrics(metrics_dir, metrics)
 
-    # Record provenance
-    plot_file = "Autoassess soilmoisture metrics"
-    caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
-    filenames = [item["filename"] for item in input_data.values()]
-    provenance_record = get_provenance_record(caption, filenames)
-    cfg = {}
-    cfg["run_dir"] = config["run_dir"]
-    with ProvenanceLogger(cfg) as provenance_logger:
-        provenance_logger.log(plot_file, provenance_record)
+    record_provenance(input_data, config)
 
 
 if __name__ == "__main__":

From 5ab7e63c1c85a1148e5fa3fbe492523ab64d7ad2 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 2 Jun 2023 11:38:08 +0100
Subject: [PATCH 10/22] Fix new style error

---
 .../autoassess/land_surface_soilmoisture/soilmoisture.py        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index d828218901..99c63ab9b2 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -150,7 +150,7 @@ def land_sm_top(clim_file, model_file, work_dir):
 
 
 def record_provenance(input_data, config):
-    """Record provenance"""
+    """Record provenance."""
     plot_file = "Autoassess soilmoisture metrics"
     caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
     filenames = [item["filename"] for item in input_data.values()]

From 686ea3ea18d2473ddce55d36927897195652fa28 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 2 Jun 2023 12:16:55 +0100
Subject: [PATCH 11/22] Remove doc mention of autoassess_area_base...

...since diag_script is called directly
---
 ...pe_autoassess_landsurface_soilmoisture.rst | 50 -------------------
 1 file changed, 50 deletions(-)

diff --git a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
index 9734498df8..e7e6d6b48a 100644
--- a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
+++ b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
@@ -123,55 +123,5 @@ Example plots
 
    Normalised metrics plot comparing a control and experiment simulation
 
-
-Additional notes on usage
--------------------------
-The ``landsurface_soilmoisture`` area metric is part of the ``esmvaltool/diag_scripts/autoassess`` diagnostics,
-and, as any other ``autoassess`` metric, it uses the ``autoassess_area_base.py`` as general purpose
-wrapper. This wrapper accepts a number of input arguments that are read through from the recipe.
-
-This recipe is part of the larger group of Autoassess metrics ported to ESMValTool
-from the native Autoassess package from the UK's Met Office. The ``diagnostics`` settings
-are almost the same as for the other Autoassess metrics.
-
 **Currently this recipe is marked as broken, because it only runs on Jasmin due to a dependency on some 
 external climatology files.**
-
-.. note::
-
-   **Time gating for autoassess metrics.**
-
-   To preserve the native Autoassess functionalities,
-   data loading and selection on time is done somewhat
-   differently for ESMValTool's autoassess metrics: the
-   time selection is done in the preprocessor as per usual but
-   a further time selection is performed as part of the diagnostic.
-   For this purpose the user will specify a ``start:`` and ``end:``
-   pair of arguments of ``scripts: autoassess_script`` (see below
-   for example). These are formatted as ``YYYY/MM/DD``; this is
-   necessary since the Autoassess metrics are computed from 1-Dec
-   through 1-Dec rather than 1-Jan through 1-Jan. This is a temporary
-   implementation to fully replicate the native Autoassess functionality
-   and a minor user inconvenience since they need to set an extra set of
-   ``start`` and ``end`` arguments in the diagnostic; this will be phased
-   when all the native Autoassess metrics have been ported to ESMValTool
-   review has completed.
-
-
-An example of standard inputs as read by ``autoassess_area_base.py`` and passed
-over to the diagnostic/metric is listed below.
-
-
-.. code-block:: yaml
-
-    scripts:
-      autoassess_landsurf_soilmoisture: &autoassess_landsurf_soilmoisture_settings
-        script: autoassess/autoassess_area_base.py
-        title: "Autoassess Land-Surface Soilmoisture Diagnostic"
-        area: land_surface_soilmoisture
-        control_model: IPSL-CM5A-LR
-        exp_model: inmcm4
-        obs_models: []
-        start: 1997/12/01
-        end: 2002/12/01
-        climfiles_root: '/gws/nopw/j04/esmeval/autoassess_specific_files/files'  # on JASMIN

From 2efbc261c22083e6534759bcf3b6c5ad4767a10a Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 2 Jun 2023 12:18:28 +0100
Subject: [PATCH 12/22] Remove from broken recipes list

---
 doc/sphinx/source/recipes/broken_recipe_list.rst              | 4 ----
 .../recipes/recipe_autoassess_landsurface_soilmoisture.rst    | 3 ---
 2 files changed, 7 deletions(-)

diff --git a/doc/sphinx/source/recipes/broken_recipe_list.rst b/doc/sphinx/source/recipes/broken_recipe_list.rst
index 7da6f41437..5720261827 100644
--- a/doc/sphinx/source/recipes/broken_recipe_list.rst
+++ b/doc/sphinx/source/recipes/broken_recipe_list.rst
@@ -16,10 +16,6 @@ More details can be found in the :ref:`broken recipe policy
      - Affected diagnostics
      - Problem
      - GitHub issue 
-   * - :ref:`recipe_autoassess_landsurface_soilmoisture.yml <recipe_autoassess_landsurface_soilmoisture.rst>`
-     - All
-     - Dependency on some external climatology files
-     - `#2309 <https://github.com/ESMValGroup/ESMValTool/issues/2309>`_
    * - `recipe_check_obs.yml`
      - `ERA5_native6`
      - Derivation of custom variables `rlus` and `rsus`
diff --git a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
index e7e6d6b48a..80e6fa751d 100644
--- a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
+++ b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
@@ -122,6 +122,3 @@ Example plots
    :alt: Soilmoisture_Metrics.png
 
    Normalised metrics plot comparing a control and experiment simulation
-
-**Currently this recipe is marked as broken, because it only runs on Jasmin due to a dependency on some 
-external climatology files.**

From 50812a9953cf95b44a756514b7f4f98f4949147b Mon Sep 17 00:00:00 2001
From: Alistair Sellar <alistair.sellar@metoffice.gov.uk>
Date: Wed, 14 Jun 2023 08:51:18 +0100
Subject: [PATCH 13/22] Use correct references key

Co-authored-by: Bouwe Andela <b.andela@esciencecenter.nl>
---
 .../recipes/recipe_autoassess_landsurface_soilmoisture.yml      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
index 4f04cc6d8c..061a6a6dfa 100644
--- a/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
+++ b/esmvaltool/recipes/recipe_autoassess_landsurface_soilmoisture.yml
@@ -11,7 +11,7 @@ documentation:
 
   title: Land-surface diagnostic that computes soilmoisture indices (from Autoassess).
 
-  observation_references:
+  references:
     - esacci-soilmoisture
     - dorigo17rse
     - gruber19essd

From 57e55d8ffa9b6e0ddb41725e765bf973943b6fe5 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Wed, 14 Jun 2023 09:12:09 +0100
Subject: [PATCH 14/22] Correct documentation

---
 .../recipe_autoassess_landsurface_soilmoisture.rst  | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
index 80e6fa751d..5ba790b093 100644
--- a/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
+++ b/doc/sphinx/source/recipes/recipe_autoassess_landsurface_soilmoisture.rst
@@ -17,6 +17,7 @@ Performance metrics:
 
 Metrics are calculated using model and observation multi-year climatologies (seasonal means) 
 for meteorological seasons:
+
 * December-January-February (djf)
 * March-April-May (mam)
 * June-July-August (jja)
@@ -38,7 +39,6 @@ Recipes are stored in esmvaltool/recipes/
 
 Diagnostics are stored in esmvaltool/diag_scripts/autoassess/
 
-    * autoassess_area_base.py: wrapper for autoassess scripts
     * land_surface_soilmoisture/soilmoisture.py: script to calculate soil moisture
       metrics
     * plot_autoassess_metrics.py: plot normalised assessment metrics
@@ -47,21 +47,17 @@ Diagnostics are stored in esmvaltool/diag_scripts/autoassess/
 User settings in recipe
 -----------------------
 
-#. Script autoassess_area_base.py
+#. Script soilmoisture.py
 
    *Required settings for script*
 
    * area: must equal land_surface_soilmoisture to select this diagnostic
    * control_model: name of model to be used as control
    * exp_model: name of model to be used as experiment
-   * start: date (YYYY/MM/DD) at which period begins (see note on time gating)
-   * end: date (YYYY/MM/DD) at which period ends (see note on time gating)
-   * climfiles_root: path to observation climatologies
 
    *Optional settings for script*
 
-   * title: arbitrary string with name of diagnostic
-   * obs_models: unused for this recipe
+   none
 
    *Required settings for variables*
 
@@ -97,7 +93,8 @@ User settings in recipe
 Variables
 ---------
 
-* mrsos (land, monthly mean, longitude latitude time)
+* mrsos (from models: land, monthly mean, longitude latitude time)
+* sm (from observations: land, monthly mean, longitude latitude time)
 
 
 Observations and reformat scripts

From 6f79a0ecacbe6b58b70dbcb07ff7efdbda19db88 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 16 Jun 2023 16:41:30 +0100
Subject: [PATCH 15/22] Fix provenance to associate with output file

---
 .../land_surface_soilmoisture/soilmoisture.py  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 99c63ab9b2..1b9a6d11c3 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -37,7 +37,7 @@ def get_provenance_record(caption, filenames):
     return record
 
 
-def write_metrics(output_dir, metrics):
+def write_metrics(output_dir, metrics, config):
     """Write metrics to CSV file.
 
     The CSV file will have the name ``metrics.csv`` and can be
@@ -52,6 +52,7 @@ def write_metrics(output_dir, metrics):
     """
     os.makedirs(output_dir, exist_ok=True)
 
+    #TODO use esmvaltool.diag_scripts.shared.get_diagnostic_filename("metrics", config, extension=csv)
     file_name = "metrics.csv"
     file_path = os.path.join(output_dir, file_name)
 
@@ -60,6 +61,8 @@ def write_metrics(output_dir, metrics):
         for line in metrics.items():
             csv_writer.writerow(line)
 
+    record_provenance(file_path, config)
+
 
 def land_sm_top(clim_file, model_file, work_dir):
     """
@@ -149,16 +152,15 @@ def land_sm_top(clim_file, model_file, work_dir):
     return metrics
 
 
-def record_provenance(input_data, config):
+def record_provenance(diagnostic_file, config):
     """Record provenance."""
-    plot_file = "Autoassess soilmoisture metrics"
     caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
-    filenames = [item["filename"] for item in input_data.values()]
+    filenames = [item["filename"] for item in config["input_data"].values()]
     provenance_record = get_provenance_record(caption, filenames)
     cfg = {}
-    cfg["run_dir"] = config["run_dir"]
+    cfg["run_dir"] = config["run_dir"]  #TODO needed?
     with ProvenanceLogger(cfg) as provenance_logger:
-        provenance_logger.log(plot_file, provenance_record)
+        provenance_logger.log(diagnostic_file, provenance_record)
 
 
 def main(config):
@@ -199,9 +201,7 @@ def main(config):
             model_dataset,
         )
 
-        write_metrics(metrics_dir, metrics)
-
-    record_provenance(input_data, config)
+        write_metrics(metrics_dir, metrics, config)
 
 
 if __name__ == "__main__":

From 332392b6f6a4a94e51f3fb5c2b55e22d46a4cc38 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 16 Jun 2023 16:45:21 +0100
Subject: [PATCH 16/22] Remove redundant mini config dict

---
 .../autoassess/land_surface_soilmoisture/soilmoisture.py      | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 1b9a6d11c3..cd3abeff6d 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -157,9 +157,7 @@ def record_provenance(diagnostic_file, config):
     caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
     filenames = [item["filename"] for item in config["input_data"].values()]
     provenance_record = get_provenance_record(caption, filenames)
-    cfg = {}
-    cfg["run_dir"] = config["run_dir"]  #TODO needed?
-    with ProvenanceLogger(cfg) as provenance_logger:
+    with ProvenanceLogger(config) as provenance_logger:
         provenance_logger.log(diagnostic_file, provenance_record)
 
 

From 4e8d4e35d6ea6831ab0154974ee0fbbdf7cc29f8 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 16 Jun 2023 17:10:46 +0100
Subject: [PATCH 17/22] Document new argument

---
 .../autoassess/land_surface_soilmoisture/soilmoisture.py       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index cd3abeff6d..f7978acf81 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -49,10 +49,11 @@ def write_metrics(output_dir, metrics, config):
         The full path to the directory in which the CSV file will be written.
     metrics : dictionary of metric,value pairs
         The seasonal data to write.
+    config : dictionary
+        ESMValTool configuration object
     """
     os.makedirs(output_dir, exist_ok=True)
 
-    #TODO use esmvaltool.diag_scripts.shared.get_diagnostic_filename("metrics", config, extension=csv)
     file_name = "metrics.csv"
     file_path = os.path.join(output_dir, file_name)
 

From f266afc2539f4578b6b7960d981e4e074d5ae5cc Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Fri, 16 Jun 2023 18:21:11 +0100
Subject: [PATCH 18/22] Add provenance for AutoAssess metrics plots

Use share save_figure() instead of plt.savefig()
---
 .../autoassess/_plot_mo_metrics.py            | 34 +++++++++++++++----
 .../autoassess/plot_autoassess_metrics.py     |  7 ++--
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/_plot_mo_metrics.py b/esmvaltool/diag_scripts/autoassess/_plot_mo_metrics.py
index 425cdbd0b6..52eca796ab 100644
--- a/esmvaltool/diag_scripts/autoassess/_plot_mo_metrics.py
+++ b/esmvaltool/diag_scripts/autoassess/_plot_mo_metrics.py
@@ -13,6 +13,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
+from esmvaltool.diag_scripts.shared import save_figure
+
 # Define some colours
 BLACK = '#000000'
 RED = '#FF0000'
@@ -596,7 +598,8 @@ def plot_nac(cref,
              acc=None,
              extend_y=False,
              title=None,
-             ofile=None):
+             ofile=None,
+             config=None):
     """
     Routine to produce NAC plot.
 
@@ -611,6 +614,7 @@ def plot_nac(cref,
     :param bool extend_y: Extend y-axis to include obs/acc ranges
     :param str title: Plot title
     :param str ofile: Plot file name
+    :param dict config: ESMValTool configuration object
     """
     # initialize
     if metrics is None:
@@ -682,15 +686,31 @@ def plot_nac(cref,
     legend.set_title('Vs %s' % cref, prop={'size': 'small'})
 
     # Display or produce file
-    if ofile:
-        # Create directory to write file to
-        odir = os.path.dirname(ofile)
-        if not os.path.isdir(odir):
-            os.makedirs(odir)
+    if ofile and config:
+        os.makedirs(config['plot_dir'], exist_ok=True)
+        provenance = get_provenance_record(config)
         # Note that bbox_inches only works for png plots
-        plt.savefig(ofile, bbox_extra_artists=(legend, ), bbox_inches='tight')
+        save_figure(ofile, provenance, config, fig,
+                    bbox_extra_artists=(legend, ), bbox_inches='tight')
     else:
         # Need the following to attempt to display legend in frame
         fig.subplots_adjust(right=0.85)
         plt.show()
     plt.close()
+
+
+def get_provenance_record(config):
+    """Create a provenance record describing the diagnostic data and plot."""
+    filenames = [item["filename"] for item in config["input_data"].values()]
+    record = {
+        'caption': 'Normalised assessment criteria plot',
+        'plot_type': 'metrics',
+        'authors': [
+            'williams_keith',
+            'predoi_valeriu',
+            'sellar_alistair'
+        ],
+        "ancestors": filenames,
+    }
+
+    return record
diff --git a/esmvaltool/diag_scripts/autoassess/plot_autoassess_metrics.py b/esmvaltool/diag_scripts/autoassess/plot_autoassess_metrics.py
index ccf3051d64..201ae0c248 100644
--- a/esmvaltool/diag_scripts/autoassess/plot_autoassess_metrics.py
+++ b/esmvaltool/diag_scripts/autoassess/plot_autoassess_metrics.py
@@ -43,10 +43,12 @@ def main():
         cfg['diag_name'], vsloc, cfg['area'], control_model, 'metrics.csv')
 
     plot_title = ' '.join([cfg['area'], control_model, 'vs', exp_model])
-    # Read metrics files
+    # Read (and record) metrics files
     # metrics = read_order_metrics(args.file_ord)
     ref = read_model_metrics(file_ref)
     tests = [read_model_metrics(file_exp)]
+    cfg['input_data'] = {'ref': {'filename': file_ref},
+                         'exp': {'filename': file_exp}}
     # var = read_model_metrics(args.file_var)
     obs, acc = None, None
     if 'additional_metrics' in cfg:
@@ -68,7 +70,8 @@ def main():
         acc=acc,
         extend_y=False,
         title=plot_title,
-        ofile=os.path.join(cfg['plot_dir'], cfg['plot_name'] + '.png'))
+        ofile=cfg['plot_name'],
+        config=cfg)
 
 
 if __name__ == '__main__':

From 473af6de365e2b03b11675a618abc77bcbe19c40 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Mon, 19 Jun 2023 10:24:29 +0100
Subject: [PATCH 19/22] Add provenance for intermediate cube save

---
 .../land_surface_soilmoisture/soilmoisture.py | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index f7978acf81..7755ae7e3e 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -7,7 +7,11 @@
 import iris
 from esmvalcore.preprocessor import regrid
 from esmvaltool.diag_scripts.shared._base import ProvenanceLogger
-from esmvaltool.diag_scripts.shared import group_metadata, run_diagnostic
+from esmvaltool.diag_scripts.shared import (
+    group_metadata,
+    run_diagnostic,
+    save_data,
+)
 
 # Order of seasons must agree with preprocessor definition in recipe
 SEASONS = ("djf", "mam", "jja", "son")
@@ -65,7 +69,7 @@ def write_metrics(output_dir, metrics, config):
     record_provenance(file_path, config)
 
 
-def land_sm_top(clim_file, model_file, work_dir):
+def land_sm_top(clim_file, model_file, model_dataset, config):
     """
     Calculate median absolute errors for soil mosture against CCI data.
 
@@ -75,8 +79,10 @@ def land_sm_top(clim_file, model_file, work_dir):
         Path to observation climatology file
     model_file : string
         Path to model file
-    work_dir : string
-        Directory for intermediate working files
+    model_dataset : string
+        Name of model dataset
+    config : dict
+        ESMValTool configuration object
 
     Returns
     -------
@@ -86,6 +92,9 @@ def land_sm_top(clim_file, model_file, work_dir):
     # Constant: density of water
     rhow = 1000.
 
+    # Input filenames for provenance
+    filenames = [item["filename"] for item in config["input_data"].values()]
+
     # Work through each season
     metrics = {}
     for index, season in enumerate(SEASONS):
@@ -143,8 +152,11 @@ def land_sm_top(clim_file, model_file, work_dir):
         dff = vol_sm1_run - ecv_clim
 
         # save output and populate metric
-        iris.save(dff, os.path.join(work_dir,
-                                    f"soilmoist_diff_{season}.nc"))
+        caption = f"Model minus CCI soil moisture climatology for {season}"
+        provenance_record = get_provenance_record(caption, filenames)
+        save_data(f"soilmoist_diff_{model_dataset}_{season}",
+                  provenance_record, config, dff)
+
         name = f"soilmoisture MedAbsErr {season}"
         dffs = dff.data
         dffs = np.ma.abs(dffs)
@@ -190,7 +202,7 @@ def main(config):
         # 'group' is a list of dictionaries containing metadata.
         logger.info("Processing data for %s", model_dataset)
         model_file = [item["filename"] for item in group]
-        metrics = land_sm_top(clim_file, model_file, config["work_dir"])
+        metrics = land_sm_top(clim_file, model_file, model_dataset, config)
 
         # Write metrics
         metrics_dir = os.path.join(

From e8cebfe4b3dc11512ea0e1d973fc88a5e758f406 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Mon, 19 Jun 2023 10:45:24 +0100
Subject: [PATCH 20/22] Move unit conversion into its own function

To keep pylint happy
---
 .../land_surface_soilmoisture/soilmoisture.py | 77 ++++++++++++-------
 1 file changed, 48 insertions(+), 29 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 7755ae7e3e..8dd548ab35 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -69,6 +69,52 @@ def write_metrics(output_dir, metrics, config):
     record_provenance(file_path, config)
 
 
+def volumetric_soil_moisture(model_file, constr_season):
+    """
+    Read moisture mass content and convert to volumetric soil moisture 
+
+    Parameters
+    ----------
+    model_file : string
+        Path to model file
+    constr_season : iris constraint
+        Constraint on season to load
+
+    Returns
+    -------
+    vol_sm1_run : cube
+        Volumetric soil moisture
+    """
+    # Constant: density of water
+    rhow = 1000.
+
+    # m01s08i223
+    # CMOR name: mrsos (soil moisture in top model layer kg/m2)
+    mrsos = iris.load_cube(
+        model_file,
+        "mass_content_of_water_in_soil_layer" & constr_season
+    )
+
+    # Set soil moisture to missing data where no soil (moisture=0)
+    np.ma.masked_where(mrsos.data == 0, mrsos.data, copy=False)
+
+    # first soil layer depth
+    dz1 = mrsos.coord('depth').bounds[0, 1] - \
+        mrsos.coord('depth').bounds[0, 0]
+
+    # Calculate the volumetric soil moisture in m3/m3
+    # volumetric soil moisture = volume of water / volume of soil layer
+    # = depth equivalent of water / thickness of soil layer
+    # = (soil moisture content (kg m-2) / water density (kg m-3) )  /
+    #      soil layer thickness (m)
+    # = mosrs / (rhow * dz1)
+    vol_sm1_run = mrsos / (rhow * dz1)
+    vol_sm1_run.units = "m3 m-3"
+    vol_sm1_run.long_name = "Top layer Soil Moisture"
+
+    return vol_sm1_run
+
+
 def land_sm_top(clim_file, model_file, model_dataset, config):
     """
     Calculate median absolute errors for soil mosture against CCI data.
@@ -89,9 +135,6 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
     metrics: dict
         a dictionary of metrics names and values
     """
-    # Constant: density of water
-    rhow = 1000.
-
     # Input filenames for provenance
     filenames = [item["filename"] for item in config["input_data"].values()]
 
@@ -102,29 +145,7 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
         constr_season = iris.Constraint(season_number=index)
         ecv_clim = iris.load_cube(clim_file, constr_season)
 
-        # m01s08i223
-        # CMOR name: mrsos (soil moisture in top model layer kg/m2)
-        mrsos = iris.load_cube(
-            model_file,
-            "mass_content_of_water_in_soil_layer" & constr_season
-        )
-
-        # Set soil moisture to missing data on ice points (i.e. no soil)
-        np.ma.masked_where(mrsos.data == 0, mrsos.data, copy=False)
-
-        # first soil layer depth
-        dz1 = mrsos.coord('depth').bounds[0, 1] - \
-            mrsos.coord('depth').bounds[0, 0]
-
-        # Calculate the volumetric soil moisture in m3/m3
-        # volumetric soil moisture = volume of water / volume of soil layer
-        # = depth equivalent of water / thickness of soil layer
-        # = (soil moisture content (kg m-2) / water density (kg m-3) )  /
-        #      soil layer thickness (m)
-        # = mosrs / (rhow * dz1)
-        vol_sm1_run = mrsos / (rhow * dz1)
-        vol_sm1_run.units = "m3 m-3"
-        vol_sm1_run.long_name = "Top layer Soil Moisture"
+        vol_sm1_run = volumetric_soil_moisture(model_file, constr_season)
 
         # update the coordinate system ECV data with a WGS84 coord system
         # unify coord systems for regridder
@@ -158,9 +179,7 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
                   provenance_record, config, dff)
 
         name = f"soilmoisture MedAbsErr {season}"
-        dffs = dff.data
-        dffs = np.ma.abs(dffs)
-        metrics[name] = float(np.ma.median(dffs))
+        metrics[name] = float(np.ma.median(np.ma.abs(dff.data)))
 
     return metrics
 

From e64ee646939d0affa918a4bcdf1f5efad03a856d Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Mon, 19 Jun 2023 10:56:36 +0100
Subject: [PATCH 21/22] Fix style error

---
 .../autoassess/land_surface_soilmoisture/soilmoisture.py        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 8dd548ab35..90af0b0b94 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -71,7 +71,7 @@ def write_metrics(output_dir, metrics, config):
 
 def volumetric_soil_moisture(model_file, constr_season):
     """
-    Read moisture mass content and convert to volumetric soil moisture 
+    Read moisture mass content and convert to volumetric soil moisture.
 
     Parameters
     ----------

From 10cb722f91d09e42625ee69ad9ae76323f70af77 Mon Sep 17 00:00:00 2001
From: "alistair.sellar" <alistair.sellar@metoffice.gov.uk>
Date: Mon, 19 Jun 2023 17:12:46 +0100
Subject: [PATCH 22/22] Make provenance specific to model dataset

---
 .../land_surface_soilmoisture/soilmoisture.py | 65 ++++++++++++++-----
 1 file changed, 48 insertions(+), 17 deletions(-)

diff --git a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
index 90af0b0b94..0533b94eed 100644
--- a/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
+++ b/esmvaltool/diag_scripts/autoassess/land_surface_soilmoisture/soilmoisture.py
@@ -3,6 +3,7 @@
 import os
 import logging
 import csv
+from collections.abc import Iterable
 import numpy as np
 import iris
 from esmvalcore.preprocessor import regrid
@@ -19,7 +20,7 @@
 logger = logging.getLogger(__name__)
 
 
-def get_provenance_record(caption, filenames):
+def get_provenance_record(caption, ancestor_filenames):
     """Create a provenance record describing the diagnostic data and plot."""
     record = {
         'caption': caption,
@@ -35,13 +36,13 @@ def get_provenance_record(caption, filenames):
             'dorigo17rse',
             'gruber19essd',
         ],
-        "ancestors": filenames,
+        "ancestors": ancestor_filenames,
     }
 
     return record
 
 
-def write_metrics(output_dir, metrics, config):
+def write_metrics(output_dir, metrics, config, ancestors):
     """Write metrics to CSV file.
 
     The CSV file will have the name ``metrics.csv`` and can be
@@ -55,6 +56,8 @@ def write_metrics(output_dir, metrics, config):
         The seasonal data to write.
     config : dictionary
         ESMValTool configuration object
+    ancestors : list
+        Filenames of input files for provenance
     """
     os.makedirs(output_dir, exist_ok=True)
 
@@ -66,7 +69,7 @@ def write_metrics(output_dir, metrics, config):
         for line in metrics.items():
             csv_writer.writerow(line)
 
-    record_provenance(file_path, config)
+    record_provenance(file_path, config, ancestors)
 
 
 def volumetric_soil_moisture(model_file, constr_season):
@@ -115,7 +118,31 @@ def volumetric_soil_moisture(model_file, constr_season):
     return vol_sm1_run
 
 
-def land_sm_top(clim_file, model_file, model_dataset, config):
+def flatten(list_of_lists):
+    """
+    Convert list of lists into a flat list, allowing some items to be non-list.
+
+    Parameters
+    ----------
+    list_of_lists : list
+        List containing iterables to flatten, plus optionally non-list items
+
+    Returns
+    -------
+    flattened : list
+        Flattened list with one level of nesting removed
+    """
+    flattened = []
+    for item in list_of_lists:
+        if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
+            flattened.extend(item)
+        else:
+            flattened.append(item)
+
+    return flattened
+
+
+def land_sm_top(clim_file, model_file, model_dataset, config, ancestors):
     """
     Calculate median absolute errors for soil mosture against CCI data.
 
@@ -123,21 +150,20 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
     ----------
     clim_file : string
         Path to observation climatology file
-    model_file : string
-        Path to model file
+    model_file : list
+        Paths to model files
     model_dataset : string
         Name of model dataset
     config : dict
         ESMValTool configuration object
+    ancestors : list
+        Filenames of input files for provenance
 
     Returns
     -------
     metrics: dict
         a dictionary of metrics names and values
     """
-    # Input filenames for provenance
-    filenames = [item["filename"] for item in config["input_data"].values()]
-
     # Work through each season
     metrics = {}
     for index, season in enumerate(SEASONS):
@@ -173,8 +199,8 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
         dff = vol_sm1_run - ecv_clim
 
         # save output and populate metric
-        caption = f"Model minus CCI soil moisture climatology for {season}"
-        provenance_record = get_provenance_record(caption, filenames)
+        caption = f"{model_dataset} minus CCI soil moisture clim for {season}"
+        provenance_record = get_provenance_record(caption, ancestors)
         save_data(f"soilmoist_diff_{model_dataset}_{season}",
                   provenance_record, config, dff)
 
@@ -184,11 +210,10 @@ def land_sm_top(clim_file, model_file, model_dataset, config):
     return metrics
 
 
-def record_provenance(diagnostic_file, config):
+def record_provenance(diagnostic_file, config, ancestors):
     """Record provenance."""
     caption = f"Autoassess soilmoisture MedAbsErr for {SEASONS}"
-    filenames = [item["filename"] for item in config["input_data"].values()]
-    provenance_record = get_provenance_record(caption, filenames)
+    provenance_record = get_provenance_record(caption, ancestors)
     with ProvenanceLogger(config) as provenance_logger:
         provenance_logger.log(diagnostic_file, provenance_record)
 
@@ -221,7 +246,13 @@ def main(config):
         # 'group' is a list of dictionaries containing metadata.
         logger.info("Processing data for %s", model_dataset)
         model_file = [item["filename"] for item in group]
-        metrics = land_sm_top(clim_file, model_file, model_dataset, config)
+
+        # Input filenames for provenance
+        ancestors = flatten([model_file, clim_file])
+
+        # Calculate metrics
+        metrics = land_sm_top(clim_file, model_file, model_dataset, config,
+                              ancestors)
 
         # Write metrics
         metrics_dir = os.path.join(
@@ -231,7 +262,7 @@ def main(config):
             model_dataset,
         )
 
-        write_metrics(metrics_dir, metrics, config)
+        write_metrics(metrics_dir, metrics, config, ancestors)
 
 
 if __name__ == "__main__":