From 14eb39543a14d64d5df0534dd0ba0d63a2286b4d Mon Sep 17 00:00:00 2001 From: Henry Addison Date: Mon, 15 Nov 2021 12:05:34 +0000 Subject: [PATCH] open_mfdataset not playing well with encoding time when re-saving annoying issue with xarray https://github.com/pydata/xarray/issues/2436 --- deterministic/preprocessing/split_by_year.py | 19 ++++++++++++++----- deterministic/split-gcm.py | 6 +++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/deterministic/preprocessing/split_by_year.py b/deterministic/preprocessing/split_by_year.py index d17c37e9..e0fbb731 100644 --- a/deterministic/preprocessing/split_by_year.py +++ b/deterministic/preprocessing/split_by_year.py @@ -2,26 +2,35 @@ import cftime import xarray +import glob """ Split up an nc multifile dataset into single file per year. """ class SplitByYear: - def __init__(self, input_dir, output_filepath_prefix, years = itertools.chain(range(1980, 2000), range(2020, 2040), range(2060, 2080))) -> None: - self.input_dir = input_dir + def __init__(self, input_filepath_prefix, output_filepath_prefix, years = tuple(itertools.chain(range(1980, 2000), range(2020, 2040), range(2060, 2080)))) -> None: + self.input_filepath_prefix = input_filepath_prefix self.output_filepath_prefix = output_filepath_prefix self.years = years pass - def run(self): - output_files = [] + def gcm_file(self, year): + if (year % 10) <= 8: + start = (year // 10) * 10 - 1 + else: + start = year + + end = start + 10 - input = xarray.open_mfdataset(str(self.input_dir/"*.nc")) + return f"{start}1201-{end}1130.nc" + def run(self): + output_files = [] for year in self.years: + input = xarray.load_dataset(f"{self.input_filepath_prefix}_{self.gcm_file(year)}") single_year_input = input.sel(time=slice(cftime.Datetime360Day(year, 12, 1, 12, 0, 0, 0) , cftime.Datetime360Day(year+1, 11, 30, 12, 0, 0, 0))) output_filepath = f"{self.output_filepath_prefix}_{year}1201-{year+1}1130.nc" diff --git a/deterministic/split-gcm.py b/deterministic/split-gcm.py 
index adbb236b..f2396c0b 100644 --- a/deterministic/split-gcm.py +++ b/deterministic/split-gcm.py @@ -7,8 +7,8 @@ def get_args(): parser = argparse.ArgumentParser(description='Regrid GCM data to match the CPM data', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input', dest='input_dir', type=Path, required=True, - help='Path to directory holding raw files') + parser.add_argument('--input-prefix', dest='input_prefix', type=Path, required=True, + help='Prefix of input files to split up (so filepath up to the date part)') parser.add_argument('--output-prefix', dest='output_prefix', type=str, required=True, help='Prefix of output files including directory path') parser.add_argument('--years', dest='years', nargs='+', type=int, required=True, @@ -22,6 +22,6 @@ def get_args(): os.makedirs(os.path.dirname(args.output_prefix), exist_ok=True) - outputs = SplitByYear(args.input_dir, args.output_prefix, args.years).run() + outputs = SplitByYear(args.input_prefix, args.output_prefix, args.years).run() print(outputs)