Skip to content
This repository has been archived by the owner on Mar 22, 2023. It is now read-only.

Commit

Permalink
open_mfdataset not playing well with encoding time when re-saving
Browse files Browse the repository at this point in the history
annoying issue with xarray pydata/xarray#2436
  • Loading branch information
henryaddison committed Nov 15, 2021
1 parent ee19628 commit 0b2a355
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
19 changes: 14 additions & 5 deletions preprocessing/split_by_year.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,35 @@

import cftime
import xarray
import glob

"""
Split a multi-file NetCDF dataset into a single file per year.
"""
class SplitByYear:

def __init__(self, input_dir, output_filepath_prefix, years = itertools.chain(range(1980, 2000), range(2020, 2040), range(2060, 2080))) -> None:
self.input_dir = input_dir
def __init__(self, input_filepath_prefix, output_filepath_prefix, years=None) -> None:
    """Store the input/output filename prefixes and the years to extract.

    Args:
        input_filepath_prefix: Path of the input files up to (but not
            including) the ``_<dates>.nc`` suffix.
        output_filepath_prefix: Path of the output files up to (but not
            including) the ``_<dates>.nc`` suffix.
        years: Iterable of start years to extract. When omitted, the
            three 20-year spans 1980-1999, 2020-2039 and 2060-2079 are
            used.
    """
    if years is None:
        # Build the default per instance: a single itertools.chain object
        # used as a default argument is created once, shared by every
        # instance, and empty after its first full iteration.
        years = itertools.chain(range(1980, 2000), range(2020, 2040), range(2060, 2080))

    self.input_filepath_prefix = input_filepath_prefix
    self.output_filepath_prefix = output_filepath_prefix
    # Materialise so that one-shot iterators passed by callers can be
    # iterated more than once.
    self.years = list(years)

def run(self):
output_files = []
def gcm_file(self, year):
    """Return the date-span part of the input filename that covers ``year``.

    Input files are addressed by a ten-year span that starts on 1 December
    of a year ending in 9 and ends on 30 November ten years later, e.g.
    ``19791201-19891130``.

    Args:
        year: Start year of the single-year period being extracted.

    Returns:
        The span formatted as ``<start>1201-<end>1130``. The ``.nc``
        extension is deliberately left off because ``run`` appends it when
        it builds the full path; including it here produced ``.nc.nc``.
    """
    if year % 10 == 9:
        # A year ending in 9 is itself the first year of its file.
        start = year
    else:
        # Otherwise the file started in the last year of the previous decade.
        start = (year // 10) * 10 - 1

    end = start + 10

    return f"{start}1201-{end}1130"

def run(self):
output_files = []

for year in self.years:
input = xarray.load_dataset(f"{self.input_filepath_prefix}_{self.gcm_file(year)}.nc")
single_year_input = input.sel(time=slice(cftime.Datetime360Day(year, 12, 1, 12, 0, 0, 0) , cftime.Datetime360Day(year+1, 11, 30, 12, 0, 0, 0)))

output_filepath = f"{self.output_filepath_prefix}_{year}1201-{year+1}1130.nc"
Expand Down
6 changes: 3 additions & 3 deletions split-gcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
def get_args():
parser = argparse.ArgumentParser(description='Regrid GCM data to match the CPM data',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--input', dest='input_dir', type=Path, required=True,
help='Path to directory holding raw files')
parser.add_argument('--input-prefix', dest='input_prefix', type=Path, required=True,
help='Prefix of input files to split up (so filepath up to the date part)')
parser.add_argument('--output-prefix', dest='output_prefix', type=str, required=True,
help='Prefix of output files including directory path')
parser.add_argument('--years', dest='years', nargs='+', type=int, required=True,
Expand All @@ -22,6 +22,6 @@ def get_args():

os.makedirs(os.path.dirname(args.output_prefix), exist_ok=True)

outputs = SplitByYear(args.input_dir, args.output_prefix, args.years).run()
outputs = SplitByYear(args.input_prefix, args.output_prefix, args.years).run()

print(outputs)

0 comments on commit 0b2a355

Please sign in to comment.