add gen_netcdf parameter
Allows skipping the usual .nc generation in case the caller wants better control over file creation, e.g. after multi-day parallel processing.

see pydata/xarray#3781
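
To make the motivation concrete, a hedged caller-side sketch follows (illustration only, not part of this commit): only ProcessHelper, process_day, and the new gen_netcdf parameter come from the diff below; the import paths and the dataset attribute on the returned result are assumptions. With gen_netcdf=False, a driver that processes many days (possibly in parallel) can decide itself when and where the .nc files are written, for example serializing the to_netcdf calls afterwards.

# Hedged caller-side sketch (illustration only, not part of this commit).
from src.file_helper import FileHelper        # assumed module path
from src.process_helper import ProcessHelper  # assumed module path

def process_days(dates: list[str], file_helper: FileHelper,
                 output_dir: str, output_prefix: str) -> list[str]:
    generated = []
    for date in dates:
        helper = ProcessHelper(
            file_helper=file_helper,
            output_dir=output_dir,
            output_prefix=output_prefix,
            gen_netcdf=False,  # skip the usual .nc write (the new parameter)
            # other constructor arguments that ProcessHelper may require are omitted
        )
        result = helper.process_day(date)
        if result is None:
            continue
        # The caller creates the file itself, e.g. sequentially after a
        # multi-day parallel run (see pydata/xarray#3781):
        nc_filename = f"{output_dir}/{output_prefix}{date}.nc"
        result.dataset.to_netcdf(nc_filename)  # the `dataset` attribute is assumed
        generated.append(nc_filename)
    return generated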
carueda committed Oct 22, 2023
1 parent a7feac4 commit 1af760a
Showing 3 changed files with 19 additions and 6 deletions.
src/__init__.py: 9 changes (7 additions & 2 deletions)
@@ -14,7 +14,7 @@ def save_dataset_to_netcdf(logger: PbpLogger, ds: xr.Dataset, filename: str) ->
     wait_secs = 3
     # TODO similar re-attempt logic for the CSV (or other) output

-    for attempt in range(1, max_attempts):
+    for attempt in range(1, max_attempts + 1):
         try:
             ds.to_netcdf(
                 filename,
@@ -25,9 +25,14 @@ def save_dataset_to_netcdf(logger: PbpLogger, ds: xr.Dataset, filename: str) ->
                     "sensitivity": {"_FillValue": None},
                 },
             )
+            logger.info(f" - saved dataset to: {filename} ({attempt=}")
             return True
         except Exception as e:  # pylint: disable=broad-exception-caught
-            error = f"Unable to save {filename}: {e} (attempt {attempt} of {max_attempts})"
+            error = (
+                f"Unable to save {filename}:\n"
+                f" {e}\n"
+                f" (attempt {attempt} of {max_attempts})"
+            )
             logger.error(error)
             print(error)
             if attempt < max_attempts:
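Aside on the loop change above: range(1, max_attempts) yields attempts 1 through max_attempts - 1, so the old code gave up one attempt early; range(1, max_attempts + 1) makes the intended max_attempts tries. A generic standalone sketch of the same retry pattern (names are illustrative, not this repo's code):

# Generic retry sketch mirroring the corrected loop: attempts run 1..max_attempts.
import time

def call_with_retries(action, max_attempts: int = 3, wait_secs: float = 3.0) -> bool:
    for attempt in range(1, max_attempts + 1):  # inclusive of max_attempts
        try:
            action()
            return True
        except Exception as e:  # broad on purpose: any failure triggers a retry
            print(f"attempt {attempt} of {max_attempts} failed: {e}")
            if attempt < max_attempts:
                time.sleep(wait_secs)
    return False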
src/main_cloud.py: 1 change (0 additions & 1 deletion)
@@ -161,7 +161,6 @@ def main():
         file_helper=file_helper,
         output_dir=generated_dir,
         output_prefix=output_prefix,
-        gen_csv=False,
         global_attrs_uri=global_attrs_uri,
         variable_attrs_uri=variable_attrs_uri,
         voltage_multiplier=voltage_multiplier,
src/process_helper.py: 15 changes (12 additions & 3 deletions)
@@ -37,6 +37,7 @@ def __init__(
         file_helper: FileHelper,
         output_dir: str,
         output_prefix: str,
+        gen_netcdf: bool = True,
         gen_csv: bool = False,
         global_attrs_uri: Optional[str] = None,
         variable_attrs_uri: Optional[str] = None,
@@ -54,6 +55,8 @@ def __init__(
             Output directory.
         :param output_prefix:
             Output filename prefix.
+        :param gen_netcdf:
+            True to generate the netCDF file.
         :param gen_csv:
             True to also generate CSV version of the result.
         :param global_attrs_uri:
@@ -80,6 +83,7 @@ def __init__(
             "Creating ProcessHelper:"
             + f"\n output_dir: {output_dir}"
             + f"\n output_prefix: {output_prefix}"
+            + f"\n gen_netcdf: {gen_netcdf}"
             + f"\n gen_csv: {gen_csv}"
             + f"\n global_attrs_uri: {global_attrs_uri}"
             + f"\n variable_attrs_uri: {variable_attrs_uri}"
@@ -97,6 +101,7 @@ def __init__(
         self.file_helper = file_helper
         self.output_dir = output_dir
         self.output_prefix = output_prefix
+        self.gen_netcdf = gen_netcdf
         self.gen_csv = gen_csv

         self.metadata_helper = MetadataHelper(
@@ -217,10 +222,14 @@ def process_day(self, date: str) -> Optional[ProcessDayResult]:
             attrs=self._get_global_attributes(year, month, day),
         )

+        generated_filenames = []
         basename = f"{self.output_dir}/{self.output_prefix}{year:04}{month:02}{day:02}"
-        nc_filename = f"{basename}.nc"
-        save_dataset_to_netcdf(self.logger, ds_result, nc_filename)
-        generated_filenames = [nc_filename]
+
+        if self.gen_netcdf:
+            nc_filename = f"{basename}.nc"
+            save_dataset_to_netcdf(self.logger, ds_result, nc_filename)
+            generated_filenames.append(nc_filename)
+
         if self.gen_csv:
             csv_filename = f"{basename}.csv"
             save_dataset_to_csv(self.logger, ds_result, csv_filename)
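With the two flags, process_day can now generate zero, one, or both files per day, and generated_filenames only lists what was actually written. A tiny illustration of the naming pattern from the hunk above (directory, prefix, and date are made-up values):

# Illustration of the output naming in process_day (made-up values).
output_dir, output_prefix = "generated", "out_"   # hypothetical values
year, month, day = 2023, 10, 22
basename = f"{output_dir}/{output_prefix}{year:04}{month:02}{day:02}"
print(f"{basename}.nc")   # appended to generated_filenames only when gen_netcdf is True
print(f"{basename}.csv")  # appended only when gen_csv is True
# -> generated/out_20231022.nc and generated/out_20231022.csv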
