added netcdf4_classic to example script and docs

Deltares · Aug 23, 2024 · abb0d02 · abb0d02
1 parent f2a504f
commit abb0d02
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 3 deletions.
diff --git a/ddlpy/utils.py b/ddlpy/utils.py
@@ -76,7 +76,11 @@ def dataframe_to_xarray(df: pd.DataFrame, drop_if_constant=[]):
     
     The timestamps are converted to UTC since xarray does not support non-UTC timestamps.
     These can be converted to different timezones after loading the netcdf and converting 
-    to a pandas dataframe with df.index.tz_convert()
+    to a pandas dataframe with df.index.tz_convert().
+
+    When writing the dataset to disk with ds.to_netcdf(), it is recommended to use
+    `format="NETCDF3_CLASSIC"` or `format="NETCDF4_CLASSIC"`, since these formats automatically
+    convert variables of dtype <U to |S, which saves a lot of disk space for DDL data.
     """
 
     # create list of columns with duplicate info (often not constant), will be dropped

diff --git a/docs/examples/retrieve_parallel_to_netcdf.py b/docs/examples/retrieve_parallel_to_netcdf.py
@@ -39,8 +39,9 @@ def get_data(location, start_date, end_date, dir_output, overwrite=True):
     ds = simplified.to_xarray()
     ds = ds.assign_attrs(simplified.attrs)
 
-    # write to netcdf file    
-    ds.to_netcdf(filename)
+    # write to netcdf file. The NETCDF3_CLASSIC and NETCDF4_CLASSIC formats automatically
+    # convert variables of dtype <U to |S, which saves a lot of disk space
+    ds.to_netcdf(filename, format="NETCDF4_CLASSIC")
 
 
 if ( __name__ == "__main__" ):