Skip to content

Commit

Permalink
FIX: IO Warning fixes. (#719)
Browse files Browse the repository at this point in the history
* FIX: Fixes for some warnings.

* FIX: Fix for more warnings.

* MNT: Remove final warnings.

* FIX: Add missing dtype conversion.

* FIX: Fix for datatype.

* MNT: Revert cftime call.
  • Loading branch information
zssherman authored Sep 29, 2023
1 parent 2d9d683 commit cf6ee7a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 20 deletions.
14 changes: 8 additions & 6 deletions act/io/armfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import tempfile
import warnings

from cftime import num2date
import numpy as np
import xarray as xr
import datetime as dt
Expand Down Expand Up @@ -177,8 +178,8 @@ def read_netcdf(
# If requested use base_time and time_offset to derive time. Assumes that the units
# of both are in seconds and that the value is number of seconds since epoch.
if use_base_time:
time = (ds['base_time'].values + ds['time_offset'].values) * 1000000.0
time = np.array(time, dtype='datetime64[us]')
time = num2date(ds['base_time'].values + ds['time_offset'].values, ds['base_time'].attrs['units'])
time = time.astype('datetime64[ns]')

# Need to use a new Dataset creation to correctly index time for use with
# .group and .resample methods in Xarray Datasets.
Expand All @@ -196,10 +197,8 @@ def read_netcdf(
# https://github.com/pydata/xarray/issues/3644
# To ensure the times are read in correctly need to set use_cftime=True.
# This will read in time as cftime object. But Xarray uses numpy datetime64
# natively. This will convert the cftime time values to numpy datetime64. cftime
# does not preserve the time past ms precision. We will use ms precision for
# the conversion.
desired_time_precision = 'datetime64[ms]'
# natively. This will convert the cftime time values to numpy datetime64.
desired_time_precision = 'datetime64[ns]'
for var_name in ['time', 'time_offset']:
try:
if 'time' in ds.dims and type(ds[var_name].values[0]).__module__.startswith('cftime.'):
Expand Down Expand Up @@ -762,6 +761,9 @@ def write_netcdf(
if 'history' in list(write_ds.attrs.keys()):
write_ds.attrs['history'] += ''.join(['\n', str(current_time), ' created by ACT ', str(act.__version__),
' act.io.write.write_netcdf'])

if hasattr(write_ds, 'time_bounds') and not write_ds.time.encoding:
write_ds.time.encoding.update(write_ds.time_bounds.encoding)

write_ds.to_netcdf(encoding=encoding, **kwargs)

Expand Down
22 changes: 11 additions & 11 deletions act/io/noaagml.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def read_gml_halo(filename, **kwargs):
elif var_name.endswith('min'):
min_name = var_name

timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[s]')
timestamp = np.full(ds[var_names[0]].size, np.nan, dtype="datetime64[ns]")
for ii in range(0, len(timestamp)):
if min_name is not None:
ts = datetime(
Expand All @@ -285,7 +285,7 @@ def read_gml_halo(filename, **kwargs):
ts = datetime(
ds[year_name].values[ii], ds[month_name].values[ii], 1)

timestamp[ii] = np.datetime64(ts)
timestamp[ii] = np.datetime64(ts, "ns")

for var_name in [year_name, month_name, day_name, hour_name, min_name]:
try:
Expand Down Expand Up @@ -421,7 +421,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
ds = act.io.csvfiles.read_csv(
filename, sep=r'\s+', skiprows=skiprows, **kwargs)

timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
for ii in range(0, len(timestamp)):
ts = datetime(
ds['year'].values[ii],
Expand All @@ -431,7 +431,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
ds['minute'].values[ii],
ds['second'].values[ii],
)
timestamp[ii] = np.datetime64(ts)
timestamp[ii] = np.datetime64(ts, "ns")

ds = ds.rename({'index': 'time'})
ds = ds.assign_coords(time=timestamp)
Expand Down Expand Up @@ -542,15 +542,15 @@ def read_gml_ozone(filename=None, **kwargs):
filename, sep=r'\s+', skiprows=skiprows, **kwargs)
ds.attrs['station'] = str(ds['STN'].values[0]).lower()

timestamp = np.full(ds['YEAR'].size, np.nan, dtype='datetime64[s]')
timestamp = np.full(ds['YEAR'].size, np.nan, dtype="datetime64[ns]")
for ii in range(0, len(timestamp)):
ts = datetime(
ds['YEAR'].values[ii],
ds['MON'].values[ii],
ds['DAY'].values[ii],
ds['HR'].values[ii],
)
timestamp[ii] = np.datetime64(ts)
timestamp[ii] = np.datetime64(ts, "ns")

ds = ds.rename({'index': 'time'})
ds = ds.assign_coords(time=timestamp)
Expand Down Expand Up @@ -820,7 +820,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
)
ds.attrs['location'] = station

timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
for ii in range(0, len(timestamp)):
ts = datetime(
ds['year'].values[ii],
Expand All @@ -829,7 +829,7 @@ def read_gml_radiation(filename=None, convert_missing=True,
ds['hour'].values[ii],
ds['minute'].values[ii],
)
timestamp[ii] = np.datetime64(ts)
timestamp[ii] = np.datetime64(ts, "ns")

ds = ds.rename({'index': 'time'})
ds = ds.assign_coords(time=timestamp)
Expand Down Expand Up @@ -999,7 +999,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
column_names=column_names.keys(), **kwargs)

if ds is not None:
timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
timestamp = np.full(ds['year'].size, np.nan, dtype="datetime64[ns]")
for ii in range(0, len(timestamp)):
if minutes:
ts = datetime(
Expand All @@ -1017,7 +1017,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
ds['hour'].values[ii],
)

timestamp[ii] = np.datetime64(ts)
timestamp[ii] = np.datetime64(ts, "ns")

ds = ds.rename({'index': 'time'})
ds = ds.assign_coords(time=timestamp)
Expand Down Expand Up @@ -1080,7 +1080,7 @@ def read_surfrad(filename, **kwargs):
'relative_humidity', 'qc_relative_humidity', 'wind_speed', 'qc_wind_speed', 'wind_direction',
'qc_wind_direction', 'pressure', 'qc_pressure']
for i, f in enumerate(filename):
new_df = pd.read_csv(f, names=names, skiprows=2, delimiter='\s+', header=None)
new_df = pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None)
if i == 0:
df = new_df
else:
Expand Down
2 changes: 1 addition & 1 deletion act/io/noaapsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def read_psl_surface_met(filenames, conf_file=None):
minute = [int(str(ii)[3:]) for ii in hourmin]
minute = np.array(minute, dtype='timedelta64[m]')
time = time + day + hour + minute

time = time.astype("datetime64[ns]")
# Update Dataset to use "time" coordinate and assigned calculated times
ds = ds.assign_coords(index=time)
ds = ds.rename(index='time')
Expand Down
12 changes: 10 additions & 2 deletions act/io/sodar.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import fsspec
import numpy as np
import pandas as pd
import xarray as xr

from act.io.noaapsl import filter_list

Expand Down Expand Up @@ -74,7 +75,7 @@ def read_mfas_sodar(filepath):
# tmp_columns is used to remove the '#' column that causes
# columns to move over by one.
df = pd.read_table(filepath,
sep='\s+',
sep=r'\s+',
skiprows=skip_full_ind,
names=tmp_columns,
usecols=columns)
Expand Down Expand Up @@ -103,7 +104,14 @@ def read_mfas_sodar(filepath):

# Use unique time and height values to reindex data to be two dimensional.
ind = pd.MultiIndex.from_product((time_dim, height_dim), names=('time', 'height'))
ds = ds.assign(Dates=ind).unstack("Dates")

# Xarray 2023.9 uses a new syntax for assigning a MultiIndex; the try/except
# falls back to the older syntax for previous versions.
try:
mindex_coords = xr.Coordinates.from_pandas_multiindex(ind, 'Dates')
ds = ds.assign_coords(mindex_coords).unstack("Dates")
except AttributeError:
ds = ds.assign(Dates=ind).unstack("Dates")

# Add file metadata.
for key in file_dict.keys():
Expand Down

0 comments on commit cf6ee7a

Please sign in to comment.