
Commit

merge main
callumrollo committed Feb 23, 2023
2 parents bb24105 + 827f7f7 commit 6205c43
Showing 4 changed files with 67 additions and 29 deletions.
10 changes: 5 additions & 5 deletions pyglider/ncprocess.py
@@ -111,19 +111,19 @@ def extract_timeseries_profiles(inname, outdir, deploymentyaml):

# outname = outdir + '/' + utils.get_file_id(dss) + '.nc'
_log.info('Writing %s', outname)
if 'units' in dss.profile_time.attrs:
dss.profile_time.attrs.pop('units')
timeunits = 'seconds since 1970-01-01T00:00:00Z'
timecalendar = 'gregorian'
dss.to_netcdf(outname, encoding={'time': {'units': timeunits,
'calendar': timecalendar}})
'calendar': timecalendar},
'profile_time':
{'units': timeunits}})

# add traj_strlen using bare netCDF4 to make IOOS happy
with netCDF4.Dataset(outname, 'r+') as nc:
nc.renameDimension('string%d' % trajlen, 'traj_strlen')


def make_gridfiles(inname, outdir, deploymentyaml, dz=1):
def make_gridfiles(inname, outdir, deploymentyaml, *, fnamesuffix='', dz=1):
"""
Turn a timeseries netCDF file into a vertically gridded netCDF.
@@ -241,7 +241,7 @@ def make_gridfiles(inname, outdir, deploymentyaml, dz=1):
'water_velocity_northward'])
dsout.attrs = ds.attrs

outname = outdir + '/' + ds.attrs['deployment_name'] + '_grid.nc'
outname = outdir + '/' + ds.attrs['deployment_name'] + '_grid' + fnamesuffix + '.nc'
_log.info('Writing %s', outname)
timeunits = 'seconds since 1970-01-01T00:00:00Z'
dsout.to_netcdf(outname, encoding={'time': {'units': timeunits}})
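
A minimal usage sketch of the updated make_gridfiles signature (the input file, output directory, and yaml names below are hypothetical): the new keyword-only fnamesuffix argument is inserted between '_grid' and '.nc', so this example would write <deployment_name>_grid_delayed.nc.

from pyglider import ncprocess

ncprocess.make_gridfiles(
    'l0-timeseries/deployment.nc',  # hypothetical timeseries netCDF input
    'l0-gridfiles',                 # output directory
    'deployment.yml',               # deployment yaml
    fnamesuffix='_delayed',         # new: appended to the grid file name before '.nc'
    dz=1,                           # vertical bin size (default)
)
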
46 changes: 25 additions & 21 deletions pyglider/seaexplorer.py
@@ -287,8 +287,22 @@ def _interp_pld_to_pld(pld, ds, val, indctd):
return val


def _remove_fill_values(df, fill_value=9999):
"""
For the input dataframe df, this function converts all float values equal to fill_value to null. Columns of
other datatypes are not affected.
"""
df = df.with_columns(
pl.when(pl.col(pl.Float64) == fill_value)
.then(None)
.otherwise(pl.col(pl.Float64))
.keep_name()
)
return df


def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
profile_filt_time=100, profile_min_time=300, interpolate=False):
profile_filt_time=100, profile_min_time=300, interpolate=False, fnamesuffix=''):
"""
A little different than above, for the 4-file version of the data set.
"""
@@ -303,6 +317,7 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
gli = pl.read_parquet(f'{indir}/{id}-rawgli.parquet')
_log.info(f'Opening combined payload file {indir}/{id}-{kind}pld.parquet')
sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet')
sensor = _remove_fill_values(sensor)

# build a new data set based on info in `deploymentyaml.`
# We will use ctd as the interpolant
@@ -313,25 +328,14 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
if atts != 'coordinates':
attr[atts] = ncvar[name][atts]

# If present, use the timebase specified in ncva: timebase in the
# deployment yaml. Otherwise, the ctd will be our timebase.
# It oversamples the nav data, but mildly undersamples the optics and
# oxygen....
if 'timebase' in ncvar:
vals = sensor.select([ncvar['timebase']['source']]).to_numpy()[:, 0]
indctd = np.where(~np.isnan(vals))[0]
elif 'GPCTD_TEMPERATURE' in list(sensor.variables):
_log.warning('No timebase specified. Using GPCTD_TEMPERATURE as time'
'base')
indctd = np.where(~np.isnan(sensor.GPCTD_TEMPERATURE))[0]
elif 'LEGATO_TEMPERATURE' in list(sensor.variables):
_log.warning('No timebase specified. Using LEGATO_TEMPERATURE as time'
'base')
indctd = np.where(~np.isnan(sensor.LEGATO_TEMPERATURE))[0]
else:
_log.warning('No gpctd or legato data found. Using NAV_DEPTH as time'
'base')
indctd = np.where(~np.isnan(sensor.NAV_DEPTH))[0]
# If present, use the timebase specified in ncvar: timebase in the
# deployment yaml.
if 'timebase' not in ncvar:
raise ValueError("Must specify timebase:source in netcdf_variables section of deployment yaml")
if ncvar['timebase']['source'] not in sensor.columns:
raise ValueError(f"timebase source: {ncvar['timebase']['source']} not found in pld1 columns")
vals = sensor.select([ncvar['timebase']['source']]).to_numpy()[:, 0]
indctd = np.where(~np.isnan(vals))[0]
ds['time'] = (('time'), sensor.select('time').to_numpy()[indctd, 0], attr)
thenames = list(ncvar.keys())
# Check yaml to see if interpolate has been set to True
@@ -473,7 +477,7 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
except:
pass
id0 = ds.attrs['deployment_name']
outname = outdir + id0 + '.nc'
outname = outdir + id0 + fnamesuffix + '.nc'
_log.info('writing %s', outname)
if 'units' in ds.time.attrs.keys():
ds.time.attrs.pop('units')
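
A toy sketch of how the new _remove_fill_values helper behaves (it is a private helper, and the column names here are only illustrative): 9999 values in float columns become null, while columns of other dtypes pass through untouched.

import polars as pl

from pyglider import seaexplorer

# Stand-in for a pld1 payload frame: one float sensor column containing
# 9999 fill values and one integer column that must not be modified.
df = pl.DataFrame({
    'GPCTD_DOF': [9999.0, 1.2, 9999.0],
    'NAV_RESOURCE': [110, 110, 116],
})

clean = seaexplorer._remove_fill_values(df)
print(clean['GPCTD_DOF'].to_list())     # [None, 1.2, None]
print(clean['NAV_RESOURCE'].to_list())  # [110, 110, 116] (unchanged)
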
7 changes: 4 additions & 3 deletions pyglider/slocum.py
@@ -789,8 +789,9 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, *,


def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
search='*.[D|E]BD', time_base='sci_water_temp',
profile_filt_time=100, profile_min_time=300):
search='*.[D|E]BD', fnamesuffix='',
time_base='sci_water_temp', profile_filt_time=100,
profile_min_time=300):
"""
Convert directly from binary files to netcdf timeseries file. Requires
dbdreader to be installed.
@@ -934,7 +935,7 @@ def binary_to_timeseries(indir, cachedir, outdir, deploymentyaml, *,
os.mkdir(outdir)
except:
pass
outname = (outdir + '/' + ds.attrs['deployment_name'] + '.nc')
outname = (outdir + '/' + ds.attrs['deployment_name'] + fnamesuffix + '.nc')
_log.info('writing %s', outname)
ds.to_netcdf(outname, 'w',
encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z'}})
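
As in seaexplorer.py, a hedged sketch of the extended binary_to_timeseries call (the directories below are hypothetical); fnamesuffix is again appended to the deployment name just before '.nc'.

from pyglider import slocum

slocum.binary_to_timeseries(
    'raw-binary/',            # directory of *.DBD/*.EBD files (hypothetical)
    'cache/',                 # dbdreader cache directory
    'l0-timeseries',          # output directory
    'deployment.yml',         # deployment yaml
    fnamesuffix='_realtime',  # new: writes e.g. <deployment_name>_realtime.nc
    time_base='sci_water_temp',
)
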
33 changes: 33 additions & 0 deletions tests/test_seaexplorer.py
@@ -1,7 +1,9 @@
import polars as pl
import numpy as np
import pytest
from pathlib import Path
import os
import yaml
os.system('rm tests/data/realtime_rawnc/*')
library_dir = Path(__file__).parent.parent.absolute()
example_dir = library_dir / 'tests/example-data/'
@@ -63,6 +65,14 @@ def test_merge_rawnc():
kind='sub')
assert result_default is False
assert result_sub is True


def test__remove_fill_values():
# This should convert values equalling 9999 in the original df to nan
df_in = pl.read_parquet('tests/data/realtime_rawnc/sea035.0012.pld1.sub.0036.parquet')
df_out = seaexplorer._remove_fill_values(df_in)
assert (df_in.select("GPCTD_DOF").to_numpy()[:, 0] == 9999).all()
assert np.isnan(df_out.select("GPCTD_DOF").to_numpy()[:, 0]).all()


def test__interp_gli_to_pld():
@@ -88,3 +98,26 @@ def test_raw_to_timeseries():
assert 'No such file or directory' in str(missing_file_exc)
assert result_sub == 'tests/data/l0-profiles/dfo-eva035-20190718.nc'


def test_missing_bad_timebase():
# Prepare yaml files with bad timebase and no timebase
with open(example_dir / 'example-seaexplorer/deploymentRealtime.yml') as fin:
deployment = yaml.safe_load(fin)
deployment['netcdf_variables']['timebase']['source'] = "non existing sensor"
with open(example_dir / 'example-seaexplorer/bad_timebase.yml', "w") as fin:
yaml.dump(deployment, fin)
deployment['netcdf_variables'].pop('timebase')
with open(example_dir / 'example-seaexplorer/no_timebase.yml', "w") as fin:
yaml.dump(deployment, fin)
with pytest.raises(ValueError) as bad_timebase_exc:
result_bad_timebase = seaexplorer.raw_to_timeseries('tests/data/realtime_rawnc/',
'tests/data/l0-profiles/',
example_dir / 'example-seaexplorer/bad_timebase.yml',
kind='sub')
with pytest.raises(ValueError) as no_timebase_exc:
result_no_timebase = seaexplorer.raw_to_timeseries('tests/data/realtime_rawnc/',
'tests/data/l0-profiles/',
example_dir / 'example-seaexplorer/no_timebase.yml',
kind='sub')
assert "sensor not found in pld1 columns" in str(bad_timebase_exc)
assert "Must specify timebase" in str(no_timebase_exc)
