Skip to content

Commit

Permalink
replace 9999 data values with nan (#143)
Browse files Browse the repository at this point in the history
* replace 9999 data values with nan

* add test for _remove_fill_values
  • Loading branch information
callumrollo authored Jan 17, 2023
1 parent f496be6 commit d3950ce
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
15 changes: 15 additions & 0 deletions pyglider/seaexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,20 @@ def _interp_pld_to_pld(pld, ds, val, indctd):
return val


def _remove_fill_values(df, fill_value=9999):
    """
    Replace fill values in the Float64 columns of a polars dataframe with null.

    Every Float64 entry of ``df`` that equals ``fill_value`` is set to null;
    all other entries are kept unchanged. Columns of any other datatype are
    not selected and so pass through untouched. The modified dataframe is
    returned.
    """
    float_cols = pl.col(pl.Float64)
    # keep_name() makes each masked column retain its original name, so
    # with_columns overwrites the existing columns rather than adding new ones.
    masked = (
        pl.when(float_cols == fill_value)
        .then(None)
        .otherwise(float_cols)
        .keep_name()
    )
    return df.with_columns(masked)


def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
profile_filt_time=100, profile_min_time=300):
"""
Expand All @@ -303,6 +317,7 @@ def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
gli = pl.read_parquet(f'{indir}/{id}-rawgli.parquet')
_log.info(f'Opening combined payload file {indir}/{id}-{kind}pld.parquet')
sensor = pl.read_parquet(f'{indir}/{id}-{kind}pld.parquet')
sensor = _remove_fill_values(sensor)

# build a new data set based on info in `deploymentyaml`.
# We will use ctd as the interpolant
Expand Down
9 changes: 9 additions & 0 deletions tests/test_seaexplorer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import polars as pl
import numpy as np
import pytest
from pathlib import Path
import os
Expand Down Expand Up @@ -64,6 +65,14 @@ def test_merge_rawnc():
kind='sub')
assert result_default is False
assert result_sub is True


def test__remove_fill_values():
    """
    Check that _remove_fill_values masks values equalling 9999.

    The GPCTD_DOF column of the sample payload file holds only 9999 before
    the call, and must read back as NaN everywhere once the cleaned dataframe
    is converted to numpy.
    """
    column = "GPCTD_DOF"
    df_in = pl.read_parquet('tests/data/realtime_rawnc/sea035.0012.pld1.sub.0036.parquet')
    df_out = seaexplorer._remove_fill_values(df_in)
    raw_values = df_in.select(column).to_numpy()[:, 0]
    cleaned_values = df_out.select(column).to_numpy()[:, 0]
    # The input must be left intact: it still holds the fill value everywhere.
    assert (raw_values == 9999).all()
    assert np.isnan(cleaned_values).all()


def test__interp_gli_to_pld():
Expand Down

0 comments on commit d3950ce

Please sign in to comment.