Skip to content

Commit

Permalink
Update DatastreamParser to accept pathlib.PosixPath (#809)
Browse files Browse the repository at this point in the history
* Changing time to ns precision to stop warning.

* Updated ValueError message to use correct term. Changing time to ns precision to stop warning

* Updated to accept pathlib.PosixPath. Updated to return expected values for site only and site and class only.

* Changed to use ns precision for time to stop warning. Adding more testing for DatastreamParser() and updated to consolidate testing code.

* Fix for linting

* Fix for linting2
  • Loading branch information
kenkehoe authored Mar 11, 2024
1 parent f8ca2f8 commit ef3ea10
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 24 deletions.
6 changes: 3 additions & 3 deletions act/qc/comparison_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ def compare_time_series_trends(
sum_diff = np.array([], dtype=float)
time_diff = np.array([], dtype=np.int32)
for tm_shift in range(-1 * time_shift, time_shift + int(time_step), int(time_step)):
self_da_shifted = self_da.assign_coords(
time=self_da.time.values.astype('datetime64[s]') + tm_shift
)
time = self_da.time.values.astype('datetime64[s]') + tm_shift
time = time.astype('datetime64[ns]')
self_da_shifted = self_da.assign_coords(time=time)

data_matched, comp_data_matched = xr.align(self_da, comp_da)
self_da_shifted = self_da_shifted.reindex(
Expand Down
14 changes: 8 additions & 6 deletions act/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pathlib import Path
import re
import requests
from os import PathLike

spec = importlib.util.find_spec('pyart')
if spec is not None:
Expand Down Expand Up @@ -176,11 +177,10 @@ def __init__(self, ds=''):
The datastream or filename to parse
'''

if isinstance(ds, str):
if isinstance(ds, (str, PathLike)):
self.__datastream = Path(ds).name
else:
raise ValueError('Datastream or filename name must be a string')
raise ValueError('Datastream or filename name must be a string or pathlib.PosixPath.')

try:
self.__parse_datastream()
Expand Down Expand Up @@ -237,15 +237,17 @@ def __parse_datastream(self):
match = True

if not match:
m = re.search(r'(^[a-z]{3})(\w+)$', tempstring[0])
m = re.search(r'(^[a-z]{3})([^A-Z]+)$', tempstring[0])
if m is not None:
self.__site = m.group(1)
self.__class = m.group(2)
match = True

if not match and len(tempstring[0]) == 3:
self.__site = tempstring[0]
match = True
m = re.search(r'(^[a-z]{3})', tempstring[0])
if m is not None:
self.__site = m.group(1)
match = True

if not match:
raise ValueError(self.__datastream)
Expand Down
3 changes: 2 additions & 1 deletion act/utils/datetime_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,15 @@ def adjust_timestamp(ds, time_bounds='time_bounds', align='left', offset=None):
for t in time_bounds
]
else:
raise ValueError('Align should be set to one of [left, right, middle]')
raise ValueError('Align should be set to one of [left, right, center]')

elif offset is not None:
time = ds['time'].values
time_start = [t + np.timedelta64(offset, 's') for t in time]
else:
raise ValueError('time_bounds variable is not available')

time_start = np.array(time_start).astype('datetime64[ns]')
ds = ds.assign_coords({'time': time_start})

return ds
72 changes: 58 additions & 14 deletions tests/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from numpy.testing import assert_almost_equal
from contextlib import redirect_stdout
from io import StringIO
from pathlib import Path

import act
from act.utils.data_utils import DatastreamParserARM as DatastreamParser
Expand All @@ -20,7 +21,7 @@
def test_add_in_nan():
# Make a 1D array of 10 minute data
time = np.arange('2019-01-01T01:00', '2019-01-01T01:10', dtype='datetime64[m]')
time = time.astype('datetime64[us]')
time = time.astype('datetime64[ns]')
time = np.delete(time, range(3, 8))
data = np.linspace(0.0, 8.0, time.size)

Expand Down Expand Up @@ -347,10 +348,12 @@ def test_height_adjusted_pressure():


def test_datastreamparser():
pytest.raises(ValueError, DatastreamParser, 123)
test_values = [1234, 4321.0, True, ['sgpmetE13.b1'], ('sgpmetE13.b1',)]
for test_value in test_values:
pytest.raises(ValueError, DatastreamParser, test_value)

fn_obj = DatastreamParser()
pytest.raises(ValueError, fn_obj.set_datastream, None)
fn_obj = DatastreamParser()
pytest.raises(ValueError, fn_obj.set_datastream, test_values)

fn_obj = DatastreamParser()
assert fn_obj.site is None
Expand All @@ -373,6 +376,16 @@ def test_datastreamparser():
assert fn_obj.time == '024254'
assert fn_obj.ext == 'nc'

fn_obj = DatastreamParser(Path('/data/sgp/sgpmetE13.b1/sgpmetE13.b1.20190501.024254.nc'))
assert fn_obj.site == 'sgp'
assert fn_obj.datastream_class == 'met'
assert fn_obj.facility == 'E13'
assert fn_obj.level == 'b1'
assert fn_obj.datastream == 'sgpmetE13.b1'
assert fn_obj.date == '20190501'
assert fn_obj.time == '024254'
assert fn_obj.ext == 'nc'

fn_obj.set_datastream('nsatwrC1.a0.19991230.233451.cdf')
assert fn_obj.site == 'nsa'
assert fn_obj.datastream_class == 'twr'
Expand Down Expand Up @@ -403,46 +416,77 @@ def test_datastreamparser():
assert fn_obj.time is None
assert fn_obj.ext is None

fn_obj = DatastreamParser('sgpmetE13')
fn_obj = DatastreamParser(Path('sgpmetE13.b1'))
assert fn_obj.site == 'sgp'
assert fn_obj.datastream_class == 'met'
assert fn_obj.facility == 'E13'
assert fn_obj.level is None
assert fn_obj.datastream is None
assert fn_obj.level == 'b1'
assert fn_obj.datastream == 'sgpmetE13.b1'
assert fn_obj.date is None
assert fn_obj.time is None
assert fn_obj.ext is None

fn_obj = DatastreamParser('sgpmet')
fn_obj = DatastreamParser('sgpmetE13')
assert fn_obj.site == 'sgp'
assert fn_obj.datastream_class == 'met'
assert fn_obj.facility is None
assert fn_obj.facility == 'E13'
assert fn_obj.level is None
assert fn_obj.datastream is None
assert fn_obj.date is None
assert fn_obj.time is None
assert fn_obj.ext is None

fn_obj = DatastreamParser('sgp')
fn_obj = DatastreamParser('sgpmet')
assert fn_obj.site == 'sgp'
assert fn_obj.datastream_class is None
assert fn_obj.datastream_class == 'met'
assert fn_obj.facility is None
assert fn_obj.level is None
assert fn_obj.datastream is None
assert fn_obj.date is None
assert fn_obj.time is None
assert fn_obj.ext is None

fn_obj = DatastreamParser('sg')
assert fn_obj.site is None
fn_obj = DatastreamParser('sgp')
assert fn_obj.site == 'sgp'
assert fn_obj.datastream_class is None
assert fn_obj.facility is None
assert fn_obj.level is None
assert fn_obj.datastream is None
assert fn_obj.date is None
assert fn_obj.time is None
assert fn_obj.ext is None
del fn_obj

fn_obj = DatastreamParser(Path('zzzasoinfaoianasdfkansfaiZ99.s9.123456789.987654321.superlong'))
assert fn_obj.site == 'zzz'
assert fn_obj.datastream_class == 'asoinfaoianasdfkansfai'
assert fn_obj.facility == 'Z99'
assert fn_obj.level == 's9'
assert fn_obj.datastream == 'zzzasoinfaoianasdfkansfaiZ99.s9'
assert fn_obj.date == '123456789'
assert fn_obj.time == '987654321'
assert fn_obj.ext == 'superlong'

values = [
'',
' ',
'sg',
'SGP',
'SGPMETE13.B1',
Path('zzzasoinfaoianasdfkansfaiZ999.z1.123456789.987654321.superlong'),
Path('/data/not/a/real/path/AsgpmetE13.b1.20190501.024254.nc'),
'/data/not/a/real/path/AsgpmetE13.b1.20190501.024254.nc',
'zzzasoinfaoianasdfkansfaiZ999.z1.123456789.987654321.superlong',
]
for value in values:
fn_obj = DatastreamParser(value)
assert fn_obj.site is None
assert fn_obj.datastream_class is None
assert fn_obj.facility is None
assert fn_obj.level is None
assert fn_obj.datastream is None
assert fn_obj.date is None
assert fn_obj.time is None
assert fn_obj.ext is None


def test_arm_site_location_search():
Expand Down

0 comments on commit ef3ea10

Please sign in to comment.