Skip to content

Commit

Permalink
346 astrog datetime index instead of column (#347)
Browse files Browse the repository at this point in the history
* tryout for astrog_culminations and phases

* fixed for all other astrog functions and fixed example script

* fixed tests

* fixed FutureWarning

* update whatsnew
  • Loading branch information
veenstrajelmer authored Sep 6, 2024
1 parent 75e63f8 commit d9f5d15
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 74 deletions.
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- flexible timezone support in astrog in [#333](https://github.com/Deltares/hatyan/pull/333)
- support for missing `comp` timezone attribute in `hatyan.prediction()` in [#335](https://github.com/Deltares/hatyan/pull/335)
- read status from diafile in `hatyan.read_dia()` and aligned status/qualitycode columns in `hatyan.ddlpy_to_hatyan()` in [#337](https://github.com/Deltares/hatyan/pull/337)
- astrog datetimes as index instead of column in [#347](https://github.com/Deltares/hatyan/pull/347)

### Fix
- repaired support for equidistant multiblock diafiles with varying timesteps in [#314](https://github.com/Deltares/hatyan/pull/314)
Expand Down
38 changes: 21 additions & 17 deletions hatyan/astrog.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ def astrog_culminations(tFirst, tLast, dT_fortran=False):
DEC = astrabOutput['DECMOO']

# make dataframe
astrog_df = pd.DataFrame({'datetime':CULTIM,'type':CULTYP,'parallax':PAR,'declination':DEC}) #CULTIM.round('S') decreases fortran reproduction
astrog_df = pd.DataFrame({'datetime':CULTIM, # CULTIM.round('S') decreases fortran reproduction
'type':CULTYP,'parallax':PAR,'declination':DEC})
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','lowerculmination').replace('2','upperculmination')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -157,8 +159,9 @@ def astrog_phases(tFirst,tLast,dT_fortran=False):
FATIM = astrac(FAEST,dT_fortran=dT_fortran,mode=FATYP+2) + TIMDIF

# make dataframe
astrog_df = pd.DataFrame({'datetime':FATIM.round('s'),'type':FATYP})
astrog_df = pd.DataFrame({'datetime':FATIM.round('s'), 'type':FATYP})
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','FQ').replace('2','FM').replace('3','LQ').replace('4','NM')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -220,6 +223,7 @@ def astrog_sunriseset(tFirst,tLast,dT_fortran=False,lon=5.3876,lat=52.1562):
astrog_df = pd.DataFrame({'datetime':np.concatenate((OPTIM.round('s'),ONTIM.round('s'))),'type':np.repeat([1,2],len(OPTIM))})
astrog_df = astrog_df.sort_values('datetime').reset_index(drop=True)
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','sunrise').replace('2','sunset')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -293,6 +297,7 @@ def astrog_moonriseset(tFirst,tLast,dT_fortran=False,lon=5.3876,lat=52.1562):
astrog_df = pd.DataFrame({'datetime':np.concatenate((OPTIM.round('s'),ONTIM.round('s'))),'type':np.repeat([1,2],len(OPTIM))})
astrog_df = pd.DataFrame(astrog_df).sort_values('datetime').reset_index(drop=True)
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','moonrise').replace('2','moonset')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -362,6 +367,7 @@ def astrog_anomalies(tFirst,tLast,dT_fortran=False):
# make dataframe
astrog_df = pd.DataFrame({'datetime':ANOTIM.round('s'),'type':ANOTYP})
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','perigeum').replace('2','apogeum')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -413,6 +419,7 @@ def astrog_seasons(tFirst,tLast,dT_fortran=False):
# make dataframe
astrog_df = pd.DataFrame({'datetime':SEITIM.round('s'),'type':SEITYP})
astrog_df['type_str'] = astrog_df['type'].astype(str).replace('1','spring').replace('2','summer').replace('3','autumn').replace('4','winter')
astrog_df = astrog_df.set_index("datetime")

#set timezone, check datetime order and filter datetimerange
astrog_df = check_crop_dataframe(astrog_df, tFirst, tLast, tzone)
Expand Down Expand Up @@ -936,14 +943,13 @@ def dT(dateIn,dT_fortran=False):


def check_crop_dataframe(astrog_df, tFirst, tLast, tzone):

#set timezone, check datetime order and filter datetimerange
astrog_df['datetime'] = pd.to_datetime(astrog_df['datetime']).dt.tz_localize('UTC',ambiguous=False,nonexistent='shift_forward') # set timezone (UTC)
astrog_df['datetime'] = astrog_df['datetime'].dt.tz_convert(tzone) #convert timezone to tzone
if (np.diff(astrog_df.sort_values('datetime').index)!=1).any():
# check datetime order
if not astrog_df.index.is_monotonic_increasing:
raise Exception('something went wrong which resulted in off ordering of the dataframe')
astrog_df_dtnaive = astrog_df['datetime'].dt.tz_localize(None)
astrog_df = astrog_df[np.logical_and(astrog_df_dtnaive>=tFirst,astrog_df_dtnaive<=tLast)].reset_index(drop=True)
# crop on time range
astrog_df = astrog_df.loc[tFirst:tLast]
# set and convert timezone
astrog_df = astrog_df.tz_localize('UTC').tz_convert(tzone)
return astrog_df


Expand Down Expand Up @@ -988,13 +994,12 @@ def convert2perday(dataframeIn, timeformat='%H:%M %Z'):
"""

dataframeOut = dataframeIn.copy()
dataframeOut = dataframeIn.reset_index()
dataframeOut.index = dataframeOut['datetime'].dt.date
for type_sel in dataframeOut['type_str'].unique():
dataframeOut[type_sel] = dataframeOut['datetime'][dataframeOut['type_str']==type_sel].dt.strftime(timeformat)
dataframeOut.drop(['type','type_str'],axis='columns',inplace=True)
dataframeOut = dataframeOut.drop(['type','type_str','datetime'],axis='columns')
dataframeOut = dataframeOut[~dataframeOut.index.duplicated(keep='first')]
dataframeOut['datetime'] = dataframeOut.index #overwrite datetime with dates

return dataframeOut

Expand Down Expand Up @@ -1030,13 +1035,12 @@ def plot_astrog_diff(pd_python, pd_fortran, typeCol="type", typeUnit='-', typeLa
"""

if hasattr(pd_python['datetime'].dtype,'tz'):
pd_python = pd_python.copy() #do not overwrite original dataframe, so make a copy
pd_python['datetime'] = pd_python['datetime'].dt.tz_localize(None) #Passing None will remove the time zone information preserving local time. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_localize.html#pandas.Series.dt.tz_localize
if hasattr(pd_python.index,'tz'):
pd_python = pd_python.tz_localize(None) #Passing None will remove the time zone information preserving local time. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.tz_localize.html#pandas.Series.dt.tz_localize

# reset index
pd_python = pd_python.reset_index(drop=True)
pd_fortran = pd_fortran.reset_index(drop=True)
pd_python = pd_python.reset_index(drop=False)
pd_fortran = pd_fortran.reset_index(drop=False)

fig, (ax1,ax2,ax3) = plt.subplots(3,1,figsize=(15,9),sharex=True)
ax1.set_title('%s'%(typeCol))
Expand Down
40 changes: 21 additions & 19 deletions tests/examples/astrog_test.py → tests/examples/astrog_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,30 +80,33 @@
pkl_seas = os.path.join(dir_testdata,'other','astrog30_seasons_2000_2011.pkl')

culminations_fortran = pd.read_pickle(pkl_culm)
culminations_fortran = culminations_fortran[np.logical_and(culminations_fortran['datetime']>=start_date_naive,culminations_fortran['datetime']<=end_date_naive)].reset_index(drop=True)
culminations_fortran['datetime'] = pd.to_datetime(culminations_fortran['datetime'])
culminations_fortran = culminations_fortran.set_index('datetime')
culminations_fortran = culminations_fortran.loc[start_date_naive:end_date_naive]

phases_fortran = pd.read_pickle(pkl_phas)
phases_fortran = phases_fortran[np.logical_and(phases_fortran['datetime']>=start_date_naive,phases_fortran['datetime']<=end_date_naive)].reset_index(drop=True)
phases_fortran = pd.read_pickle(pkl_phas).set_index('datetime')
phases_fortran = phases_fortran.loc[start_date_naive:end_date_naive]

phases_long_fortran = pd.read_csv(txt_phas, sep=';', names=['date','time','type_str'], skiprows=1) # long time series 2021-2035 (Koos Doekes)
phases_long_fortran['datetime']=pd.to_datetime(phases_long_fortran['date'].astype(str)+phases_long_fortran['time'].astype(str).str.zfill(4))
phases_long_fortran['type'] = phases_long_fortran['type_str'].replace('EK',1).replace('VM',2).replace('LK',3).replace('NM',4)
phases_long_python = hatyan.astrog_phases(phases_long_fortran['datetime'].iloc[0]-dt.timedelta(days=5), phases_long_fortran['datetime'].iloc[-1]+dt.timedelta(days=5), dT_fortran=dT_fortran)
phases_long_python['datetime'] = phases_long_python['datetime'].dt.tz_convert(tz_EurAms) #convert to local timezone
phases_long_fortran['datetime'] = pd.to_datetime(phases_long_fortran['date'].astype(str)+phases_long_fortran['time'].astype(str).str.zfill(4))
phases_long_fortran = phases_long_fortran.set_index('datetime')
phases_long_fortran['type'] = phases_long_fortran['type_str'].str.replace('EK','1').str.replace('VM','2').str.replace('LK','3').str.replace('NM','4').astype(int)
phases_long_python = hatyan.astrog_phases(phases_long_fortran.index[0]-dt.timedelta(days=5), phases_long_fortran.index[-1]+dt.timedelta(days=5), dT_fortran=dT_fortran)
phases_long_python = phases_long_python.tz_convert(tz_EurAms) #convert to local timezone

moonriseset_fortran = pd.read_pickle(pkl_moon)
moonriseset_fortran = moonriseset_fortran[np.logical_and(moonriseset_fortran['datetime']>=start_date_naive,moonriseset_fortran['datetime']<=end_date_naive)]
moonriseset_fortran = pd.read_pickle(pkl_moon).set_index('datetime')
moonriseset_fortran = moonriseset_fortran.loc[start_date_naive:end_date_naive]

sunriseset_fortran = pd.read_pickle(pkl_sun)
sunriseset_fortran = sunriseset_fortran[np.logical_and(sunriseset_fortran['datetime']>=start_date_naive,sunriseset_fortran['datetime']<=end_date_naive)]
pyth_index = sunriseset_python['datetime'].dt.date.isin(sunriseset_fortran['datetime'].dt.date.unique())
sunriseset_python_somedays = sunriseset_python.loc[pyth_index].reset_index(drop=True)
sunriseset_fortran = pd.read_pickle(pkl_sun).set_index('datetime')
sunriseset_fortran = sunriseset_fortran.loc[start_date_naive:end_date_naive]
selected_dates = sunriseset_fortran.index.date
sunriseset_python_somedays = sunriseset_python.loc[pd.Index(sunriseset_python.index.date).isin(selected_dates)]

anomalies_fortran = pd.read_pickle(pkl_anom)
anomalies_fortran = anomalies_fortran[np.logical_and(anomalies_fortran['datetime']>=start_date_naive,anomalies_fortran['datetime']<=end_date_naive)].reset_index(drop=True)
anomalies_fortran = pd.read_pickle(pkl_anom).set_index('datetime')
anomalies_fortran = anomalies_fortran.loc[start_date_naive:end_date_naive]

seasons_fortran = pd.read_pickle(pkl_seas)
seasons_fortran = seasons_fortran[np.logical_and(seasons_fortran['datetime']>=start_date_naive,seasons_fortran['datetime']<=end_date_naive)].reset_index(drop=True)
seasons_fortran = pd.read_pickle(pkl_seas).set_index('datetime')
seasons_fortran = seasons_fortran.loc[start_date_naive:end_date_naive]

#%% plot results (differences)
fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(culminations_python, culminations_fortran, typeLab=['lower','upper'], timeBand=[-.18,.18])
Expand All @@ -118,7 +121,7 @@
fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(pd_python=phases_python, pd_fortran=phases_fortran, typeLab=['FQ','FM','LQ','NM'], timeBand=[-30,30])
fig.savefig('phase_differences.png')

fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(pd_python=phases_long_python, pd_fortran=phases_long_fortran[['datetime','type']], typeLab=['FQ','FM','LQ','NM'], timeBand=[-30,30])
fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(pd_python=phases_long_python, pd_fortran=phases_long_fortran[['type']], typeLab=['FQ','FM','LQ','NM'], timeBand=[-30,30])
fig.savefig('phase_differences_longperiod.png')

fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(pd_python=moonriseset_python, pd_fortran=moonriseset_fortran, typeLab=['rise','set'], timeBand=[-30,30])
Expand All @@ -132,4 +135,3 @@

fig, (ax1,ax2,ax3) = hatyan.plot_astrog_diff(pd_python=seasons_python, pd_fortran=seasons_fortran, typeLab=['spring','summer','autumn','winter'], timeBand=[-30,30])
fig.savefig('season_differences.png')

75 changes: 37 additions & 38 deletions tests/test_astrog.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ def test_astrog_culminations():
# moon culminations
culminations_python = hatyan.astrog_culminations(tFirst=start_date, tLast=end_date)

datetimes = culminations_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-01-01 04:44:14.732116514+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-31 13:16:46.464269856+0000')
datetimes = culminations_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-01-01 04:44:14.732116514+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-31 13:16:46.464269856+0000')

subset = culminations_python.iloc[:10]
assert subset["type"].tolist() == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
Expand Down Expand Up @@ -248,10 +248,10 @@ def test_astrog_culminations_met():
# moon culminations
culminations_python = hatyan.astrog_culminations(tFirst=start_date, tLast=end_date)

datetimes = culminations_python['datetime']
assert datetimes.dt.tz == dt.timezone(dt.timedelta(seconds=3600))
assert datetimes.iloc[0] == pd.Timestamp('2020-01-01 05:44:14.732115815+0100')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-31 14:16:46.464269693+0100')
datetimes = culminations_python.index
assert datetimes.tz == dt.timezone(dt.timedelta(seconds=3600))
assert datetimes[0] == pd.Timestamp('2020-01-01 05:44:14.732115815+0100')
assert datetimes[-1] == pd.Timestamp('2020-12-31 14:16:46.464269693+0100')


@pytest.mark.systemtest
Expand All @@ -261,10 +261,10 @@ def test_astrog_phases():
# lunar phases
phases_python = hatyan.astrog_phases(tFirst=start_date, tLast=end_date)

datetimes = phases_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-01-03 04:45:20+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-30 03:28:07+0000')
datetimes = phases_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-01-03 04:45:20+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-30 03:28:07+0000')

expected_type = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
Expand All @@ -289,15 +289,15 @@ def test_astrog_moonriseset():
moonriseset_python = hatyan.astrog_moonriseset(tFirst=start_date, tLast=end_date)
moonriseset_python_perday = hatyan.convert2perday(moonriseset_python)

datetimes = moonriseset_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-01-01 11:15:43+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-31 16:46:52+0000')
datetimes = moonriseset_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-01-01 11:15:43+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-31 16:46:52+0000')

datetimes_perday = pd.to_datetime(moonriseset_python_perday['datetime'])
assert datetimes_perday.dt.tz is None
assert datetimes_perday.iloc[0] == pd.Timestamp('2020-01-01 00:00:00')
assert datetimes_perday.iloc[-1] == pd.Timestamp('2020-12-31 00:00:00')
datetimes_perday = pd.to_datetime(moonriseset_python_perday.index)
assert datetimes_perday.tz is None
assert datetimes_perday[0] == pd.Timestamp('2020-01-01 00:00:00')
assert datetimes_perday[-1] == pd.Timestamp('2020-12-31 00:00:00')

subset = moonriseset_python.iloc[:10]
assert subset["type"].tolist() == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
Expand Down Expand Up @@ -339,15 +339,15 @@ def test_astrog_sunriseset():
sunriseset_python = hatyan.astrog_sunriseset(tFirst=start_date, tLast=end_date)
sunriseset_python_perday = hatyan.convert2perday(sunriseset_python)

datetimes = sunriseset_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-01-01 07:47:25+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-31 15:36:02+0000')
datetimes = sunriseset_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-01-01 07:47:25+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-31 15:36:02+0000')

datetimes_perday = pd.to_datetime(sunriseset_python_perday['datetime'])
assert datetimes_perday.dt.tz is None
assert datetimes_perday.iloc[0] == pd.Timestamp('2020-01-01 00:00:00')
assert datetimes_perday.iloc[-1] == pd.Timestamp('2020-12-31 00:00:00')
datetimes_perday = pd.to_datetime(sunriseset_python_perday.index)
assert datetimes_perday.tz is None
assert datetimes_perday[0] == pd.Timestamp('2020-01-01 00:00:00')
assert datetimes_perday[-1] == pd.Timestamp('2020-12-31 00:00:00')

subset = sunriseset_python.iloc[:10]
assert subset["type"].tolist() == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
Expand All @@ -369,10 +369,10 @@ def test_astrog_anomalies():
# lunar anomalies
anomalies_python = hatyan.astrog_anomalies(tFirst=start_date, tLast=end_date)

datetimes = anomalies_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-01-02 01:25:44+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-24 16:48:18+0000')
datetimes = anomalies_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-01-02 01:25:44+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-24 16:48:18+0000')

expected_type = [2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
Expand All @@ -392,10 +392,10 @@ def test_astrog_seasons():
# astronomical seasons
seasons_python = hatyan.astrog_seasons(tFirst=start_date, tLast=end_date)

datetimes = seasons_python['datetime']
assert datetimes.dt.tz == dt.timezone.utc
assert datetimes.iloc[0] == pd.Timestamp('2020-03-20 03:49:46+0000')
assert datetimes.iloc[-1] == pd.Timestamp('2020-12-21 10:02:39+0000')
datetimes = seasons_python.index
assert datetimes.tz == dt.timezone.utc
assert datetimes[0] == pd.Timestamp('2020-03-20 03:49:46+0000')
assert datetimes[-1] == pd.Timestamp('2020-12-21 10:02:39+0000')

assert seasons_python['type'].tolist() == [1, 2, 3, 4]
assert seasons_python['type_str'].tolist() == ['spring', 'summer', 'autumn', 'winter']
Expand All @@ -407,7 +407,6 @@ def test_plot_astrog_diff():
end_date_utc = pd.Timestamp(2000, 4, 1, tz="UTC")

culminations_python = hatyan.astrog_culminations(tFirst=start_date_utc, tLast=end_date_utc)
culminations_python_naive = culminations_python.copy()
culminations_python_naive['datetime'] = culminations_python_naive['datetime'].dt.tz_localize(None)
culminations_python_naive = culminations_python.tz_localize(None).reset_index()

hatyan.plot_astrog_diff(culminations_python, culminations_python_naive, typeLab=['lower','upper'], timeBand=[-.18,.18])

0 comments on commit d9f5d15

Please sign in to comment.