Skip to content

Commit

Permalink
new method to check a float string #244 (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
TatianaBurek authored Dec 6, 2022
1 parent 13e8809 commit a455262
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 33 deletions.
4 changes: 3 additions & 1 deletion metcalcpy/agg_eclv.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from metcalcpy.bootstrap import bootstrap_and_value, BootstrapResults
from metcalcpy.event_equalize import event_equalize
from metcalcpy.util.utils import PRECISION
from metcalcpy.util.utils import PRECISION, is_string_strictly_float
from metcalcpy.util.eclv_statistics import *

from metcalcpy.util.utils import is_string_integer, parse_bool
Expand Down Expand Up @@ -239,6 +239,8 @@ def _proceed_with_axis(self):
for i, filter_val in enumerate(filter_list):
if is_string_integer(filter_val):
filter_list[i] = int(filter_val)
elif is_string_strictly_float(filter_val):
filter_list[i] = float(filter_val)
if field in self.input_data.keys():
all_filters.append((self.input_data[field].isin(filter_list)))

Expand Down
13 changes: 8 additions & 5 deletions metcalcpy/agg_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@

from metcalcpy.util.utils import is_string_integer, get_derived_curve_name, \
calc_derived_curve_value, intersection, is_derived_point, parse_bool, \
OPERATION_TO_SIGN, perfect_score_adjustment, perform_event_equalization,\
aggregate_field_values, sort_data, DerivedCurveComponent
OPERATION_TO_SIGN, perfect_score_adjustment, perform_event_equalization, \
aggregate_field_values, sort_data, DerivedCurveComponent, is_string_strictly_float

__author__ = 'Tatiana Burek'

Expand Down Expand Up @@ -1000,22 +1000,25 @@ def _proceed_with_axis(self, axis="1"):
for i, filter_val in enumerate(filter_list):
if is_string_integer(filter_val):
filter_list[i] = int(filter_val)
elif is_string_strictly_float(filter_val):
filter_list[i] = float(filter_val)
if field in self.input_data.keys():
if field != self.params['indy_var']: #
filters_wihtout_indy. \
append((self.input_data[field].isin(filter_list)))
else:
indy_val = filter_value

all_filters.append((self.input_data[field].isin(filter_list)))
all_filters.append(self.input_data[field].isin(filter_list))
if field in series_val.keys():
all_filters_pct.append((self.input_data[field].isin(filter_list)))

# add fcst var
fcst_var = None
if len(self.params['fcst_var_val_' + axis]) > 0 and 'fcst_var' in self.input_data.columns:
if len(self.params['fcst_var_val_' + axis]) > 0:
fcst_var = list(self.params['fcst_var_val_' + axis].keys())[0]
all_filters.append((self.input_data['fcst_var'].isin([fcst_var])))
if 'fcst_var' in self.input_data.columns:
all_filters.append((self.input_data['fcst_var'].isin([fcst_var])))

# use numpy to select the rows where any record evaluates to True
mask = np.array(all_filters).all(axis=0)
Expand Down
9 changes: 8 additions & 1 deletion metcalcpy/agg_stat_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from metcalcpy.util.mode_2d_ratio_statistics import *
from metcalcpy.util.mode_3d_volrat_statistics import *
from metcalcpy.util.mode_3d_ratio_statistics import *
from metcalcpy.util.utils import is_string_integer, parse_bool, sort_data
from metcalcpy.util.utils import is_string_integer, parse_bool, sort_data, is_string_strictly_float


class AggStatBootstrap:
Expand Down Expand Up @@ -128,6 +128,9 @@ def _proceed_with_axis(self, axis="1"):
if is_string_integer(indy_val):
filtered_by_indy_data = \
self.input_data[self.input_data[self.params['indy_var']] == int(indy_val)]
elif is_string_strictly_float(indy_val):
filtered_by_indy_data = \
self.input_data[self.input_data[self.params['indy_var']] == float(indy_val)]
else:
filtered_by_indy_data = \
self.input_data[self.input_data[self.params['indy_var']] == indy_val]
Expand All @@ -149,6 +152,8 @@ def _proceed_with_axis(self, axis="1"):
for i, filter_val in enumerate(filter_list):
if is_string_integer(filter_val):
filter_list[i] = int(filter_val)
elif is_string_strictly_float(filter_val):
filter_list[i] = float(filter_val)

all_filters.append((filtered_by_indy_data[field].isin(filter_list)))

Expand Down Expand Up @@ -183,6 +188,8 @@ def _proceed_with_axis(self, axis="1"):
for i, filter_val in enumerate(filter_list):
if is_string_integer(filter_val):
filter_list[i] = int(filter_val)
elif is_string_strictly_float(filter_val):
filter_list[i] = float(filter_val)

all_filters.append((filtered_by_indy_data[field].isin(filter_list)))
out_frame_filter.append((out_frame[field].isin(filter_list)))
Expand Down
4 changes: 3 additions & 1 deletion metcalcpy/scorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@

from metcalcpy.util.utils import intersection, get_derived_curve_name, \
is_derived_point, is_string_integer, OPERATION_TO_SIGN, calc_derived_curve_value, \
perfect_score_adjustment, sort_data, PRECISION, DerivedCurveComponent
perfect_score_adjustment, sort_data, PRECISION, DerivedCurveComponent, is_string_strictly_float

COLUMNS_TO_REMOVE = ['equalize', 'stat_ncl', 'stat_ncu', 'stat_bcl', 'stat_bcu', 'fcst_valid_beg', 'fcst_init_beg']

Expand Down Expand Up @@ -151,6 +151,8 @@ def calculate_scorecard_data(self):
for i, filter_val in enumerate(filter_list):
if is_string_integer(filter_val):
filter_list[i] = int(filter_val)
elif is_string_strictly_float(filter_val):
filter_list[i] = float(filter_val)
if field in self.input_data.keys():
if field != self.params['indy_var']:
filters_wihtout_indy.append((self.input_data[field].isin(filter_list)))
Expand Down
4 changes: 3 additions & 1 deletion metcalcpy/sum_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
from metcalcpy.util.rps_statistics import *

from metcalcpy.util.utils import is_string_integer, parse_bool, \
aggregate_field_values, perform_event_equalization
aggregate_field_values, perform_event_equalization, is_string_strictly_float


class SumStat:
Expand Down Expand Up @@ -217,6 +217,8 @@ def aggregate_special_fields(self, axis='1'):
for ind, val in enumerate(actual_series_vals):
if is_string_integer(val):
actual_series_vals[ind] = int(val)
elif is_string_strictly_float(val):
actual_series_vals[ind] = float(val)
point_data = \
point_data[point_data[series_vars[index]].isin(actual_series_vals)]

Expand Down
41 changes: 29 additions & 12 deletions metcalcpy/util/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

__author__ = 'Tatiana Burek'


import warnings
# To deal with third-party warnings
from typing import Union
Expand Down Expand Up @@ -97,6 +96,23 @@ def is_string_integer(str_int):
return False


def is_string_strictly_float(str_float) -> bool:
    """Checks if the input string is strictly float.

    A value counts as "strictly float" only when it is a string made of
    an optional single leading minus sign, decimal digits and exactly one
    decimal point, with at least one digit present ('1.5', '-0.25', '5.',
    '.5'). Integer-looking strings ('1'), a leading plus sign ('+1.5'),
    exponent notation ('1e-3'), surrounding whitespace and words such as
    'nan' or 'inf' are all rejected.

        Args:
            str_float: value to check; anything that is not a string
                (None, int, float, bytes, ...) is reported as not float
        Returns:
            True - if the input value is strictly float
            False - if the input value is not strictly float
    """
    # Only real strings have the str methods used below; returning False for
    # everything else (not just None) keeps the check from raising when a
    # value is already numeric.
    if not isinstance(str_float, str):
        return False

    # Drop at most one leading minus sign so negative numbers are accepted.
    unsigned = str_float[1:] if str_float.startswith('-') else str_float

    # Removing only the first '.' means a second '.' (or any other
    # non-digit character) makes isdecimal() fail, and a lone '.' leaves an
    # empty string, which isdecimal() also rejects.
    return '.' in unsigned and unsigned.replace('.', '', 1).isdecimal()


def get_derived_curve_name(list_of_names):
"""Creates the derived series name from the list of series name components
Expand Down Expand Up @@ -411,7 +427,8 @@ def perfect_score_adjustment(mean_stats_1, mean_stats_2, statistic, pval):
and abs(mean_stats_1 - 1) > abs(mean_stats_2 - 1):
result = pval * -1
else:
print(f"WARNING: statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value")
print(
f"WARNING: statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value")
result = pval

return result
Expand Down Expand Up @@ -690,19 +707,19 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi
output_ee_data = output_ee_data.append(series_data_after_ee)

try:
output_ee_data_valid = output_ee_data.drop('equalize', axis=1)
output_ee_data_valid = output_ee_data.drop('equalize', axis=1)

# It is possible to produce an empty data frame after applying event equalization. Print an informational
# message before returning the data frame.
if output_ee_data_valid.empty:
print(f"\nINFO: Event equalization has produced no results. Data frame is empty.")
# It is possible to produce an empty data frame after applying event equalization. Print an informational
# message before returning the data frame.
if output_ee_data_valid.empty:
print(f"\nINFO: Event equalization has produced no results. Data frame is empty.")

return output_ee_data_valid
return output_ee_data_valid
except (KeyError, AttributeError):
# Two possible exceptions are raised when the data frame is empty *and* is missing the 'equalize' column
# following event equalization. Return the empty dataframe
# without dropping the 'equalize' column, and print an informational message.
print(f"\nINFO: No resulting data after performing event equalization of axis", axis)
# Two possible exceptions are raised when the data frame is empty *and* is missing the 'equalize' column
# following event equalization. Return the empty dataframe
# without dropping the 'equalize' column, and print an informational message.
print(f"\nINFO: No resulting data after performing event equalization of axis", axis)

return output_ee_data

Expand Down
22 changes: 11 additions & 11 deletions test/data/agg_stat_and_boot_output.data
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
model fcst_lead stat_name fcst_var stat_value stat_btcl stat_btcu nstats
ENS001v3.6.1_d01 0 FBAR DPT 192.1303904 192.1271511 192.137578 4
ENS001v3.6.1_d01 0 FBAR DPT 192.1303904 192.1263071 192.1363725175 4
ENS001v3.6.1_d01 30000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d01 60000 FBAR DPT 192.1304275 192.1262001 192.1365529 4
ENS001v3.6.1_d01 90000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d01 120000 FBAR DPT 192.1333718 192.1280098 192.1371883 4
ENS001v3.6.1_d01 120000 FBAR DPT 192.1333718 192.12905488 192.13719857 4
ENS001v3.6.1_d01 150000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d01 180000 FBAR DPT 192.1327699 192.12962736 192.1364473 4
ENS001v3.6.1_d01 180000 FBAR DPT 192.1327699 192.1290291 192.13659429749998 4
ENS001v3.6.1_d01 210000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d01 240000 FBAR DPT 192.1305292 192.1272196 192.13256661 4
ENS001v3.6.1_d01 240000 FBAR DPT 192.1305292 192.128156 192.1335303 4
ENS001v3.6.1_d02 0 FBAR DPT 192.1247886 192.12478 192.1248 4
ENS001v3.6.1_d02 30000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d02 60000 FBAR DPT 192.1268052 192.12478 192.1303239 4
ENS001v3.6.1_d02 90000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d02 120000 FBAR DPT 192.12478 NA NA 4
ENS001v3.6.1_d02 150000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d02 180000 FBAR DPT 192.12494 192.12478 192.1251057 4
ENS001v3.6.1_d02 180000 FBAR DPT 192.12494 192.12478 192.1251308 4
ENS001v3.6.1_d02 210000 FBAR DPT NA NA NA 0
ENS001v3.6.1_d02 240000 FBAR DPT 192.1247876 192.12478 192.1247933825 4
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 0 FBAR DPT 0.005601800000022195 0.0020097525000089436 0.010839152500015797 0
ENS001v3.6.1_d02 240000 FBAR DPT 192.1247876 192.12478 192.1247971 4
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 0 FBAR DPT 0.005601800000022195 0.002022500000009586 0.010861507500016642 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 30000 FBAR DPT NA NA NA 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 60000 FBAR DPT 0.003622299999989309 -0.0014125999999805572 0.010593762500005955 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 60000 FBAR DPT 0.003622299999989309 -0.0031821999999976924 0.01176620000001094 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 90000 FBAR DPT NA NA NA 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 120000 FBAR DPT 0.008591800000004923 0.005628925000024765 0.012408300000004147 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 120000 FBAR DPT 0.008591800000004923 0.004274880000019011 0.011897100000027194 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 150000 FBAR DPT NA NA NA 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 180000 FBAR DPT 0.007829899999990175 0.004543457499984527 0.011667299999999159 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 180000 FBAR DPT 0.007829899999990175 0.0037788300000066504 0.010786800000005314 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 210000 FBAR DPT NA NA NA 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 240000 FBAR DPT 0.005741600000021663 0.0033588999999949465 0.008660210000002876 0
DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 240000 FBAR DPT 0.005741600000021663 0.0029691999999954533 0.008847600000024158 0
11 changes: 10 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from metcalcpy.util.utils import represents_int, is_string_integer, get_derived_curve_name, calc_derived_curve_value, \
unique, intersection, is_derived_point, parse_bool, round_half_up, sum_column_data_by_name, \
nrow_column_data_by_name_value, create_permutations_mv, column_data_by_name, calculate_mtd_revision_stats, \
autocor_coef
autocor_coef, is_string_strictly_float


@pytest.fixture
Expand Down Expand Up @@ -39,6 +39,14 @@ def test_is_string_integer():
assert not is_string_integer(None)


def test_is_string_float():
    """Verify is_string_strictly_float against known inputs.

    Each entry pairs a candidate value with whether it is expected to be
    recognised as strictly float.
    """
    expectations = (
        ("1", False),
        ("1.5", True),
        ("not_float", False),
        ("not.float", False),
        (None, False),
    )
    for candidate, expected in expectations:
        outcome = is_string_strictly_float(candidate)
        if expected:
            assert outcome
        else:
            assert not outcome


def test_get_derived_curve_name():
list_of_names = ['analog_e GHI FBAR', 'dicast15 GHI FBAR']
assert 'DIFF(analog_e GHI FBAR-dicast15 GHI FBAR)' == get_derived_curve_name(list_of_names)
Expand Down Expand Up @@ -202,6 +210,7 @@ def test_autocor_coef():
if __name__ == "__main__":
test_represents_int()
test_is_string_integer()
test_is_string_float()
test_get_derived_curve_name()
test_calc_derived_curve_value()
test_unique()
Expand Down

0 comments on commit a455262

Please sign in to comment.