From 70cf91ce679d695a9fa00d6b194f1c5c7b6d3d99 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 5 Dec 2022 12:01:45 -0700 Subject: [PATCH] new method to check a float string #244 --- metcalcpy/agg_eclv.py | 4 ++- metcalcpy/agg_stat.py | 13 +++++--- metcalcpy/agg_stat_bootstrap.py | 9 +++++- metcalcpy/scorecard.py | 4 ++- metcalcpy/sum_stat.py | 4 ++- metcalcpy/util/utils.py | 41 +++++++++++++++++-------- test/data/agg_stat_and_boot_output.data | 22 ++++++------- test/test_utils.py | 11 ++++++- 8 files changed, 75 insertions(+), 33 deletions(-) diff --git a/metcalcpy/agg_eclv.py b/metcalcpy/agg_eclv.py index 2bcb73b6..eed39ebd 100644 --- a/metcalcpy/agg_eclv.py +++ b/metcalcpy/agg_eclv.py @@ -40,7 +40,7 @@ from metcalcpy.bootstrap import bootstrap_and_value, BootstrapResults from metcalcpy.event_equalize import event_equalize -from metcalcpy.util.utils import PRECISION +from metcalcpy.util.utils import PRECISION, is_string_strictly_float from metcalcpy.util.eclv_statistics import * from metcalcpy.util.utils import is_string_integer, parse_bool @@ -239,6 +239,8 @@ def _proceed_with_axis(self): for i, filter_val in enumerate(filter_list): if is_string_integer(filter_val): filter_list[i] = int(filter_val) + elif is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) if field in self.input_data.keys(): all_filters.append((self.input_data[field].isin(filter_list))) diff --git a/metcalcpy/agg_stat.py b/metcalcpy/agg_stat.py index c193f192..06645851 100644 --- a/metcalcpy/agg_stat.py +++ b/metcalcpy/agg_stat.py @@ -57,8 +57,8 @@ from metcalcpy.util.utils import is_string_integer, get_derived_curve_name, \ calc_derived_curve_value, intersection, is_derived_point, parse_bool, \ - OPERATION_TO_SIGN, perfect_score_adjustment, perform_event_equalization,\ - aggregate_field_values, sort_data, DerivedCurveComponent + OPERATION_TO_SIGN, perfect_score_adjustment, perform_event_equalization, \ + aggregate_field_values, sort_data, DerivedCurveComponent, is_string_strictly_float __author__ = 'Tatiana Burek' @@ -1000,6 +1000,8 @@ def _proceed_with_axis(self, axis="1"): for i, filter_val in enumerate(filter_list): if is_string_integer(filter_val): filter_list[i] = int(filter_val) + elif is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) if field in self.input_data.keys(): if field != self.params['indy_var']: # filters_wihtout_indy. \ @@ -1007,15 +1009,16 @@ def _proceed_with_axis(self, axis="1"): else: indy_val = filter_value - all_filters.append((self.input_data[field].isin(filter_list))) + all_filters.append(self.input_data[field].isin(filter_list)) if field in series_val.keys(): all_filters_pct.append((self.input_data[field].isin(filter_list))) # add fcst var fcst_var = None - if len(self.params['fcst_var_val_' + axis]) > 0 and 'fcst_var' in self.input_data.columns: + if len(self.params['fcst_var_val_' + axis]) > 0: fcst_var = list(self.params['fcst_var_val_' + axis].keys())[0] - all_filters.append((self.input_data['fcst_var'].isin([fcst_var]))) + if 'fcst_var' in self.input_data.columns: + all_filters.append((self.input_data['fcst_var'].isin([fcst_var]))) # use numpy to select the rows where any record evaluates to True mask = np.array(all_filters).all(axis=0) diff --git a/metcalcpy/agg_stat_bootstrap.py b/metcalcpy/agg_stat_bootstrap.py index b5adb50c..9346c2bf 100644 --- a/metcalcpy/agg_stat_bootstrap.py +++ b/metcalcpy/agg_stat_bootstrap.py @@ -48,7 +48,7 @@ from metcalcpy.util.mode_2d_ratio_statistics import * from metcalcpy.util.mode_3d_volrat_statistics import * from metcalcpy.util.mode_3d_ratio_statistics import * -from metcalcpy.util.utils import is_string_integer, parse_bool, sort_data +from metcalcpy.util.utils import is_string_integer, parse_bool, sort_data, is_string_strictly_float class AggStatBootstrap: @@ -128,6 +128,9 @@ def _proceed_with_axis(self, axis="1"): if is_string_integer(indy_val): filtered_by_indy_data = \ self.input_data[self.input_data[self.params['indy_var']] == int(indy_val)] + elif is_string_strictly_float(indy_val): + filtered_by_indy_data = \ + self.input_data[self.input_data[self.params['indy_var']] == float(indy_val)] else: filtered_by_indy_data = \ self.input_data[self.input_data[self.params['indy_var']] == indy_val] @@ -149,6 +152,8 @@ def _proceed_with_axis(self, axis="1"): for i, filter_val in enumerate(filter_list): if is_string_integer(filter_val): filter_list[i] = int(filter_val) + elif is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) all_filters.append((filtered_by_indy_data[field].isin(filter_list))) @@ -183,6 +188,8 @@ def _proceed_with_axis(self, axis="1"): for i, filter_val in enumerate(filter_list): if is_string_integer(filter_val): filter_list[i] = int(filter_val) + elif is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) all_filters.append((filtered_by_indy_data[field].isin(filter_list))) out_frame_filter.append((out_frame[field].isin(filter_list))) diff --git a/metcalcpy/scorecard.py b/metcalcpy/scorecard.py index 54413bc0..bbcedcfa 100644 --- a/metcalcpy/scorecard.py +++ b/metcalcpy/scorecard.py @@ -51,7 +51,7 @@ from metcalcpy.util.utils import intersection, get_derived_curve_name, \ is_derived_point, is_string_integer, OPERATION_TO_SIGN, calc_derived_curve_value, \ - perfect_score_adjustment, sort_data, PRECISION, DerivedCurveComponent + perfect_score_adjustment, sort_data, PRECISION, DerivedCurveComponent, is_string_strictly_float COLUMNS_TO_REMOVE = ['equalize', 'stat_ncl', 'stat_ncu', 'stat_bcl', 'stat_bcu', 'fcst_valid_beg', 'fcst_init_beg'] @@ -151,6 +151,8 @@ def calculate_scorecard_data(self): for i, filter_val in enumerate(filter_list): if is_string_integer(filter_val): filter_list[i] = int(filter_val) + elif is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) if field in self.input_data.keys(): if field != self.params['indy_var']: filters_wihtout_indy.append((self.input_data[field].isin(filter_list))) diff --git a/metcalcpy/sum_stat.py b/metcalcpy/sum_stat.py index db0d8dff..7a1db3bd 100644 --- a/metcalcpy/sum_stat.py +++ b/metcalcpy/sum_stat.py @@ -53,7 +53,7 @@ from metcalcpy.util.rps_statistics import * from metcalcpy.util.utils import is_string_integer, parse_bool, \ - aggregate_field_values, perform_event_equalization + aggregate_field_values, perform_event_equalization, is_string_strictly_float class SumStat: @@ -217,6 +217,8 @@ def aggregate_special_fields(self, axis='1'): for ind, val in enumerate(actual_series_vals): if is_string_integer(val): actual_series_vals[ind] = int(val) + elif is_string_strictly_float(val): + actual_series_vals[ind] = float(val) point_data = \ point_data[point_data[series_vars[index]].isin(actual_series_vals)] diff --git a/metcalcpy/util/utils.py b/metcalcpy/util/utils.py index 1e5ea081..30ef6029 100644 --- a/metcalcpy/util/utils.py +++ b/metcalcpy/util/utils.py @@ -13,7 +13,6 @@ __author__ = 'Tatiana Burek' - import warnings # To deal with third-party warnings from typing import Union @@ -97,6 +96,23 @@ def is_string_integer(str_int): return False +def is_string_strictly_float(str_float) -> bool: + """Checks if the input string is strictly float. + + Args: + str_float: string value to check + + Returns: + True - if the input value is strictly float + False - if the input value is not strictly float + """ + if str_float is None: + return False + if str_float.startswith('-'): + str_float = str_float[1:] + return '.' in str_float and str_float.replace('.', '', 1).isdecimal() + + def get_derived_curve_name(list_of_names): """Creates the derived series name from the list of series name components @@ -411,7 +427,8 @@ def perfect_score_adjustment(mean_stats_1, mean_stats_2, statistic, pval): and abs(mean_stats_1 - 1) > abs(mean_stats_2 - 1): result = pval * -1 else: - print(f"WARNING: statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value") + print( + f"WARNING: statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value") result = pval return result @@ -690,19 +707,19 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi output_ee_data = output_ee_data.append(series_data_after_ee) try: - output_ee_data_valid = output_ee_data.drop('equalize', axis=1) + output_ee_data_valid = output_ee_data.drop('equalize', axis=1) - # It is possible to produce an empty data frame after applying event equalization. Print an informational - # message before returning the data frame. - if output_ee_data_valid.empty: - print(f"\nINFO: Event equalization has produced no results. Data frame is empty.") + # It is possible to produce an empty data frame after applying event equalization. Print an informational + # message before returning the data frame. + if output_ee_data_valid.empty: + print(f"\nINFO: Event equalization has produced no results. Data frame is empty.") - return output_ee_data_valid + return output_ee_data_valid except (KeyError, AttributeError): - # Two possible exceptions are raised when the data frame is empty *and* is missing the 'equalize' column - # following event equalization. Return the empty dataframe - # without dropping the 'equalize' column, and print an informational message. - print(f"\nINFO: No resulting data after performing event equalization of axis", axis) + # Two possible exceptions are raised when the data frame is empty *and* is missing the 'equalize' column + # following event equalization. Return the empty dataframe + # without dropping the 'equalize' column, and print an informational message. + print(f"\nINFO: No resulting data after performing event equalization of axis", axis) return output_ee_data diff --git a/test/data/agg_stat_and_boot_output.data b/test/data/agg_stat_and_boot_output.data index 8a0aea07..033aa724 100644 --- a/test/data/agg_stat_and_boot_output.data +++ b/test/data/agg_stat_and_boot_output.data @@ -1,28 +1,28 @@ model fcst_lead stat_name fcst_var stat_value stat_btcl stat_btcu nstats -ENS001v3.6.1_d01 0 FBAR DPT 192.1303904 192.1271511 192.137578 4 +ENS001v3.6.1_d01 0 FBAR DPT 192.1303904 192.1263071 192.1363725175 4 ENS001v3.6.1_d01 30000 FBAR DPT NA NA NA 0 ENS001v3.6.1_d01 60000 FBAR DPT 192.1304275 192.1262001 192.1365529 4 ENS001v3.6.1_d01 90000 FBAR DPT NA NA NA 0 -ENS001v3.6.1_d01 120000 FBAR DPT 192.1333718 192.1280098 192.1371883 4 +ENS001v3.6.1_d01 120000 FBAR DPT 192.1333718 192.12905488 192.13719857 4 ENS001v3.6.1_d01 150000 FBAR DPT NA NA NA 0 -ENS001v3.6.1_d01 180000 FBAR DPT 192.1327699 192.12962736 192.1364473 4 +ENS001v3.6.1_d01 180000 FBAR DPT 192.1327699 192.1290291 192.13659429749998 4 ENS001v3.6.1_d01 210000 FBAR DPT NA NA NA 0 -ENS001v3.6.1_d01 240000 FBAR DPT 192.1305292 192.1272196 192.13256661 4 +ENS001v3.6.1_d01 240000 FBAR DPT 192.1305292 192.128156 192.1335303 4 ENS001v3.6.1_d02 0 FBAR DPT 192.1247886 192.12478 192.1248 4 ENS001v3.6.1_d02 30000 FBAR DPT NA NA NA 0 ENS001v3.6.1_d02 60000 FBAR DPT 192.1268052 192.12478 192.1303239 4 ENS001v3.6.1_d02 90000 FBAR DPT NA NA NA 0 ENS001v3.6.1_d02 120000 FBAR DPT 192.12478 NA NA 4 ENS001v3.6.1_d02 150000 FBAR DPT NA NA NA 0 -ENS001v3.6.1_d02 180000 FBAR DPT 192.12494 192.12478 192.1251057 4 +ENS001v3.6.1_d02 180000 FBAR DPT 192.12494 192.12478 192.1251308 4 ENS001v3.6.1_d02 210000 FBAR DPT NA NA NA 0 -ENS001v3.6.1_d02 240000 FBAR DPT 192.1247876 192.12478 192.1247933825 4 -DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 0 FBAR DPT 0.005601800000022195 0.0020097525000089436 0.010839152500015797 0 +ENS001v3.6.1_d02 240000 FBAR DPT 192.1247876 192.12478 192.1247971 4 +DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 0 FBAR DPT 0.005601800000022195 0.002022500000009586 0.010861507500016642 0 DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 30000 FBAR DPT NA NA NA 0 -DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 60000 FBAR DPT 0.003622299999989309 -0.0014125999999805572 0.010593762500005955 0 +DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 60000 FBAR DPT 0.003622299999989309 -0.0031821999999976924 0.01176620000001094 0 DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 90000 FBAR DPT NA NA NA 0 -DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 120000 FBAR DPT 0.008591800000004923 0.005628925000024765 0.012408300000004147 0 +DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 120000 FBAR DPT 0.008591800000004923 0.004274880000019011 0.011897100000027194 0 DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 150000 FBAR DPT NA NA NA 0 -DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 180000 FBAR DPT 0.007829899999990175 0.004543457499984527 0.011667299999999159 0 +DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 180000 FBAR DPT 0.007829899999990175 0.0037788300000066504 0.010786800000005314 0 DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 210000 FBAR DPT NA NA NA 0 -DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 240000 FBAR DPT 0.005741600000021663 0.0033588999999949465 0.008660210000002876 0 +DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR) 240000 FBAR DPT 0.005741600000021663 0.0029691999999954533 0.008847600000024158 0 diff --git a/test/test_utils.py b/test/test_utils.py index 0150d511..3d4cd87f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -6,7 +6,7 @@ from metcalcpy.util.utils import represents_int, is_string_integer, get_derived_curve_name, calc_derived_curve_value, \ unique, intersection, is_derived_point, parse_bool, round_half_up, sum_column_data_by_name, \ nrow_column_data_by_name_value, create_permutations_mv, column_data_by_name, calculate_mtd_revision_stats, \ - autocor_coef + autocor_coef, is_string_strictly_float @pytest.fixture @@ -39,6 +39,14 @@ def test_is_string_integer(): assert not is_string_integer(None) +def test_is_string_float(): + assert not is_string_strictly_float("1") + assert is_string_strictly_float("1.5") + assert not is_string_strictly_float("not_float") + assert not is_string_strictly_float("not.float") + assert not is_string_strictly_float(None) + + def test_get_derived_curve_name(): list_of_names = ['analog_e GHI FBAR', 'dicast15 GHI FBAR'] assert 'DIFF(analog_e GHI FBAR-dicast15 GHI FBAR)' == get_derived_curve_name(list_of_names) @@ -202,6 +210,7 @@ def test_autocor_coef(): if __name__ == "__main__": test_represents_int() test_is_string_integer() + test_is_string_float() test_get_derived_curve_name() test_calc_derived_curve_value() test_unique()