From 0a2e17384a80a94d359a34046c901be649a70768 Mon Sep 17 00:00:00 2001
From: Tsotne Tabidze
Date: Sat, 3 Jul 2021 12:54:21 -0700
Subject: [PATCH] Fix unit tests that got broken by Pandas 1.3.0 release (#1683)

Signed-off-by: Tsotne Tabidze
---
 sdk/python/feast/driver_test_data.py          |  4 ++--
 sdk/python/tests/test_historical_retrieval.py | 15 ++++++++-------
 .../test_offline_online_store_consistency.py  |  4 ++--
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/sdk/python/feast/driver_test_data.py b/sdk/python/feast/driver_test_data.py
index d50128696d..ea0921bf04 100644
--- a/sdk/python/feast/driver_test_data.py
+++ b/sdk/python/feast/driver_test_data.py
@@ -140,8 +140,8 @@ def create_driver_hourly_stats_df(drivers, start_date, end_date) -> pd.DataFrame
     # TODO: These duplicate rows area indirectly being filtered out by the point in time join already. We need to
     # inject a bad row at a timestamp where we know it will get joined to the entity dataframe, and then test that
     # we are actually filtering it with the created timestamp
-    late_row = df_all_drivers.iloc[int(rows / 2)]
-    df_all_drivers = df_all_drivers.append(late_row).append(late_row)
+    late_row = df_all_drivers[rows // 2 : rows // 2 + 1]
+    df_all_drivers = pd.concat([df_all_drivers, late_row, late_row], ignore_index=True)
 
     return df_all_drivers
 
diff --git a/sdk/python/tests/test_historical_retrieval.py b/sdk/python/tests/test_historical_retrieval.py
index e3c28ef866..992a2c8524 100644
--- a/sdk/python/tests/test_historical_retrieval.py
+++ b/sdk/python/tests/test_historical_retrieval.py
@@ -195,13 +195,14 @@ def get_expected_training_df(
     expected_df = expected_df[[event_timestamp] + current_cols]
 
     # Cast some columns to expected types, since we lose information when converting pandas DFs into Python objects.
-    expected_df["order_is_success"] = expected_df["order_is_success"].astype("int32")
-    expected_df["customer_profile__current_balance"] = expected_df[
-        "customer_profile__current_balance"
-    ].astype("float32")
-    expected_df["customer_profile__avg_passenger_count"] = expected_df[
-        "customer_profile__avg_passenger_count"
-    ].astype("float32")
+    expected_column_types = {
+        "order_is_success": "int32",
+        "driver_stats__conv_rate": "float32",
+        "customer_profile__current_balance": "float32",
+        "customer_profile__avg_passenger_count": "float32",
+    }
+    for col, typ in expected_column_types.items():
+        expected_df[col] = expected_df[col].astype(typ)
 
     return expected_df
 
diff --git a/sdk/python/tests/test_offline_online_store_consistency.py b/sdk/python/tests/test_offline_online_store_consistency.py
index d30541d8e7..6f2fc41841 100644
--- a/sdk/python/tests/test_offline_online_store_consistency.py
+++ b/sdk/python/tests/test_offline_online_store_consistency.py
@@ -1,4 +1,5 @@
 import contextlib
+import math
 import tempfile
 import time
 import uuid
@@ -212,8 +213,7 @@ def check_offline_and_online_features(
     if expected_value:
         assert abs(df.to_dict()[f"{fv.name}__value"][0] - expected_value) < 1e-6
     else:
-        df = df.where(pd.notnull(df), None)
-        assert df.to_dict()[f"{fv.name}__value"][0] is None
+        assert math.isnan(df.to_dict()[f"{fv.name}__value"][0])
 
 
 def run_offline_online_store_consistency_test(