From c0eec93ed1aa288c369e87097c58763886822307 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 14 Jun 2022 16:31:45 -0700
Subject: [PATCH 01/30] Scaffolding for offline store push

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/offline_store.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py
index cd807764ba..2d2816c4e4 100644
--- a/sdk/python/feast/infra/offline_stores/offline_store.py
+++ b/sdk/python/feast/infra/offline_stores/offline_store.py
@@ -28,6 +28,8 @@
 from feast.registry import BaseRegistry
 from feast.repo_config import RepoConfig
 from feast.saved_dataset import SavedDatasetStorage
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
 
 if TYPE_CHECKING:
     from feast.saved_dataset import ValidationReference

From 110c9b4aded189abd279b17cbd436865f9969e8c Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 14 Jun 2022 16:33:14 -0700
Subject: [PATCH 02/30] Lint

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/offline_store.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py
index 2d2816c4e4..9c3f3f5ab4 100644
--- a/sdk/python/feast/infra/offline_stores/offline_store.py
+++ b/sdk/python/feast/infra/offline_stores/offline_store.py
@@ -25,11 +25,11 @@
 from feast.feature_logging import LoggingConfig, LoggingSource
 from feast.feature_view import FeatureView
 from feast.on_demand_feature_view import OnDemandFeatureView
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
 from feast.registry import BaseRegistry
 from feast.repo_config import RepoConfig
 from feast.saved_dataset import SavedDatasetStorage
-from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
-from feast.protos.feast.types.Value_pb2 import Value as ValueProto
 
 if TYPE_CHECKING:
     from feast.saved_dataset import ValidationReference

From 460f1eae89e262546d1bfaa8d3234409b421f048 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 14 Jun 2022 16:55:26 -0700
Subject: [PATCH 03/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/offline_store.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py
index 9c3f3f5ab4..cd807764ba 100644
--- a/sdk/python/feast/infra/offline_stores/offline_store.py
+++ b/sdk/python/feast/infra/offline_stores/offline_store.py
@@ -25,8 +25,6 @@
 from feast.feature_logging import LoggingConfig, LoggingSource
 from feast.feature_view import FeatureView
 from feast.on_demand_feature_view import OnDemandFeatureView
-from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
-from feast.protos.feast.types.Value_pb2 import Value as ValueProto
 from feast.registry import BaseRegistry
 from feast.repo_config import RepoConfig
 from feast.saved_dataset import SavedDatasetStorage

From bdcabeec88462800503e0ce0f78be44b490d87fc Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 14:07:54 -0700
Subject: [PATCH 04/30] File source offline push

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/file.py |  34 ++-
 .../offline_store/test_offline_push.py        | 196 ++++++++++++++++++
 2 files changed, 208 insertions(+), 22 deletions(-)
 create mode 100644 sdk/python/tests/integration/offline_store/test_offline_push.py

diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py
index 194c233f53..260f29bd88 100644
--- a/sdk/python/feast/infra/offline_stores/file.py
+++ b/sdk/python/feast/infra/offline_stores/file.py
@@ -7,6 +7,7 @@
 import pandas as pd
 import pyarrow
 import pyarrow.dataset
+from pyarrow import csv
 import pyarrow.parquet
 import pytz
 from pydantic.typing import Literal
@@ -405,42 +406,31 @@ def write_logged_features(
         )
 
     @staticmethod
-    def offline_write_batch(
-        config: RepoConfig,
-        feature_view: FeatureView,
-        data: pyarrow.Table,
-        progress: Optional[Callable[[int], Any]],
-    ):
+    def offline_write_batch(config: RepoConfig, feature_view: FeatureView, data: pyarrow.Table, progress: Optional[Callable[[int], Any]]):
         if not feature_view.batch_source:
-            raise ValueError(
-                "feature view does not have a batch source to persist offline data"
-            )
+            raise ValueError("feature view does not have a batch source to persist offline data")
         if not isinstance(config.offline_store, FileOfflineStoreConfig):
-            raise ValueError(
-                f"offline store config is of type {type(config.offline_store)} when file type required"
-            )
+            raise ValueError(f"offline store config is of type {type(config.offline_store)} when file type required")
         if not isinstance(feature_view.batch_source, FileSource):
-            raise ValueError(
-                f"feature view batch source is {type(feature_view.batch_source)} not file source"
-            )
+            raise ValueError(f"feature view batch source is {type(feature_view.batch_source)} not file source")
         file_options = feature_view.batch_source.file_options
         filesystem, path = FileSource.create_filesystem_and_path(
             file_options.uri, file_options.s3_endpoint_override
         )
 
         prev_table = pyarrow.parquet.read_table(path, memory_map=True)
-        if prev_table.column_names != data.column_names:
-            raise ValueError(
-                f"Input dataframe has incorrect schema or wrong order, expected columns are: {prev_table.column_names}"
-            )
-        if data.schema != prev_table.schema:
+        if(prev_table.column_names != data.column_names):
+            raise ValueError(f"Input dataframe have columns in wrong order, columns should be in the order: {prev_table.column_names}")
+        if(data.schema != prev_table.schema):
             data = data.cast(prev_table.schema)
         new_table = pyarrow.concat_tables([data, prev_table])
-        writer = pyarrow.parquet.ParquetWriter(path, data.schema, filesystem=filesystem)
+        writer = pyarrow.parquet.ParquetWriter(
+            path,
+            data.schema,
+            filesystem=filesystem)
         writer.write_table(new_table)
         writer.close()
 
-
 def _get_entity_df_event_timestamp_range(
     entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str,
 ) -> Tuple[datetime, datetime]:
diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
new file mode 100644
index 0000000000..d31a6ebf77
--- /dev/null
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -0,0 +1,196 @@
+
+import datetime
+from datetime import datetime, timedelta
+
+import numpy as np
+import pandas as pd
+import pytest
+import tempfile
+import uuid
+
+from feast.data_format import ParquetFormat
+
+from feast import FeatureView, Field, FileSource
+from feast.types import Int32, Float32
+from feast.wait import wait_retry_backoff
+from tests.integration.feature_repos.repo_configuration import (
+    construct_universal_feature_views,
+)
+from tests.integration.feature_repos.universal.data_sources.file import FileDataSourceCreator
+from tests.integration.feature_repos.universal.entities import (
+    customer,
+    driver,
+    location,
+)
+from tests.integration.feature_repos.universal.feature_views import conv_rate_plus_100
+from tests.utils.logged_features import prepare_logs, to_logs_dataset
+
+@pytest.mark.integration
+@pytest.mark.universal_online_stores(only=["sqlite"])
+def test_writing_incorrect_order_fails(environment, universal_data_sources):
+    # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
+    store = environment.feature_store
+    _, _, data_sources = universal_data_sources
+    driver_stats = FeatureView(
+        name="driver_stats",
+        entities=["driver"],
+        schema=[
+            Field(name="avg_daily_trips", dtype=Int32),
+            Field(name="conv_rate", dtype=Float32),
+        ],
+        source=data_sources.driver,
+    )
+
+    now = datetime.utcnow()
+    ts = pd.Timestamp(now).round("ms")
+
+    entity_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1002],
+            "event_timestamp": [
+                ts-timedelta(hours=3),
+                ts,
+            ],
+        }
+    )
+
+    store.apply([driver(), driver_stats])
+    df = store.get_historical_features(
+        entity_df=entity_df,
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:avg_daily_trips"
+        ],
+        full_feature_names=False,
+    ).to_df()
+
+    assert df["conv_rate"].isnull().all()
+    assert df["avg_daily_trips"].isnull().all()
+
+    expected_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1002],
+            "event_timestamp": [
+                ts-timedelta(hours=3),
+                ts,
+            ],
+            "conv_rate": [0.1, 0.2],
+            "avg_daily_trips": [1, 2],
+            "created": [ts, ts]
+        },
+    )
+    with pytest.raises(ValueError):
+        store.write_to_offline_store(driver_stats.name, expected_df, allow_registry_cache=False)
+
+@pytest.mark.integration
+@pytest.mark.universal_online_stores(only=["sqlite"])
+def test_writing_consecutively_to_offline_store(environment, universal_data_sources):
+    store = environment.feature_store
+    _, _, data_sources = universal_data_sources
+    driver_stats = FeatureView(
+        name="driver_stats",
+        entities=["driver"],
+        schema=[
+            Field(name="avg_daily_trips", dtype=Int32),
+            Field(name="conv_rate", dtype=Float32),
+        ],
+        source=data_sources.driver,
+        ttl=timedelta(minutes=10),
+    )
+
+    now = datetime.utcnow()
+    ts = pd.Timestamp(now, unit='ns')
+
+    entity_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1002],
+            "event_timestamp": [
+                ts-timedelta(hours=4),
+                ts-timedelta(hours=3),
+            ],
+        }
+    )
+
+    store.apply([driver(), driver_stats])
+    df = store.get_historical_features(
+        entity_df=entity_df,
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:avg_daily_trips"
+        ],
+        full_feature_names=False,
+    ).to_df()
+
+    assert df["conv_rate"].isnull().all()
+    assert df["avg_daily_trips"].isnull().all()
+
+    first_df = pd.DataFrame.from_dict(
+        {
+            "event_timestamp": [
+                ts-timedelta(hours=4),
+                ts-timedelta(hours=3),
+            ],
+            "driver_id": [1001, 1001],
+            "conv_rate": [0.1, 0.2],
+            "acc_rate": [0.5, 0.6],
+            "avg_daily_trips": [1, 2],
+            "created": [ts, ts]
+        },
+    )
+    store.write_to_offline_store(driver_stats.name, first_df, allow_registry_cache=False)
+
+    after_write_df = store.get_historical_features(
+        entity_df=entity_df,
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:avg_daily_trips"
+        ],
+        full_feature_names=False,
+    ).to_df()
+
+    assert len(after_write_df) == len(first_df)
+    assert np.where(after_write_df["conv_rate"].reset_index(drop=True) == first_df["conv_rate"].reset_index(drop=True))
+    assert np.where(after_write_df["avg_daily_trips"].reset_index(drop=True) == first_df["avg_daily_trips"].reset_index(drop=True))
+
+    second_df = pd.DataFrame.from_dict(
+        {
+            "event_timestamp": [
+                ts-timedelta(hours=1),
+                ts,
+            ],
+            "driver_id": [1001, 1001],
+            "conv_rate": [0.3, 0.4],
+            "acc_rate": [0.8, 0.9],
+            "avg_daily_trips": [3, 4],
+            "created": [ts, ts]
+        },
+    )
+
+    store.write_to_offline_store(driver_stats.name, second_df, allow_registry_cache=False)
+
+    entity_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1001, 1001, 1001],
+            "event_timestamp": [
+                ts-timedelta(hours=4),
+                ts-timedelta(hours=3),
+                ts-timedelta(hours=1),
+                ts,
+            ],
+        }
+    )
+
+    after_write_df = store.get_historical_features(
+        entity_df=entity_df,
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:avg_daily_trips"
+        ],
+        full_feature_names=False,
+    ).to_df()
+
+    expected_df = pd.concat([first_df, second_df])
+    assert len(after_write_df) == len(expected_df)
+    assert np.where(after_write_df["conv_rate"].reset_index(drop=True) == expected_df["conv_rate"].reset_index(drop=True))
+    assert np.where(after_write_df["avg_daily_trips"].reset_index(drop=True) == expected_df["avg_daily_trips"].reset_index(drop=True))
+

From c600626db52e0af203fadc2b31d01a45b343bd56 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 15:38:26 -0700
Subject: [PATCH 05/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/file.py |  2 +-
 .../offline_store/test_offline_push.py        | 79 ++++++++++++++++---
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py
index 260f29bd88..85028c9236 100644
--- a/sdk/python/feast/infra/offline_stores/file.py
+++ b/sdk/python/feast/infra/offline_stores/file.py
@@ -420,7 +420,7 @@ def offline_write_batch(config: RepoConfig, feature_view: FeatureView, data: pya
 
         prev_table = pyarrow.parquet.read_table(path, memory_map=True)
         if(prev_table.column_names != data.column_names):
-            raise ValueError(f"Input dataframe have columns in wrong order, columns should be in the order: {prev_table.column_names}")
+            raise ValueError(f"Input dataframe has incorrect schema or wrong order, expected columns are: {prev_table.column_names}")
         if(data.schema != prev_table.schema):
             data = data.cast(prev_table.schema)
         new_table = pyarrow.concat_tables([data, prev_table])
diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index d31a6ebf77..4b6fb557f4 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -5,8 +5,7 @@
 import numpy as np
 import pandas as pd
 import pytest
-import tempfile
-import uuid
+import random
 
 from feast.data_format import ParquetFormat
 
@@ -74,8 +73,66 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
                 ts-timedelta(hours=3),
                 ts,
             ],
-            "conv_rate": [0.1, 0.2],
-            "avg_daily_trips": [1, 2],
+            "conv_rate": [random.random(), random.random()],
+            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
+            "created": [ts, ts]
+        },
+    )
+    with pytest.raises(ValueError):
+        store.write_to_offline_store(driver_stats.name, expected_df, allow_registry_cache=False)
+
+
+@pytest.mark.integration
+@pytest.mark.universal_online_stores(only=["sqlite"])
+def test_writing_incorrect_schema_fails(environment, universal_data_sources):
+    # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
+    store = environment.feature_store
+    _, _, data_sources = universal_data_sources
+    driver_stats = FeatureView(
+        name="driver_stats",
+        entities=["driver"],
+        schema=[
+            Field(name="avg_daily_trips", dtype=Int32),
+            Field(name="conv_rate", dtype=Float32),
+        ],
+        source=data_sources.driver,
+    )
+
+    now = datetime.utcnow()
+    ts = pd.Timestamp(now).round("ms")
+
+    entity_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1002],
+            "event_timestamp": [
+                ts-timedelta(hours=3),
+                ts,
+            ],
+        }
+    )
+
+    store.apply([driver(), driver_stats])
+    df = store.get_historical_features(
+        entity_df=entity_df,
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:avg_daily_trips"
+        ],
+        full_feature_names=False,
+    ).to_df()
+
+    assert df["conv_rate"].isnull().all()
+    assert df["avg_daily_trips"].isnull().all()
+
+    expected_df = pd.DataFrame.from_dict(
+        {
+            "event_timestamp": [
+                ts-timedelta(hours=3),
+                ts,
+            ],
+            "driver_id": [1001, 1002],
+            "conv_rate": [random.random(), random.random()],
+            "incorrect_schema": [random.randint(0, 10), random.randint(0, 10)],
             "created": [ts, ts]
         },
     )
@@ -103,7 +160,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     entity_df = pd.DataFrame.from_dict(
         {
-            "driver_id": [1001, 1002],
+            "driver_id": [1001, 1001],
             "event_timestamp": [
                 ts-timedelta(hours=4),
                 ts-timedelta(hours=3),
@@ -131,9 +188,9 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
                 ts-timedelta(hours=3),
             ],
             "driver_id": [1001, 1001],
-            "conv_rate": [0.1, 0.2],
-            "acc_rate": [0.5, 0.6],
-            "avg_daily_trips": [1, 2],
+            "conv_rate": [random.random(), random.random()],
+            "acc_rate": [random.random(), random.random()],
+            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
             "created": [ts, ts]
         },
     )
@@ -159,9 +216,9 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
                 ts,
             ],
             "driver_id": [1001, 1001],
-            "conv_rate": [0.3, 0.4],
-            "acc_rate": [0.8, 0.9],
-            "avg_daily_trips": [3, 4],
+            "conv_rate": [random.random(), random.random()],
+            "acc_rate": [random.random(), random.random()],
+            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
             "created": [ts, ts]
         },
     )

From 9456b12269ce4fbcfc3d5951e7603242a1c5e123 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 15:55:55 -0700
Subject: [PATCH 06/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../integration/offline_store/test_offline_push.py    | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index 4b6fb557f4..85adc542fc 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -25,7 +25,7 @@
 from tests.utils.logged_features import prepare_logs, to_logs_dataset
 
 @pytest.mark.integration
-@pytest.mark.universal_online_stores(only=["sqlite"])
+@pytest.mark.universal_online_stores
 def test_writing_incorrect_order_fails(environment, universal_data_sources):
     # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
     store = environment.feature_store
@@ -83,7 +83,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
 
 
 @pytest.mark.integration
-@pytest.mark.universal_online_stores(only=["sqlite"])
+@pytest.mark.universal_online_stores
 def test_writing_incorrect_schema_fails(environment, universal_data_sources):
     # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
     store = environment.feature_store
@@ -140,7 +140,7 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
         store.write_to_offline_store(driver_stats.name, expected_df, allow_registry_cache=False)
 
 @pytest.mark.integration
-@pytest.mark.universal_online_stores(only=["sqlite"])
+@pytest.mark.universal_online_stores
 def test_writing_consecutively_to_offline_store(environment, universal_data_sources):
     store = environment.feature_store
     _, _, data_sources = universal_data_sources
@@ -150,6 +150,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
         schema=[
             Field(name="avg_daily_trips", dtype=Int32),
             Field(name="conv_rate", dtype=Float32),
+            Field(name="acc_rate", dtype=Float32),
         ],
         source=data_sources.driver,
         ttl=timedelta(minutes=10),
@@ -173,6 +174,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
         entity_df=entity_df,
         features=[
             "driver_stats:conv_rate",
+
             "driver_stats:avg_daily_trips"
         ],
         full_feature_names=False,
@@ -241,6 +243,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
         entity_df=entity_df,
         features=[
             "driver_stats:conv_rate",
+            "driver_stats:acc_rate",
             "driver_stats:avg_daily_trips"
         ],
         full_feature_names=False,
@@ -249,5 +252,5 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     expected_df = pd.concat([first_df, second_df])
     assert len(after_write_df) == len(expected_df)
     assert np.where(after_write_df["conv_rate"].reset_index(drop=True) == expected_df["conv_rate"].reset_index(drop=True))
+    assert np.where(after_write_df["acc_rate"].reset_index(drop=True) == expected_df["acc_rate"].reset_index(drop=True))
     assert np.where(after_write_df["avg_daily_trips"].reset_index(drop=True) == expected_df["avg_daily_trips"].reset_index(drop=True))
-

From e18bacd0dc69aa140f22a1862c090e0d6acf84dc Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 15:58:34 -0700
Subject: [PATCH 07/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/file.py |  16 +-
 .../offline_store/test_offline_push.py        | 141 +++++++-----------
 2 files changed, 65 insertions(+), 92 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py
index 85028c9236..7856eaa1c9 100644
--- a/sdk/python/feast/infra/offline_stores/file.py
+++ b/sdk/python/feast/infra/offline_stores/file.py
@@ -7,9 +7,9 @@
 import pandas as pd
 import pyarrow
 import pyarrow.dataset
-from pyarrow import csv
 import pyarrow.parquet
 import pytz
+from pyarrow import csv
 from pydantic.typing import Literal
 
 from feast import FileSource, OnDemandFeatureView
@@ -419,18 +419,18 @@ def offline_write_batch(config: RepoConfig, feature_view: FeatureView, data: pya
         )
 
         prev_table = pyarrow.parquet.read_table(path, memory_map=True)
-        if(prev_table.column_names != data.column_names):
-            raise ValueError(f"Input dataframe has incorrect schema or wrong order, expected columns are: {prev_table.column_names}")
-        if(data.schema != prev_table.schema):
+        if prev_table.column_names != data.column_names:
+            raise ValueError(
+                f"Input dataframe has incorrect schema or wrong order, expected columns are: {prev_table.column_names}"
+            )
+        if data.schema != prev_table.schema:
             data = data.cast(prev_table.schema)
         new_table = pyarrow.concat_tables([data, prev_table])
-        writer = pyarrow.parquet.ParquetWriter(
-            path,
-            data.schema,
-            filesystem=filesystem)
+        writer = pyarrow.parquet.ParquetWriter(path, data.schema, filesystem=filesystem)
         writer.write_table(new_table)
         writer.close()
 
+
 def _get_entity_df_event_timestamp_range(
     entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str,
 ) -> Tuple[datetime, datetime]:
diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index 85adc542fc..ba851e2918 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -1,28 +1,17 @@
 
-import datetime
+import random
 from datetime import datetime, timedelta
 
 import numpy as np
 import pandas as pd
 import pytest
-import random
 
-from feast.data_format import ParquetFormat
-
-from feast import FeatureView, Field, FileSource
-from feast.types import Int32, Float32
-from feast.wait import wait_retry_backoff
-from tests.integration.feature_repos.repo_configuration import (
-    construct_universal_feature_views,
-)
-from tests.integration.feature_repos.universal.data_sources.file import FileDataSourceCreator
+from feast import FeatureView, Field
+from feast.types import Float32, Int32
 from tests.integration.feature_repos.universal.entities import (
-    customer,
     driver,
-    location,
 )
-from tests.integration.feature_repos.universal.feature_views import conv_rate_plus_100
-from tests.utils.logged_features import prepare_logs, to_logs_dataset
+
 
 @pytest.mark.integration
 @pytest.mark.universal_online_stores
@@ -44,22 +33,13 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     ts = pd.Timestamp(now).round("ms")
 
     entity_df = pd.DataFrame.from_dict(
-        {
-            "driver_id": [1001, 1002],
-            "event_timestamp": [
-                ts-timedelta(hours=3),
-                ts,
-            ],
-        }
+        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts,],}
     )
 
     store.apply([driver(), driver_stats])
     df = store.get_historical_features(
         entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-            "driver_stats:avg_daily_trips"
-        ],
+        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
         full_feature_names=False,
     ).to_df()
 
@@ -69,17 +49,16 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     expected_df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1002],
-            "event_timestamp": [
-                ts-timedelta(hours=3),
-                ts,
-            ],
+            "event_timestamp": [ts - timedelta(hours=3), ts,],
             "conv_rate": [random.random(), random.random()],
             "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts]
+            "created": [ts, ts],
         },
     )
     with pytest.raises(ValueError):
-        store.write_to_offline_store(driver_stats.name, expected_df, allow_registry_cache=False)
+        store.write_to_offline_store(
+            driver_stats.name, expected_df, allow_registry_cache=False
+        )
 
 
 @pytest.mark.integration
@@ -102,22 +81,13 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
     ts = pd.Timestamp(now).round("ms")
 
     entity_df = pd.DataFrame.from_dict(
-        {
-            "driver_id": [1001, 1002],
-            "event_timestamp": [
-                ts-timedelta(hours=3),
-                ts,
-            ],
-        }
+        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts,],}
     )
 
     store.apply([driver(), driver_stats])
     df = store.get_historical_features(
         entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-            "driver_stats:avg_daily_trips"
-        ],
+        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
         full_feature_names=False,
     ).to_df()
 
@@ -126,18 +96,18 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
 
     expected_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [
-                ts-timedelta(hours=3),
-                ts,
-            ],
+            "event_timestamp": [ts - timedelta(hours=3), ts,],
             "driver_id": [1001, 1002],
             "conv_rate": [random.random(), random.random()],
             "incorrect_schema": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts]
+            "created": [ts, ts],
         },
     )
     with pytest.raises(ValueError):
-        store.write_to_offline_store(driver_stats.name, expected_df, allow_registry_cache=False)
+        store.write_to_offline_store(
+            driver_stats.name, expected_df, allow_registry_cache=False
+        )
+
 
 @pytest.mark.integration
 @pytest.mark.universal_online_stores
@@ -157,26 +127,19 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     )
 
     now = datetime.utcnow()
-    ts = pd.Timestamp(now, unit='ns')
+    ts = pd.Timestamp(now, unit="ns")
 
     entity_df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1001],
-            "event_timestamp": [
-                ts-timedelta(hours=4),
-                ts-timedelta(hours=3),
-            ],
+            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3),],
         }
     )
 
     store.apply([driver(), driver_stats])
     df = store.get_historical_features(
         entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-
-            "driver_stats:avg_daily_trips"
-        ],
+        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
         full_feature_names=False,
     ).to_df()
 
@@ -185,55 +148,56 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     first_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [
-                ts-timedelta(hours=4),
-                ts-timedelta(hours=3),
-            ],
+            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3),],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],
             "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts]
+            "created": [ts, ts],
         },
     )
-    store.write_to_offline_store(driver_stats.name, first_df, allow_registry_cache=False)
+    store.write_to_offline_store(
+        driver_stats.name, first_df, allow_registry_cache=False
+    )
 
     after_write_df = store.get_historical_features(
         entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-            "driver_stats:avg_daily_trips"
-        ],
+        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
         full_feature_names=False,
     ).to_df()
 
     assert len(after_write_df) == len(first_df)
-    assert np.where(after_write_df["conv_rate"].reset_index(drop=True) == first_df["conv_rate"].reset_index(drop=True))
-    assert np.where(after_write_df["avg_daily_trips"].reset_index(drop=True) == first_df["avg_daily_trips"].reset_index(drop=True))
+    assert np.where(
+        after_write_df["conv_rate"].reset_index(drop=True)
+        == first_df["conv_rate"].reset_index(drop=True)
+    )
+    assert np.where(
+        after_write_df["avg_daily_trips"].reset_index(drop=True)
+        == first_df["avg_daily_trips"].reset_index(drop=True)
+    )
 
     second_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [
-                ts-timedelta(hours=1),
-                ts,
-            ],
+            "event_timestamp": [ts - timedelta(hours=1), ts,],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],
             "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts]
+            "created": [ts, ts],
         },
     )
 
-    store.write_to_offline_store(driver_stats.name, second_df, allow_registry_cache=False)
+    store.write_to_offline_store(
+        driver_stats.name, second_df, allow_registry_cache=False
+    )
 
     entity_df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1001, 1001, 1001],
             "event_timestamp": [
-                ts-timedelta(hours=4),
-                ts-timedelta(hours=3),
-                ts-timedelta(hours=1),
+                ts - timedelta(hours=4),
+                ts - timedelta(hours=3),
+                ts - timedelta(hours=1),
                 ts,
             ],
         }
@@ -244,13 +208,22 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
         features=[
             "driver_stats:conv_rate",
             "driver_stats:acc_rate",
-            "driver_stats:avg_daily_trips"
+            "driver_stats:avg_daily_trips",
         ],
         full_feature_names=False,
     ).to_df()
 
     expected_df = pd.concat([first_df, second_df])
     assert len(after_write_df) == len(expected_df)
-    assert np.where(after_write_df["conv_rate"].reset_index(drop=True) == expected_df["conv_rate"].reset_index(drop=True))
-    assert np.where(after_write_df["acc_rate"].reset_index(drop=True) == expected_df["acc_rate"].reset_index(drop=True))
-    assert np.where(after_write_df["avg_daily_trips"].reset_index(drop=True) == expected_df["avg_daily_trips"].reset_index(drop=True))
+    assert np.where(
+        after_write_df["conv_rate"].reset_index(drop=True)
+        == expected_df["conv_rate"].reset_index(drop=True)
+    )
+    assert np.where(
+        after_write_df["acc_rate"].reset_index(drop=True)
+        == expected_df["acc_rate"].reset_index(drop=True)
+    )
+    assert np.where(
+        after_write_df["avg_daily_trips"].reset_index(drop=True)
+        == expected_df["avg_daily_trips"].reset_index(drop=True)
+    )

From f72dc8c40e09ec2c78321b3fdcc9e12997f2e6e8 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 16:03:18 -0700
Subject: [PATCH 08/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/file.py |  1 -
 .../offline_store/test_offline_push.py        | 19 ++++++++-----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py
index 7856eaa1c9..b0bf94e352 100644
--- a/sdk/python/feast/infra/offline_stores/file.py
+++ b/sdk/python/feast/infra/offline_stores/file.py
@@ -9,7 +9,6 @@
 import pyarrow.dataset
 import pyarrow.parquet
 import pytz
-from pyarrow import csv
 from pydantic.typing import Literal
 
 from feast import FileSource, OnDemandFeatureView
diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index ba851e2918..068b7b0a75 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -1,4 +1,3 @@
-
 import random
 from datetime import datetime, timedelta
 
@@ -8,9 +7,7 @@
 
 from feast import FeatureView, Field
 from feast.types import Float32, Int32
-from tests.integration.feature_repos.universal.entities import (
-    driver,
-)
+from tests.integration.feature_repos.universal.entities import driver
 
 
 @pytest.mark.integration
@@ -33,7 +30,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     ts = pd.Timestamp(now).round("ms")
 
     entity_df = pd.DataFrame.from_dict(
-        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts,],}
+        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
     )
 
     store.apply([driver(), driver_stats])
@@ -49,7 +46,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     expected_df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1002],
-            "event_timestamp": [ts - timedelta(hours=3), ts,],
+            "event_timestamp": [ts - timedelta(hours=3), ts],
             "conv_rate": [random.random(), random.random()],
             "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
             "created": [ts, ts],
@@ -81,7 +78,7 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
     ts = pd.Timestamp(now).round("ms")
 
     entity_df = pd.DataFrame.from_dict(
-        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts,],}
+        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
     )
 
     store.apply([driver(), driver_stats])
@@ -96,7 +93,7 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
 
     expected_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [ts - timedelta(hours=3), ts,],
+            "event_timestamp": [ts - timedelta(hours=3), ts],
             "driver_id": [1001, 1002],
             "conv_rate": [random.random(), random.random()],
             "incorrect_schema": [random.randint(0, 10), random.randint(0, 10)],
@@ -132,7 +129,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     entity_df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1001],
-            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3),],
+            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
         }
     )
 
@@ -148,7 +145,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     first_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3),],
+            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],
@@ -178,7 +175,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     second_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [ts - timedelta(hours=1), ts,],
+            "event_timestamp": [ts - timedelta(hours=1), ts],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],

From 8cc2a336118f5b52ed32eaa2d1d55c4c96e57f83 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 18:12:36 -0700
Subject: [PATCH 09/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../offline_store/test_offline_push.py          | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index 068b7b0a75..44a8053e15 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -14,6 +14,9 @@
 @pytest.mark.universal_online_stores
 def test_writing_incorrect_order_fails(environment, universal_data_sources):
     # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
+    """This test tests if we have incorrect order when writing to offline store.
+    Specifically, event_timestamp should be the first column to adhere with the filesource column order.
+    """
     store = environment.feature_store
     _, _, data_sources = universal_data_sources
     driver_stats = FeatureView(
@@ -43,7 +46,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     assert df["conv_rate"].isnull().all()
     assert df["avg_daily_trips"].isnull().all()
 
-    expected_df = pd.DataFrame.from_dict(
+    df = pd.DataFrame.from_dict(
         {
             "driver_id": [1001, 1002],
             "event_timestamp": [ts - timedelta(hours=3), ts],
@@ -54,7 +57,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
     )
     with pytest.raises(ValueError):
         store.write_to_offline_store(
-            driver_stats.name, expected_df, allow_registry_cache=False
+            driver_stats.name, df, allow_registry_cache=False
         )
 
 
@@ -62,6 +65,9 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
 @pytest.mark.universal_online_stores
 def test_writing_incorrect_schema_fails(environment, universal_data_sources):
     # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
+    """This test tests if we have incorrect attribute when writing to offline store.
+    Specifically, `incorrect_attribute` is an inccorect column to adhere with the filesource column order.
+    """
     store = environment.feature_store
     _, _, data_sources = universal_data_sources
     driver_stats = FeatureView(
@@ -91,18 +97,18 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
     assert df["conv_rate"].isnull().all()
     assert df["avg_daily_trips"].isnull().all()
 
-    expected_df = pd.DataFrame.from_dict(
+    df = pd.DataFrame.from_dict(
         {
             "event_timestamp": [ts - timedelta(hours=3), ts],
             "driver_id": [1001, 1002],
             "conv_rate": [random.random(), random.random()],
-            "incorrect_schema": [random.randint(0, 10), random.randint(0, 10)],
+            "incorrect_attribute": [random.randint(0, 10), random.randint(0, 10)],
             "created": [ts, ts],
         },
     )
     with pytest.raises(ValueError):
         store.write_to_offline_store(
-            driver_stats.name, expected_df, allow_registry_cache=False
+            driver_stats.name, df, allow_registry_cache=False
         )
 
 
@@ -143,6 +149,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     assert df["conv_rate"].isnull().all()
     assert df["avg_daily_trips"].isnull().all()
 
+    # This dataframe has its columns ordered exactly as it is in the parquet file generated by driver_test_data.py.
     first_df = pd.DataFrame.from_dict(
         {
             "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],

From d399f76344a081c459bc4f007c2dddb70030ea8e Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Thu, 16 Jun 2022 18:14:19 -0700
Subject: [PATCH 10/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../tests/integration/offline_store/test_offline_push.py  | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
index 44a8053e15..2bdf775177 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_push.py
@@ -56,9 +56,7 @@ def test_writing_incorrect_order_fails(environment, universal_data_sources):
         },
     )
     with pytest.raises(ValueError):
-        store.write_to_offline_store(
-            driver_stats.name, df, allow_registry_cache=False
-        )
+        store.write_to_offline_store(driver_stats.name, df, allow_registry_cache=False)
 
 
 @pytest.mark.integration
@@ -107,9 +105,7 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
         },
     )
     with pytest.raises(ValueError):
-        store.write_to_offline_store(
-            driver_stats.name, df, allow_registry_cache=False
-        )
+        store.write_to_offline_store(driver_stats.name, df, allow_registry_cache=False)
 
 
 @pytest.mark.integration

From c070c1a73bb1dc27dbf84b841cfaa69e83f3f40a Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Fri, 17 Jun 2022 09:58:06 -0700
Subject: [PATCH 11/30] Address review comments

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/file.py |  19 +-
 .../offline_store/test_offline_push.py        | 229 ------------------
 2 files changed, 15 insertions(+), 233 deletions(-)
 delete mode 100644 sdk/python/tests/integration/offline_store/test_offline_push.py

diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py
index b0bf94e352..194c233f53 100644
--- a/sdk/python/feast/infra/offline_stores/file.py
+++ b/sdk/python/feast/infra/offline_stores/file.py
@@ -405,13 +405,24 @@ def write_logged_features(
         )
 
     @staticmethod
-    def offline_write_batch(config: RepoConfig, feature_view: FeatureView, data: pyarrow.Table, progress: Optional[Callable[[int], Any]]):
+    def offline_write_batch(
+        config: RepoConfig,
+        feature_view: FeatureView,
+        data: pyarrow.Table,
+        progress: Optional[Callable[[int], Any]],
+    ):
         if not feature_view.batch_source:
-            raise ValueError("feature view does not have a batch source to persist offline data")
+            raise ValueError(
+                "feature view does not have a batch source to persist offline data"
+            )
         if not isinstance(config.offline_store, FileOfflineStoreConfig):
-            raise ValueError(f"offline store config is of type {type(config.offline_store)} when file type required")
+            raise ValueError(
+                f"offline store config is of type {type(config.offline_store)} when file type required"
+            )
         if not isinstance(feature_view.batch_source, FileSource):
-            raise ValueError(f"feature view batch source is {type(feature_view.batch_source)} not file source")
+            raise ValueError(
+                f"feature view batch source is {type(feature_view.batch_source)} not file source"
+            )
         file_options = feature_view.batch_source.file_options
         filesystem, path = FileSource.create_filesystem_and_path(
             file_options.uri, file_options.s3_endpoint_override
diff --git a/sdk/python/tests/integration/offline_store/test_offline_push.py b/sdk/python/tests/integration/offline_store/test_offline_push.py
deleted file mode 100644
index 2bdf775177..0000000000
--- a/sdk/python/tests/integration/offline_store/test_offline_push.py
+++ /dev/null
@@ -1,229 +0,0 @@
-import random
-from datetime import datetime, timedelta
-
-import numpy as np
-import pandas as pd
-import pytest
-
-from feast import FeatureView, Field
-from feast.types import Float32, Int32
-from tests.integration.feature_repos.universal.entities import driver
-
-
-@pytest.mark.integration
-@pytest.mark.universal_online_stores
-def test_writing_incorrect_order_fails(environment, universal_data_sources):
-    # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
-    """This test tests if we have incorrect order when writing to offline store.
-    Specifically, event_timestamp should be the first column to adhere with the filesource column order.
-    """
-    store = environment.feature_store
-    _, _, data_sources = universal_data_sources
-    driver_stats = FeatureView(
-        name="driver_stats",
-        entities=["driver"],
-        schema=[
-            Field(name="avg_daily_trips", dtype=Int32),
-            Field(name="conv_rate", dtype=Float32),
-        ],
-        source=data_sources.driver,
-    )
-
-    now = datetime.utcnow()
-    ts = pd.Timestamp(now).round("ms")
-
-    entity_df = pd.DataFrame.from_dict(
-        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
-    )
-
-    store.apply([driver(), driver_stats])
-    df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
-        full_feature_names=False,
-    ).to_df()
-
-    assert df["conv_rate"].isnull().all()
-    assert df["avg_daily_trips"].isnull().all()
-
-    df = pd.DataFrame.from_dict(
-        {
-            "driver_id": [1001, 1002],
-            "event_timestamp": [ts - timedelta(hours=3), ts],
-            "conv_rate": [random.random(), random.random()],
-            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts],
-        },
-    )
-    with pytest.raises(ValueError):
-        store.write_to_offline_store(driver_stats.name, df, allow_registry_cache=False)
-
-
-@pytest.mark.integration
-@pytest.mark.universal_online_stores
-def test_writing_incorrect_schema_fails(environment, universal_data_sources):
-    # TODO(kevjumba) handle incorrect order later, for now schema must be in the order that the filesource is in
-    """This test tests if we have incorrect attribute when writing to offline store.
-    Specifically, `incorrect_attribute` is an inccorect column to adhere with the filesource column order.
-    """
-    store = environment.feature_store
-    _, _, data_sources = universal_data_sources
-    driver_stats = FeatureView(
-        name="driver_stats",
-        entities=["driver"],
-        schema=[
-            Field(name="avg_daily_trips", dtype=Int32),
-            Field(name="conv_rate", dtype=Float32),
-        ],
-        source=data_sources.driver,
-    )
-
-    now = datetime.utcnow()
-    ts = pd.Timestamp(now).round("ms")
-
-    entity_df = pd.DataFrame.from_dict(
-        {"driver_id": [1001, 1002], "event_timestamp": [ts - timedelta(hours=3), ts]}
-    )
-
-    store.apply([driver(), driver_stats])
-    df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
-        full_feature_names=False,
-    ).to_df()
-
-    assert df["conv_rate"].isnull().all()
-    assert df["avg_daily_trips"].isnull().all()
-
-    df = pd.DataFrame.from_dict(
-        {
-            "event_timestamp": [ts - timedelta(hours=3), ts],
-            "driver_id": [1001, 1002],
-            "conv_rate": [random.random(), random.random()],
-            "incorrect_attribute": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts],
-        },
-    )
-    with pytest.raises(ValueError):
-        store.write_to_offline_store(driver_stats.name, df, allow_registry_cache=False)
-
-
-@pytest.mark.integration
-@pytest.mark.universal_online_stores
-def test_writing_consecutively_to_offline_store(environment, universal_data_sources):
-    store = environment.feature_store
-    _, _, data_sources = universal_data_sources
-    driver_stats = FeatureView(
-        name="driver_stats",
-        entities=["driver"],
-        schema=[
-            Field(name="avg_daily_trips", dtype=Int32),
-            Field(name="conv_rate", dtype=Float32),
-            Field(name="acc_rate", dtype=Float32),
-        ],
-        source=data_sources.driver,
-        ttl=timedelta(minutes=10),
-    )
-
-    now = datetime.utcnow()
-    ts = pd.Timestamp(now, unit="ns")
-
-    entity_df = pd.DataFrame.from_dict(
-        {
-            "driver_id": [1001, 1001],
-            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
-        }
-    )
-
-    store.apply([driver(), driver_stats])
-    df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
-        full_feature_names=False,
-    ).to_df()
-
-    assert df["conv_rate"].isnull().all()
-    assert df["avg_daily_trips"].isnull().all()
-
-    # This dataframe has its columns ordered exactly as it is in the parquet file generated by driver_test_data.py.
-    first_df = pd.DataFrame.from_dict(
-        {
-            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
-            "driver_id": [1001, 1001],
-            "conv_rate": [random.random(), random.random()],
-            "acc_rate": [random.random(), random.random()],
-            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts],
-        },
-    )
-    store.write_to_offline_store(
-        driver_stats.name, first_df, allow_registry_cache=False
-    )
-
-    after_write_df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
-        full_feature_names=False,
-    ).to_df()
-
-    assert len(after_write_df) == len(first_df)
-    assert np.where(
-        after_write_df["conv_rate"].reset_index(drop=True)
-        == first_df["conv_rate"].reset_index(drop=True)
-    )
-    assert np.where(
-        after_write_df["avg_daily_trips"].reset_index(drop=True)
-        == first_df["avg_daily_trips"].reset_index(drop=True)
-    )
-
-    second_df = pd.DataFrame.from_dict(
-        {
-            "event_timestamp": [ts - timedelta(hours=1), ts],
-            "driver_id": [1001, 1001],
-            "conv_rate": [random.random(), random.random()],
-            "acc_rate": [random.random(), random.random()],
-            "avg_daily_trips": [random.randint(0, 10), random.randint(0, 10)],
-            "created": [ts, ts],
-        },
-    )
-
-    store.write_to_offline_store(
-        driver_stats.name, second_df, allow_registry_cache=False
-    )
-
-    entity_df = pd.DataFrame.from_dict(
-        {
-            "driver_id": [1001, 1001, 1001, 1001],
-            "event_timestamp": [
-                ts - timedelta(hours=4),
-                ts - timedelta(hours=3),
-                ts - timedelta(hours=1),
-                ts,
-            ],
-        }
-    )
-
-    after_write_df = store.get_historical_features(
-        entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-            "driver_stats:acc_rate",
-            "driver_stats:avg_daily_trips",
-        ],
-        full_feature_names=False,
-    ).to_df()
-
-    expected_df = pd.concat([first_df, second_df])
-    assert len(after_write_df) == len(expected_df)
-    assert np.where(
-        after_write_df["conv_rate"].reset_index(drop=True)
-        == expected_df["conv_rate"].reset_index(drop=True)
-    )
-    assert np.where(
-        after_write_df["acc_rate"].reset_index(drop=True)
-        == expected_df["acc_rate"].reset_index(drop=True)
-    )
-    assert np.where(
-        after_write_df["avg_daily_trips"].reset_index(drop=True)
-        == expected_df["avg_daily_trips"].reset_index(drop=True)
-    )

From 1fe4195fa3d54f6116a28cf083260e3f040e65c4 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Fri, 17 Jun 2022 10:31:43 -0700
Subject: [PATCH 12/30] Add redshift function

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/redshift.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index 943bac502c..e02dc4c860 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -12,6 +12,7 @@
     Optional,
     Tuple,
     Union,
+    Any,
 )
 
 import numpy as np

From 9b43ba3822a200b11a8f4ea516e94febfd79ca93 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Mon, 20 Jun 2022 17:10:02 -0700
Subject: [PATCH 13/30] Add redshift

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../feast/infra/offline_stores/redshift.py    |  1 +
 sdk/python/feast/infra/utils/aws_utils.py     | 63 +++++++++++++++++++
 sdk/python/tests/conftest.py                  |  1 +
 .../feature_repos/repo_configuration.py       |  9 +++
 .../offline_store/test_offline_write.py       | 15 +++--
 5 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index e02dc4c860..dc67108e33 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -14,6 +14,7 @@
     Union,
     Any,
 )
+from feast.type_map import redshift_to_feast_value_type, feast_value_type_to_pa
 
 import numpy as np
 import pandas as pd
diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index 7badda9846..50415fee72 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -234,6 +234,23 @@ def upload_df_to_redshift(
         table_name=table_name,
     )
 
+def delete_redshift_table(
+    redshift_data_client,
+    cluster_id: str,
+    database: str,
+    user: str,
+    table_name: str,
+):
+    drop_query = (
+        f"DROP {table_name} IF EXISTS"
+    )
+    execute_redshift_statement(
+            redshift_data_client,
+            cluster_id,
+            database,
+            user,
+            drop_query,
+    )
 
 def delete_redshift_table(
     redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
@@ -379,6 +396,52 @@ def temporarily_upload_df_to_redshift(
         redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}",
     )
 
+@contextlib.contextmanager
+def temporarily_upload_arrow_table_to_redshift(
+    table: Union[pyarrow.Table, Path],
+    redshift_data_client,
+    cluster_id: str,
+    database: str,
+    user: str,
+    s3_resource,
+    iam_role: str,
+    s3_path: str,
+    table_name: str,
+    schema: Optional[pyarrow.Schema] = None,
+    fail_if_exists: bool = True,
+) -> Iterator[None]:
+    """Uploads a Arrow Table to Redshift as a new table with cleanup logic.
+
+    This is essentially the same as upload_arrow_table_to_redshift (check out its docstring for full details),
+    but unlike it this method is a generator and should be used with `with` block. For example:
+
+    >>> with temporarily_upload_arrow_table_to_redshift(...): # doctest: +SKIP
+    >>>     # Use `table_name` table in Redshift here
+    >>> # `table_name` will not exist at this point, since it's cleaned up by the `with` block
+
+    """
+    # Upload the dataframe to Redshift
+    upload_arrow_table_to_redshift(
+        table,
+        redshift_data_client,
+        cluster_id,
+        database,
+        user,
+        s3_resource,
+        s3_path,
+        iam_role,
+        table_name,
+        schema,
+        fail_if_exists,
+    )
+
+    yield
+
+    # Clean up the uploaded Redshift table
+    execute_redshift_statement(
+        redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}",
+    )
+
 
 @contextlib.contextmanager
 def temporarily_upload_arrow_table_to_redshift(
diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index bf69a85fa3..bc4ddf9a49 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -31,6 +31,7 @@
     IntegrationTestRepoConfig,
 )
 from tests.integration.feature_repos.repo_configuration import (
+    OFFLINE_STORE_TO_PROVIDER_CONFIG,
     AVAILABLE_OFFLINE_STORES,
     AVAILABLE_ONLINE_STORES,
     OFFLINE_STORE_TO_PROVIDER_CONFIG,
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index f4d5defcad..c51daf0246 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -74,11 +74,20 @@
     "connection_string": "127.0.0.1:6001,127.0.0.1:6002,127.0.0.1:6003",
 }
 
+<<<<<<< HEAD
 OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = {
     "file": ("local", FileDataSourceCreator),
     "gcp": ("gcp", BigQueryDataSourceCreator),
     "redshift": ("aws", RedshiftDataSourceCreator),
     "snowflake": ("aws", RedshiftDataSourceCreator),
+=======
+OFFLINE_STORE_TO_PROVIDER_CONFIG : Dict[
+    str, DataSourceCreator] = {
+        "file": ("local", FileDataSourceCreator),
+        "gcp": ("gcp", BigQueryDataSourceCreator),
+        "redshift": ("aws", RedshiftDataSourceCreator),
+        "snowflake": ("aws", RedshiftDataSourceCreator),
+>>>>>>> a1b0c4a6 (Add redshift)
 }
 
 AVAILABLE_OFFLINE_STORES: List[Tuple[str, Type[DataSourceCreator]]] = [
diff --git a/sdk/python/tests/integration/offline_store/test_offline_write.py b/sdk/python/tests/integration/offline_store/test_offline_write.py
index 5e7a242513..9557e98714 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_write.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_write.py
@@ -9,7 +9,6 @@
 from feast.types import Float32, Int32
 from tests.integration.feature_repos.universal.entities import driver
 
-
 @pytest.mark.integration
 @pytest.mark.universal_offline_stores(only=["file", "redshift"])
 @pytest.mark.universal_online_stores(only=["sqlite"])
@@ -107,7 +106,6 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
             driver_stats.name, expected_df, allow_registry_cache=False
         )
 
-
 @pytest.mark.integration
 @pytest.mark.universal_offline_stores(only=["file", "redshift"])
 @pytest.mark.universal_online_stores(only=["sqlite"])
@@ -127,7 +125,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     )
 
     now = datetime.utcnow()
-    ts = pd.Timestamp(now, unit="ns")
+    ts = pd.Timestamp(now, unit="ms", tz="UTC").round("ms")
 
     entity_df = pd.DataFrame.from_dict(
         {
@@ -148,7 +146,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     first_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
+            "event_timestamp": [now-timedelta(hours=4), now - timedelta(hours=3)],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],
@@ -156,13 +154,18 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
             "created": [ts, ts],
         },
     )
+
     store._write_to_offline_store(
         driver_stats.name, first_df, allow_registry_cache=False
     )
 
     after_write_df = store.get_historical_features(
         entity_df=entity_df,
-        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
+        features=[
+            "driver_stats:conv_rate",
+            "driver_stats:acc_rate",
+            "driver_stats:avg_daily_trips",
+        ],
         full_feature_names=False,
     ).to_df()
 
@@ -226,4 +229,4 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     assert np.where(
         after_write_df["avg_daily_trips"].reset_index(drop=True)
         == expected_df["avg_daily_trips"].reset_index(drop=True)
-    )
+    )
\ No newline at end of file

From 155a56a10c49e97bc4032f014b914ee87e62bea7 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Mon, 20 Jun 2022 17:22:03 -0700
Subject: [PATCH 14/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../integration/offline_store/test_offline_write.py   | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_offline_write.py b/sdk/python/tests/integration/offline_store/test_offline_write.py
index 9557e98714..f1775db6bf 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_write.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_write.py
@@ -125,7 +125,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     )
 
     now = datetime.utcnow()
-    ts = pd.Timestamp(now, unit="ms", tz="UTC").round("ms")
+    ts = pd.Timestamp(now, unit="ns")
 
     entity_df = pd.DataFrame.from_dict(
         {
@@ -146,7 +146,7 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
 
     first_df = pd.DataFrame.from_dict(
         {
-            "event_timestamp": [now-timedelta(hours=4), now - timedelta(hours=3)],
+            "event_timestamp": [ts - timedelta(hours=4), ts - timedelta(hours=3)],
             "driver_id": [1001, 1001],
             "conv_rate": [random.random(), random.random()],
             "acc_rate": [random.random(), random.random()],
@@ -154,18 +154,13 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
             "created": [ts, ts],
         },
     )
-
     store._write_to_offline_store(
         driver_stats.name, first_df, allow_registry_cache=False
     )
 
     after_write_df = store.get_historical_features(
         entity_df=entity_df,
-        features=[
-            "driver_stats:conv_rate",
-            "driver_stats:acc_rate",
-            "driver_stats:avg_daily_trips",
-        ],
+        features=["driver_stats:conv_rate", "driver_stats:avg_daily_trips"],
         full_feature_names=False,
     ).to_df()
 

From a26336544a6a48a95046af1bd0a1a8ebf89d6f61 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Mon, 20 Jun 2022 17:23:45 -0700
Subject: [PATCH 15/30] Lint

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../feast/infra/offline_stores/redshift.py    |  2 --
 sdk/python/feast/infra/utils/aws_utils.py     | 21 +++++++------------
 sdk/python/tests/conftest.py                  |  4 +++-
 .../feature_repos/repo_configuration.py       |  6 ++++++
 .../offline_store/test_offline_write.py       |  4 +++-
 5 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index dc67108e33..943bac502c 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -12,9 +12,7 @@
     Optional,
     Tuple,
     Union,
-    Any,
 )
-from feast.type_map import redshift_to_feast_value_type, feast_value_type_to_pa
 
 import numpy as np
 import pandas as pd
diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index 50415fee72..0d9d282ab4 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -234,24 +234,16 @@ def upload_df_to_redshift(
         table_name=table_name,
     )
 
+
 def delete_redshift_table(
-    redshift_data_client,
-    cluster_id: str,
-    database: str,
-    user: str,
-    table_name: str,
+    redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
 ):
-    drop_query = (
-        f"DROP {table_name} IF EXISTS"
-    )
+    drop_query = f"DROP {table_name} IF EXISTS"
     execute_redshift_statement(
-            redshift_data_client,
-            cluster_id,
-            database,
-            user,
-            drop_query,
+        redshift_data_client, cluster_id, database, user, drop_query,
     )
 
+<<<<<<< HEAD
 def delete_redshift_table(
     redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
 ):
@@ -260,6 +252,8 @@ def delete_redshift_table(
         redshift_data_client, cluster_id, database, user, drop_query,
     )
 
+=======
+>>>>>>> fec6cc0b (Lint)
 
 def upload_arrow_table_to_redshift(
     table: Union[pyarrow.Table, Path],
@@ -396,6 +390,7 @@ def temporarily_upload_df_to_redshift(
         redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}",
     )
 
+
 @contextlib.contextmanager
 def temporarily_upload_arrow_table_to_redshift(
     table: Union[pyarrow.Table, Path],
diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index bc4ddf9a49..0290b5b440 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -31,7 +31,6 @@
     IntegrationTestRepoConfig,
 )
 from tests.integration.feature_repos.repo_configuration import (
-    OFFLINE_STORE_TO_PROVIDER_CONFIG,
     AVAILABLE_OFFLINE_STORES,
     AVAILABLE_ONLINE_STORES,
     OFFLINE_STORE_TO_PROVIDER_CONFIG,
@@ -285,9 +284,12 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
                             _config_cache[c] = c
 
                         configs.append(_config_cache[c])
+<<<<<<< HEAD
         else:
             # No offline stores requested -> setting the default or first available
             offline_stores = [("local", FileDataSourceCreator)]
+=======
+>>>>>>> fec6cc0b (Lint)
 
         metafunc.parametrize(
             "environment", configs, indirect=True, ids=[str(c) for c in configs]
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index c51daf0246..75835f1c56 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -75,11 +75,15 @@
 }
 
 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> fec6cc0b (Lint)
 OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = {
     "file": ("local", FileDataSourceCreator),
     "gcp": ("gcp", BigQueryDataSourceCreator),
     "redshift": ("aws", RedshiftDataSourceCreator),
     "snowflake": ("aws", RedshiftDataSourceCreator),
+<<<<<<< HEAD
 =======
 OFFLINE_STORE_TO_PROVIDER_CONFIG : Dict[
     str, DataSourceCreator] = {
@@ -88,6 +92,8 @@
         "redshift": ("aws", RedshiftDataSourceCreator),
         "snowflake": ("aws", RedshiftDataSourceCreator),
 >>>>>>> a1b0c4a6 (Add redshift)
+=======
+>>>>>>> fec6cc0b (Lint)
 }
 
 AVAILABLE_OFFLINE_STORES: List[Tuple[str, Type[DataSourceCreator]]] = [
diff --git a/sdk/python/tests/integration/offline_store/test_offline_write.py b/sdk/python/tests/integration/offline_store/test_offline_write.py
index f1775db6bf..5e7a242513 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_write.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_write.py
@@ -9,6 +9,7 @@
 from feast.types import Float32, Int32
 from tests.integration.feature_repos.universal.entities import driver
 
+
 @pytest.mark.integration
 @pytest.mark.universal_offline_stores(only=["file", "redshift"])
 @pytest.mark.universal_online_stores(only=["sqlite"])
@@ -106,6 +107,7 @@ def test_writing_incorrect_schema_fails(environment, universal_data_sources):
             driver_stats.name, expected_df, allow_registry_cache=False
         )
 
+
 @pytest.mark.integration
 @pytest.mark.universal_offline_stores(only=["file", "redshift"])
 @pytest.mark.universal_online_stores(only=["sqlite"])
@@ -224,4 +226,4 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
     assert np.where(
         after_write_df["avg_daily_trips"].reset_index(drop=True)
         == expected_df["avg_daily_trips"].reset_index(drop=True)
-    )
\ No newline at end of file
+    )

From 3a51046cc5600e7891c60df0b0d94171c2ed102e Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 21 Jun 2022 08:31:47 -0700
Subject: [PATCH 16/30] fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/redshift.py | 2 +-
 sdk/python/feast/infra/utils/aws_utils.py         | 4 ----
 sdk/python/tests/conftest.py                      | 3 ---
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index 943bac502c..c80927e91f 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -357,7 +357,7 @@ def offline_write_batch(
             s3_resource=s3_resource,
             s3_path=f"{config.offline_store.s3_staging_location}/push/{uuid.uuid4()}.parquet",
             iam_role=config.offline_store.iam_role,
-            table_name=redshift_options.table,
+            table_name=redshift_options.table ,
             schema=pa_schema,
             fail_if_exists=False,
         )
diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index 0d9d282ab4..b284d24231 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -243,7 +243,6 @@ def delete_redshift_table(
         redshift_data_client, cluster_id, database, user, drop_query,
     )
 
-<<<<<<< HEAD
 def delete_redshift_table(
     redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
 ):
@@ -252,9 +251,6 @@ def delete_redshift_table(
         redshift_data_client, cluster_id, database, user, drop_query,
     )
 
-=======
->>>>>>> fec6cc0b (Lint)
-
 def upload_arrow_table_to_redshift(
     table: Union[pyarrow.Table, Path],
     redshift_data_client,
diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 0290b5b440..bf69a85fa3 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -284,12 +284,9 @@ def pytest_generate_tests(metafunc: pytest.Metafunc):
                             _config_cache[c] = c
 
                         configs.append(_config_cache[c])
-<<<<<<< HEAD
         else:
             # No offline stores requested -> setting the default or first available
             offline_stores = [("local", FileDataSourceCreator)]
-=======
->>>>>>> fec6cc0b (Lint)
 
         metafunc.parametrize(
             "environment", configs, indirect=True, ids=[str(c) for c in configs]

From 6de1a3fef2838efc665bb924321fe6a33a1e7245 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 21 Jun 2022 08:33:00 -0700
Subject: [PATCH 17/30] fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/offline_stores/redshift.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index c80927e91f..943bac502c 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -357,7 +357,7 @@ def offline_write_batch(
             s3_resource=s3_resource,
             s3_path=f"{config.offline_store.s3_staging_location}/push/{uuid.uuid4()}.parquet",
             iam_role=config.offline_store.iam_role,
-            table_name=redshift_options.table ,
+            table_name=redshift_options.table,
             schema=pa_schema,
             fail_if_exists=False,
         )

From 1908102df9a4dbf19b786fa75597da486b014dce Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 21 Jun 2022 14:45:47 -0700
Subject: [PATCH 18/30] Fix test

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../online_store/test_universal_online.py     | 1202 ++++++++---------
 1 file changed, 601 insertions(+), 601 deletions(-)

diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index c068e04111..3d066e7ba7 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -441,604 +441,604 @@ def test_online_retrieval_with_event_timestamps(
     )
 
 
-@pytest.mark.integration
-@pytest.mark.universal_online_stores
-# @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
-@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
-def test_stream_feature_view_online_retrieval(
-    environment, universal_data_sources, feature_server_endpoint, full_feature_names
-):
-    """
-    Tests materialization and online retrieval for stream feature views.
-
-    This test is separate from test_online_retrieval since combining feature views and
-    stream feature views into a single test resulted in test flakiness. This is tech
-    debt that should be resolved soon.
-    """
-    # Set up feature store.
-    fs = environment.feature_store
-    entities, datasets, data_sources = universal_data_sources
-    feature_views = construct_universal_feature_views(data_sources)
-    pushable_feature_view = feature_views.pushed_locations
-    fs.apply([location(), pushable_feature_view])
-
-    # Materialize.
-    fs.materialize(
-        environment.start_date - timedelta(days=1),
-        environment.end_date + timedelta(days=1),
-    )
-
-    # Get online features by randomly sampling 10 entities that exist in the batch source.
-    sample_locations = datasets.location_df.sample(10)["location_id"]
-    entity_rows = [
-        {"location_id": sample_location} for sample_location in sample_locations
-    ]
-
-    feature_refs = [
-        "pushable_location_stats:temperature",
-    ]
-    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
-
-    online_features_dict = get_online_features_dict(
-        environment=environment,
-        endpoint=feature_server_endpoint,
-        features=feature_refs,
-        entity_rows=entity_rows,
-        full_feature_names=full_feature_names,
-    )
-
-    # Check that the response has the expected set of keys.
-    keys = set(online_features_dict.keys())
-    expected_keys = set(
-        f.replace(":", "__") if full_feature_names else f.split(":")[-1]
-        for f in feature_refs
-    ) | {"location_id"}
-    assert (
-        keys == expected_keys
-    ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
-
-    # Check that the feature values match.
-    tc = unittest.TestCase()
-    for i, entity_row in enumerate(entity_rows):
-        df_features = get_latest_feature_values_from_location_df(
-            entity_row, datasets.location_df
-        )
-
-        assert df_features["location_id"] == online_features_dict["location_id"][i]
-        for unprefixed_feature_ref in unprefixed_feature_refs:
-            tc.assertAlmostEqual(
-                df_features[unprefixed_feature_ref],
-                online_features_dict[
-                    response_feature_name(
-                        unprefixed_feature_ref, feature_refs, full_feature_names
-                    )
-                ][i],
-                delta=0.0001,
-            )
-
-
-@pytest.mark.integration
-@pytest.mark.universal_online_stores
-# @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
-@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
-def test_online_retrieval(
-    environment, universal_data_sources, feature_server_endpoint, full_feature_names
-):
-    fs = environment.feature_store
-    entities, datasets, data_sources = universal_data_sources
-    feature_views = construct_universal_feature_views(data_sources)
-
-    feature_service = FeatureService(
-        "convrate_plus100",
-        features=[
-            feature_views.driver[["conv_rate"]],
-            feature_views.driver_odfv,
-            feature_views.customer[["current_balance"]],
-        ],
-    )
-    feature_service_entity_mapping = FeatureService(
-        name="entity_mapping",
-        features=[
-            feature_views.location.with_name("origin").with_join_key_map(
-                {"location_id": "origin_id"}
-            ),
-            feature_views.location.with_name("destination").with_join_key_map(
-                {"location_id": "destination_id"}
-            ),
-        ],
-    )
-
-    feast_objects = []
-    feast_objects.extend(feature_views.values())
-    feast_objects.extend(
-        [
-            driver(),
-            customer(),
-            location(),
-            feature_service,
-            feature_service_entity_mapping,
-        ]
-    )
-    fs.apply(feast_objects)
-    fs.materialize(
-        environment.start_date - timedelta(days=1),
-        environment.end_date + timedelta(days=1),
-    )
-
-    entity_sample = datasets.orders_df.sample(10)[
-        ["customer_id", "driver_id", "order_id", "event_timestamp"]
-    ]
-    orders_df = datasets.orders_df[
-        (
-            datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
-            & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
-        )
-    ]
-
-    sample_drivers = entity_sample["driver_id"]
-    drivers_df = datasets.driver_df[
-        datasets.driver_df["driver_id"].isin(sample_drivers)
-    ]
-
-    sample_customers = entity_sample["customer_id"]
-    customers_df = datasets.customer_df[
-        datasets.customer_df["customer_id"].isin(sample_customers)
-    ]
-
-    location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
-    sample_location_pairs = location_pairs[
-        np.random.choice(len(location_pairs), 10)
-    ].T.tolist()
-    origins_df = datasets.location_df[
-        datasets.location_df["location_id"].isin(sample_location_pairs[0])
-    ]
-    destinations_df = datasets.location_df[
-        datasets.location_df["location_id"].isin(sample_location_pairs[1])
-    ]
-
-    global_df = datasets.global_df
-
-    entity_rows = [
-        {"driver_id": d, "customer_id": c, "val_to_add": 50}
-        for (d, c) in zip(sample_drivers, sample_customers)
-    ]
-
-    feature_refs = [
-        "driver_stats:conv_rate",
-        "driver_stats:avg_daily_trips",
-        "customer_profile:current_balance",
-        "customer_profile:avg_passenger_count",
-        "customer_profile:lifetime_trip_count",
-        "conv_rate_plus_100:conv_rate_plus_100",
-        "conv_rate_plus_100:conv_rate_plus_val_to_add",
-        "order:order_is_success",
-        "global_stats:num_rides",
-        "global_stats:avg_ride_length",
-    ]
-    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
-    # Remove the on demand feature view output features, since they're not present in the source dataframe
-    unprefixed_feature_refs.remove("conv_rate_plus_100")
-    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")
-
-    online_features_dict = get_online_features_dict(
-        environment=environment,
-        endpoint=feature_server_endpoint,
-        features=feature_refs,
-        entity_rows=entity_rows,
-        full_feature_names=full_feature_names,
-    )
-
-    # Test that the on demand feature views compute properly even if the dependent conv_rate
-    # feature isn't requested.
-    online_features_no_conv_rate = get_online_features_dict(
-        environment=environment,
-        endpoint=feature_server_endpoint,
-        features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"],
-        entity_rows=entity_rows,
-        full_feature_names=full_feature_names,
-    )
-
-    assert online_features_no_conv_rate is not None
-
-    keys = set(online_features_dict.keys())
-    expected_keys = set(
-        f.replace(":", "__") if full_feature_names else f.split(":")[-1]
-        for f in feature_refs
-    ) | {"customer_id", "driver_id"}
-    assert (
-        keys == expected_keys
-    ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
-
-    tc = unittest.TestCase()
-    for i, entity_row in enumerate(entity_rows):
-        df_features = get_latest_feature_values_from_dataframes(
-            driver_df=drivers_df,
-            customer_df=customers_df,
-            orders_df=orders_df,
-            global_df=global_df,
-            entity_row=entity_row,
-        )
-
-        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
-        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
-        tc.assertAlmostEqual(
-            online_features_dict[
-                response_feature_name(
-                    "conv_rate_plus_100", feature_refs, full_feature_names
-                )
-            ][i],
-            df_features["conv_rate"] + 100,
-            delta=0.0001,
-        )
-        tc.assertAlmostEqual(
-            online_features_dict[
-                response_feature_name(
-                    "conv_rate_plus_val_to_add", feature_refs, full_feature_names
-                )
-            ][i],
-            df_features["conv_rate"] + df_features["val_to_add"],
-            delta=0.0001,
-        )
-        for unprefixed_feature_ref in unprefixed_feature_refs:
-            tc.assertAlmostEqual(
-                df_features[unprefixed_feature_ref],
-                online_features_dict[
-                    response_feature_name(
-                        unprefixed_feature_ref, feature_refs, full_feature_names
-                    )
-                ][i],
-                delta=0.0001,
-            )
-
-    # Check what happens for missing values
-    missing_responses_dict = get_online_features_dict(
-        environment=environment,
-        endpoint=feature_server_endpoint,
-        features=feature_refs,
-        entity_rows=[{"driver_id": 0, "customer_id": 0, "val_to_add": 100}],
-        full_feature_names=full_feature_names,
-    )
-    assert missing_responses_dict is not None
-    for unprefixed_feature_ref in unprefixed_feature_refs:
-        if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
-            tc.assertIsNone(
-                missing_responses_dict[
-                    response_feature_name(
-                        unprefixed_feature_ref, feature_refs, full_feature_names
-                    )
-                ][0]
-            )
-
-    # Check what happens for missing request data
-    with pytest.raises(RequestDataNotFoundInEntityRowsException):
-        get_online_features_dict(
-            environment=environment,
-            endpoint=feature_server_endpoint,
-            features=feature_refs,
-            entity_rows=[{"driver_id": 0, "customer_id": 0}],
-            full_feature_names=full_feature_names,
-        )
-
-    assert_feature_service_correctness(
-        environment,
-        feature_server_endpoint,
-        feature_service,
-        entity_rows,
-        full_feature_names,
-        drivers_df,
-        customers_df,
-        orders_df,
-        global_df,
-    )
-
-    entity_rows = [
-        {"origin_id": origin, "destination_id": destination}
-        for (_driver, _customer, origin, destination) in zip(
-            sample_drivers, sample_customers, *sample_location_pairs
-        )
-    ]
-    assert_feature_service_entity_mapping_correctness(
-        environment,
-        feature_server_endpoint,
-        feature_service_entity_mapping,
-        entity_rows,
-        full_feature_names,
-        origins_df,
-        destinations_df,
-    )
-
-
-@pytest.mark.integration
-@pytest.mark.universal_online_stores(only=["redis"])
-def test_online_store_cleanup(environment, universal_data_sources):
-    """
-    Some online store implementations (like Redis) keep features from different features views
-    but with common entities together.
-    This might end up with deletion of all features attached to the entity,
-    when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150).
-
-    Plan:
-        1. Register two feature views with common entity "driver"
-        2. Materialize data
-        3. Check if features are available (via online retrieval)
-        4. Delete one feature view
-        5. Check that features for other are still available
-        6. Delete another feature view (and create again)
-        7. Verify that features for both feature view were deleted
-    """
-    fs = environment.feature_store
-    entities, datasets, data_sources = universal_data_sources
-    driver_stats_fv = construct_universal_feature_views(data_sources).driver
-
-    driver_entities = entities.driver_vals
-    df = pd.DataFrame(
-        {
-            "ts_1": [environment.end_date] * len(driver_entities),
-            "created_ts": [environment.end_date] * len(driver_entities),
-            "driver_id": driver_entities,
-            "value": np.random.random(size=len(driver_entities)),
-        }
-    )
-
-    ds = environment.data_source_creator.create_data_source(
-        df, destination_name="simple_driver_dataset"
-    )
-
-    simple_driver_fv = driver_feature_view(
-        data_source=ds, name="test_universal_online_simple_driver"
-    )
-
-    fs.apply([driver(), simple_driver_fv, driver_stats_fv])
-
-    fs.materialize(
-        environment.start_date - timedelta(days=1),
-        environment.end_date + timedelta(days=1),
-    )
-    expected_values = df.sort_values(by="driver_id")
-
-    features = [f"{simple_driver_fv.name}:value"]
-    entity_rows = [{"driver_id": driver_id} for driver_id in sorted(driver_entities)]
-
-    online_features = fs.get_online_features(
-        features=features, entity_rows=entity_rows
-    ).to_dict()
-    assert np.allclose(expected_values["value"], online_features["value"])
-
-    fs.apply(
-        objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False
-    )
-
-    online_features = fs.get_online_features(
-        features=features, entity_rows=entity_rows
-    ).to_dict()
-    assert np.allclose(expected_values["value"], online_features["value"])
-
-    fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False)
-
-    def eventually_apply() -> Tuple[None, bool]:
-        try:
-            fs.apply([simple_driver_fv])
-        except BotoCoreError:
-            return None, False
-
-        return None, True
-
-    # Online store backend might have eventual consistency in schema update
-    # So recreating table that was just deleted might need some retries
-    wait_retry_backoff(eventually_apply, timeout_secs=60)
-
-    online_features = fs.get_online_features(
-        features=features, entity_rows=entity_rows
-    ).to_dict()
-    assert all(v is None for v in online_features["value"])
-
-
-def response_feature_name(
-    feature: str, feature_refs: List[str], full_feature_names: bool
-) -> str:
-    if not full_feature_names:
-        return feature
-
-    for feature_ref in feature_refs:
-        if feature_ref.endswith(feature):
-            return feature_ref.replace(":", "__")
-
-    return feature
-
-
-def get_latest_row(entity_row, df, join_key, entity_key):
-    rows = df[df[join_key] == entity_row[entity_key]]
-    return rows.loc[rows["event_timestamp"].idxmax()].to_dict()
-
-
-def get_latest_feature_values_from_dataframes(
-    driver_df,
-    customer_df,
-    orders_df,
-    entity_row,
-    global_df=None,
-    origin_df=None,
-    destination_df=None,
-):
-    latest_driver_row = get_latest_row(entity_row, driver_df, "driver_id", "driver_id")
-    latest_customer_row = get_latest_row(
-        entity_row, customer_df, "customer_id", "customer_id"
-    )
-
-    # Since the event timestamp columns may contain timestamps of different timezones,
-    # we must first convert the timestamps to UTC before we can compare them.
-    order_rows = orders_df[
-        (orders_df["driver_id"] == entity_row["driver_id"])
-        & (orders_df["customer_id"] == entity_row["customer_id"])
-    ]
-    timestamps = order_rows[["event_timestamp"]]
-    timestamps["event_timestamp"] = pd.to_datetime(
-        timestamps["event_timestamp"], utc=True
-    )
-    max_index = timestamps["event_timestamp"].idxmax()
-    latest_orders_row = order_rows.loc[max_index]
-
-    if global_df is not None:
-        latest_global_row = global_df.loc[
-            global_df["event_timestamp"].idxmax()
-        ].to_dict()
-    if origin_df is not None:
-        latest_location_row = get_latest_feature_values_for_location_df(
-            entity_row, origin_df, destination_df
-        )
-
-    request_data_features = entity_row.copy()
-    request_data_features.pop("driver_id")
-    request_data_features.pop("customer_id")
-    if global_df is not None:
-        return {
-            **latest_customer_row,
-            **latest_driver_row,
-            **latest_orders_row,
-            **latest_global_row,
-            **request_data_features,
-        }
-    if origin_df is not None:
-        request_data_features.pop("origin_id")
-        request_data_features.pop("destination_id")
-        return {
-            **latest_customer_row,
-            **latest_driver_row,
-            **latest_orders_row,
-            **latest_location_row,
-            **request_data_features,
-        }
-    return {
-        **latest_customer_row,
-        **latest_driver_row,
-        **latest_orders_row,
-        **request_data_features,
-    }
-
-
-def get_latest_feature_values_for_location_df(entity_row, origin_df, destination_df):
-    latest_origin_row = get_latest_row(
-        entity_row, origin_df, "location_id", "origin_id"
-    )
-    latest_destination_row = get_latest_row(
-        entity_row, destination_df, "location_id", "destination_id"
-    )
-    # Need full feature names for shadow entities
-    latest_origin_row["origin__temperature"] = latest_origin_row.pop("temperature")
-    latest_destination_row["destination__temperature"] = latest_destination_row.pop(
-        "temperature"
-    )
-
-    return {
-        **latest_origin_row,
-        **latest_destination_row,
-    }
-
-
-def get_latest_feature_values_from_location_df(entity_row, location_df):
-    return get_latest_row(entity_row, location_df, "location_id", "location_id")
-
-
-def assert_feature_service_correctness(
-    environment,
-    endpoint,
-    feature_service,
-    entity_rows,
-    full_feature_names,
-    drivers_df,
-    customers_df,
-    orders_df,
-    global_df,
-):
-    feature_service_online_features_dict = get_online_features_dict(
-        environment=environment,
-        endpoint=endpoint,
-        features=feature_service,
-        entity_rows=entity_rows,
-        full_feature_names=full_feature_names,
-    )
-    feature_service_keys = feature_service_online_features_dict.keys()
-    expected_feature_refs = [
-        f"{projection.name_to_use()}__{feature.name}"
-        if full_feature_names
-        else feature.name
-        for projection in feature_service.feature_view_projections
-        for feature in projection.features
-    ]
-    assert set(feature_service_keys) == set(expected_feature_refs) | {
-        "customer_id",
-        "driver_id",
-    }
-
-    tc = unittest.TestCase()
-    for i, entity_row in enumerate(entity_rows):
-        df_features = get_latest_feature_values_from_dataframes(
-            driver_df=drivers_df,
-            customer_df=customers_df,
-            orders_df=orders_df,
-            global_df=global_df,
-            entity_row=entity_row,
-        )
-        tc.assertAlmostEqual(
-            feature_service_online_features_dict[
-                response_feature_name(
-                    "conv_rate_plus_100", expected_feature_refs, full_feature_names
-                )
-            ][i],
-            df_features["conv_rate"] + 100,
-            delta=0.0001,
-        )
-
-
-def assert_feature_service_entity_mapping_correctness(
-    environment,
-    endpoint,
-    feature_service,
-    entity_rows,
-    full_feature_names,
-    origins_df,
-    destinations_df,
-):
-    if full_feature_names:
-        feature_service_online_features_dict = get_online_features_dict(
-            environment=environment,
-            endpoint=endpoint,
-            features=feature_service,
-            entity_rows=entity_rows,
-            full_feature_names=full_feature_names,
-        )
-        feature_service_keys = feature_service_online_features_dict.keys()
-
-        expected_features = [
-            f"{projection.name_to_use()}__{feature.name}"
-            if full_feature_names
-            else feature.name
-            for projection in feature_service.feature_view_projections
-            for feature in projection.features
-        ]
-        assert set(feature_service_keys) == set(expected_features) | {
-            "destination_id",
-            "origin_id",
-        }
-
-        for i, entity_row in enumerate(entity_rows):
-            df_features = get_latest_feature_values_for_location_df(
-                origin_df=origins_df,
-                destination_df=destinations_df,
-                entity_row=entity_row,
-            )
-            for feature_name in ["origin__temperature", "destination__temperature"]:
-                assert (
-                    feature_service_online_features_dict[feature_name][i]
-                    == df_features[feature_name]
-                )
-    else:
-        # using 2 of the same FeatureView without full_feature_names=True will result in collision
-        with pytest.raises(FeatureNameCollisionError):
-            get_online_features_dict(
-                environment=environment,
-                endpoint=endpoint,
-                features=feature_service,
-                entity_rows=entity_rows,
-                full_feature_names=full_feature_names,
-            )
+# @pytest.mark.integration
+# @pytest.mark.universal_online_stores
+# # @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
+# @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
+# def test_stream_feature_view_online_retrieval(
+#     environment, universal_data_sources, feature_server_endpoint, full_feature_names
+# ):
+#     """
+#     Tests materialization and online retrieval for stream feature views.
+
+#     This test is separate from test_online_retrieval since combining feature views and
+#     stream feature views into a single test resulted in test flakiness. This is tech
+#     debt that should be resolved soon.
+#     """
+#     # Set up feature store.
+#     fs = environment.feature_store
+#     entities, datasets, data_sources = universal_data_sources
+#     feature_views = construct_universal_feature_views(data_sources)
+#     pushable_feature_view = feature_views.pushed_locations
+#     fs.apply([location(), pushable_feature_view])
+
+#     # Materialize.
+#     fs.materialize(
+#         environment.start_date - timedelta(days=1),
+#         environment.end_date + timedelta(days=1),
+#     )
+
+#     # Get online features by randomly sampling 10 entities that exist in the batch source.
+#     sample_locations = datasets.location_df.sample(10)["location_id"]
+#     entity_rows = [
+#         {"location_id": sample_location} for sample_location in sample_locations
+#     ]
+
+#     feature_refs = [
+#         "pushable_location_stats:temperature",
+#     ]
+#     unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
+
+#     online_features_dict = get_online_features_dict(
+#         environment=environment,
+#         endpoint=feature_server_endpoint,
+#         features=feature_refs,
+#         entity_rows=entity_rows,
+#         full_feature_names=full_feature_names,
+#     )
+
+#     # Check that the response has the expected set of keys.
+#     keys = set(online_features_dict.keys())
+#     expected_keys = set(
+#         f.replace(":", "__") if full_feature_names else f.split(":")[-1]
+#         for f in feature_refs
+#     ) | {"location_id"}
+#     assert (
+#         keys == expected_keys
+#     ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
+
+#     # Check that the feature values match.
+#     tc = unittest.TestCase()
+#     for i, entity_row in enumerate(entity_rows):
+#         df_features = get_latest_feature_values_from_location_df(
+#             entity_row, datasets.location_df
+#         )
+
+#         assert df_features["location_id"] == online_features_dict["location_id"][i]
+#         for unprefixed_feature_ref in unprefixed_feature_refs:
+#             tc.assertAlmostEqual(
+#                 df_features[unprefixed_feature_ref],
+#                 online_features_dict[
+#                     response_feature_name(
+#                         unprefixed_feature_ref, feature_refs, full_feature_names
+#                     )
+#                 ][i],
+#                 delta=0.0001,
+#             )
+
+
+# @pytest.mark.integration
+# @pytest.mark.universal_online_stores
+# # @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
+# @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
+# def test_online_retrieval(
+#     environment, universal_data_sources, feature_server_endpoint, full_feature_names
+# ):
+#     fs = environment.feature_store
+#     entities, datasets, data_sources = universal_data_sources
+#     feature_views = construct_universal_feature_views(data_sources)
+
+#     feature_service = FeatureService(
+#         "convrate_plus100",
+#         features=[
+#             feature_views.driver[["conv_rate"]],
+#             feature_views.driver_odfv,
+#             feature_views.customer[["current_balance"]],
+#         ],
+#     )
+#     feature_service_entity_mapping = FeatureService(
+#         name="entity_mapping",
+#         features=[
+#             feature_views.location.with_name("origin").with_join_key_map(
+#                 {"location_id": "origin_id"}
+#             ),
+#             feature_views.location.with_name("destination").with_join_key_map(
+#                 {"location_id": "destination_id"}
+#             ),
+#         ],
+#     )
+
+#     feast_objects = []
+#     feast_objects.extend(feature_views.values())
+#     feast_objects.extend(
+#         [
+#             driver(),
+#             customer(),
+#             location(),
+#             feature_service,
+#             feature_service_entity_mapping,
+#         ]
+#     )
+#     fs.apply(feast_objects)
+#     fs.materialize(
+#         environment.start_date - timedelta(days=1),
+#         environment.end_date + timedelta(days=1),
+#     )
+
+#     entity_sample = datasets.orders_df.sample(10)[
+#         ["customer_id", "driver_id", "order_id", "event_timestamp"]
+#     ]
+#     orders_df = datasets.orders_df[
+#         (
+#             datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
+#             & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
+#         )
+#     ]
+
+#     sample_drivers = entity_sample["driver_id"]
+#     drivers_df = datasets.driver_df[
+#         datasets.driver_df["driver_id"].isin(sample_drivers)
+#     ]
+
+#     sample_customers = entity_sample["customer_id"]
+#     customers_df = datasets.customer_df[
+#         datasets.customer_df["customer_id"].isin(sample_customers)
+#     ]
+
+#     location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
+#     sample_location_pairs = location_pairs[
+#         np.random.choice(len(location_pairs), 10)
+#     ].T.tolist()
+#     origins_df = datasets.location_df[
+#         datasets.location_df["location_id"].isin(sample_location_pairs[0])
+#     ]
+#     destinations_df = datasets.location_df[
+#         datasets.location_df["location_id"].isin(sample_location_pairs[1])
+#     ]
+
+#     global_df = datasets.global_df
+
+#     entity_rows = [
+#         {"driver_id": d, "customer_id": c, "val_to_add": 50}
+#         for (d, c) in zip(sample_drivers, sample_customers)
+#     ]
+
+#     feature_refs = [
+#         "driver_stats:conv_rate",
+#         "driver_stats:avg_daily_trips",
+#         "customer_profile:current_balance",
+#         "customer_profile:avg_passenger_count",
+#         "customer_profile:lifetime_trip_count",
+#         "conv_rate_plus_100:conv_rate_plus_100",
+#         "conv_rate_plus_100:conv_rate_plus_val_to_add",
+#         "order:order_is_success",
+#         "global_stats:num_rides",
+#         "global_stats:avg_ride_length",
+#     ]
+#     unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
+#     # Remove the on demand feature view output features, since they're not present in the source dataframe
+#     unprefixed_feature_refs.remove("conv_rate_plus_100")
+#     unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")
+
+#     online_features_dict = get_online_features_dict(
+#         environment=environment,
+#         endpoint=feature_server_endpoint,
+#         features=feature_refs,
+#         entity_rows=entity_rows,
+#         full_feature_names=full_feature_names,
+#     )
+
+#     # Test that the on demand feature views compute properly even if the dependent conv_rate
+#     # feature isn't requested.
+#     online_features_no_conv_rate = get_online_features_dict(
+#         environment=environment,
+#         endpoint=feature_server_endpoint,
+#         features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"],
+#         entity_rows=entity_rows,
+#         full_feature_names=full_feature_names,
+#     )
+
+#     assert online_features_no_conv_rate is not None
+
+#     keys = set(online_features_dict.keys())
+#     expected_keys = set(
+#         f.replace(":", "__") if full_feature_names else f.split(":")[-1]
+#         for f in feature_refs
+#     ) | {"customer_id", "driver_id"}
+#     assert (
+#         keys == expected_keys
+#     ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
+
+#     tc = unittest.TestCase()
+#     for i, entity_row in enumerate(entity_rows):
+#         df_features = get_latest_feature_values_from_dataframes(
+#             driver_df=drivers_df,
+#             customer_df=customers_df,
+#             orders_df=orders_df,
+#             global_df=global_df,
+#             entity_row=entity_row,
+#         )
+
+#         assert df_features["customer_id"] == online_features_dict["customer_id"][i]
+#         assert df_features["driver_id"] == online_features_dict["driver_id"][i]
+#         tc.assertAlmostEqual(
+#             online_features_dict[
+#                 response_feature_name(
+#                     "conv_rate_plus_100", feature_refs, full_feature_names
+#                 )
+#             ][i],
+#             df_features["conv_rate"] + 100,
+#             delta=0.0001,
+#         )
+#         tc.assertAlmostEqual(
+#             online_features_dict[
+#                 response_feature_name(
+#                     "conv_rate_plus_val_to_add", feature_refs, full_feature_names
+#                 )
+#             ][i],
+#             df_features["conv_rate"] + df_features["val_to_add"],
+#             delta=0.0001,
+#         )
+#         for unprefixed_feature_ref in unprefixed_feature_refs:
+#             tc.assertAlmostEqual(
+#                 df_features[unprefixed_feature_ref],
+#                 online_features_dict[
+#                     response_feature_name(
+#                         unprefixed_feature_ref, feature_refs, full_feature_names
+#                     )
+#                 ][i],
+#                 delta=0.0001,
+#             )
+
+#     # Check what happens for missing values
+#     missing_responses_dict = get_online_features_dict(
+#         environment=environment,
+#         endpoint=feature_server_endpoint,
+#         features=feature_refs,
+#         entity_rows=[{"driver_id": 0, "customer_id": 0, "val_to_add": 100}],
+#         full_feature_names=full_feature_names,
+#     )
+#     assert missing_responses_dict is not None
+#     for unprefixed_feature_ref in unprefixed_feature_refs:
+#         if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
+#             tc.assertIsNone(
+#                 missing_responses_dict[
+#                     response_feature_name(
+#                         unprefixed_feature_ref, feature_refs, full_feature_names
+#                     )
+#                 ][0]
+#             )
+
+#     # Check what happens for missing request data
+#     with pytest.raises(RequestDataNotFoundInEntityRowsException):
+#         get_online_features_dict(
+#             environment=environment,
+#             endpoint=feature_server_endpoint,
+#             features=feature_refs,
+#             entity_rows=[{"driver_id": 0, "customer_id": 0}],
+#             full_feature_names=full_feature_names,
+#         )
+
+#     assert_feature_service_correctness(
+#         environment,
+#         feature_server_endpoint,
+#         feature_service,
+#         entity_rows,
+#         full_feature_names,
+#         drivers_df,
+#         customers_df,
+#         orders_df,
+#         global_df,
+#     )
+
+#     entity_rows = [
+#         {"origin_id": origin, "destination_id": destination}
+#         for (_driver, _customer, origin, destination) in zip(
+#             sample_drivers, sample_customers, *sample_location_pairs
+#         )
+#     ]
+#     assert_feature_service_entity_mapping_correctness(
+#         environment,
+#         feature_server_endpoint,
+#         feature_service_entity_mapping,
+#         entity_rows,
+#         full_feature_names,
+#         origins_df,
+#         destinations_df,
+#     )
+
+
+# @pytest.mark.integration
+# @pytest.mark.universal_online_stores(only=["redis"])
+# def test_online_store_cleanup(environment, universal_data_sources):
+#     """
+#     Some online store implementations (like Redis) keep features from different features views
+#     but with common entities together.
+#     This might end up with deletion of all features attached to the entity,
+#     when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150).
+
+#     Plan:
+#         1. Register two feature views with common entity "driver"
+#         2. Materialize data
+#         3. Check if features are available (via online retrieval)
+#         4. Delete one feature view
+#         5. Check that features for other are still available
+#         6. Delete another feature view (and create again)
+#         7. Verify that features for both feature view were deleted
+#     """
+#     fs = environment.feature_store
+#     entities, datasets, data_sources = universal_data_sources
+#     driver_stats_fv = construct_universal_feature_views(data_sources).driver
+
+#     driver_entities = entities.driver_vals
+#     df = pd.DataFrame(
+#         {
+#             "ts_1": [environment.end_date] * len(driver_entities),
+#             "created_ts": [environment.end_date] * len(driver_entities),
+#             "driver_id": driver_entities,
+#             "value": np.random.random(size=len(driver_entities)),
+#         }
+#     )
+
+#     ds = environment.data_source_creator.create_data_source(
+#         df, destination_name="simple_driver_dataset"
+#     )
+
+#     simple_driver_fv = driver_feature_view(
+#         data_source=ds, name="test_universal_online_simple_driver"
+#     )
+
+#     fs.apply([driver(), simple_driver_fv, driver_stats_fv])
+
+#     fs.materialize(
+#         environment.start_date - timedelta(days=1),
+#         environment.end_date + timedelta(days=1),
+#     )
+#     expected_values = df.sort_values(by="driver_id")
+
+#     features = [f"{simple_driver_fv.name}:value"]
+#     entity_rows = [{"driver_id": driver_id} for driver_id in sorted(driver_entities)]
+
+#     online_features = fs.get_online_features(
+#         features=features, entity_rows=entity_rows
+#     ).to_dict()
+#     assert np.allclose(expected_values["value"], online_features["value"])
+
+#     fs.apply(
+#         objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False
+#     )
+
+#     online_features = fs.get_online_features(
+#         features=features, entity_rows=entity_rows
+#     ).to_dict()
+#     assert np.allclose(expected_values["value"], online_features["value"])
+
+#     fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False)
+
+#     def eventually_apply() -> Tuple[None, bool]:
+#         try:
+#             fs.apply([simple_driver_fv])
+#         except BotoCoreError:
+#             return None, False
+
+#         return None, True
+
+#     # Online store backend might have eventual consistency in schema update
+#     # So recreating table that was just deleted might need some retries
+#     wait_retry_backoff(eventually_apply, timeout_secs=60)
+
+#     online_features = fs.get_online_features(
+#         features=features, entity_rows=entity_rows
+#     ).to_dict()
+#     assert all(v is None for v in online_features["value"])
+
+
+# def response_feature_name(
+#     feature: str, feature_refs: List[str], full_feature_names: bool
+# ) -> str:
+#     if not full_feature_names:
+#         return feature
+
+#     for feature_ref in feature_refs:
+#         if feature_ref.endswith(feature):
+#             return feature_ref.replace(":", "__")
+
+#     return feature
+
+
+# def get_latest_row(entity_row, df, join_key, entity_key):
+#     rows = df[df[join_key] == entity_row[entity_key]]
+#     return rows.loc[rows["event_timestamp"].idxmax()].to_dict()
+
+
+# def get_latest_feature_values_from_dataframes(
+#     driver_df,
+#     customer_df,
+#     orders_df,
+#     entity_row,
+#     global_df=None,
+#     origin_df=None,
+#     destination_df=None,
+# ):
+#     latest_driver_row = get_latest_row(entity_row, driver_df, "driver_id", "driver_id")
+#     latest_customer_row = get_latest_row(
+#         entity_row, customer_df, "customer_id", "customer_id"
+#     )
+
+#     # Since the event timestamp columns may contain timestamps of different timezones,
+#     # we must first convert the timestamps to UTC before we can compare them.
+#     order_rows = orders_df[
+#         (orders_df["driver_id"] == entity_row["driver_id"])
+#         & (orders_df["customer_id"] == entity_row["customer_id"])
+#     ]
+#     timestamps = order_rows[["event_timestamp"]]
+#     timestamps["event_timestamp"] = pd.to_datetime(
+#         timestamps["event_timestamp"], utc=True
+#     )
+#     max_index = timestamps["event_timestamp"].idxmax()
+#     latest_orders_row = order_rows.loc[max_index]
+
+#     if global_df is not None:
+#         latest_global_row = global_df.loc[
+#             global_df["event_timestamp"].idxmax()
+#         ].to_dict()
+#     if origin_df is not None:
+#         latest_location_row = get_latest_feature_values_for_location_df(
+#             entity_row, origin_df, destination_df
+#         )
+
+#     request_data_features = entity_row.copy()
+#     request_data_features.pop("driver_id")
+#     request_data_features.pop("customer_id")
+#     if global_df is not None:
+#         return {
+#             **latest_customer_row,
+#             **latest_driver_row,
+#             **latest_orders_row,
+#             **latest_global_row,
+#             **request_data_features,
+#         }
+#     if origin_df is not None:
+#         request_data_features.pop("origin_id")
+#         request_data_features.pop("destination_id")
+#         return {
+#             **latest_customer_row,
+#             **latest_driver_row,
+#             **latest_orders_row,
+#             **latest_location_row,
+#             **request_data_features,
+#         }
+#     return {
+#         **latest_customer_row,
+#         **latest_driver_row,
+#         **latest_orders_row,
+#         **request_data_features,
+#     }
+
+
+# def get_latest_feature_values_for_location_df(entity_row, origin_df, destination_df):
+#     latest_origin_row = get_latest_row(
+#         entity_row, origin_df, "location_id", "origin_id"
+#     )
+#     latest_destination_row = get_latest_row(
+#         entity_row, destination_df, "location_id", "destination_id"
+#     )
+#     # Need full feature names for shadow entities
+#     latest_origin_row["origin__temperature"] = latest_origin_row.pop("temperature")
+#     latest_destination_row["destination__temperature"] = latest_destination_row.pop(
+#         "temperature"
+#     )
+
+#     return {
+#         **latest_origin_row,
+#         **latest_destination_row,
+#     }
+
+
+# def get_latest_feature_values_from_location_df(entity_row, location_df):
+#     return get_latest_row(entity_row, location_df, "location_id", "location_id")
+
+
+# def assert_feature_service_correctness(
+#     environment,
+#     endpoint,
+#     feature_service,
+#     entity_rows,
+#     full_feature_names,
+#     drivers_df,
+#     customers_df,
+#     orders_df,
+#     global_df,
+# ):
+#     feature_service_online_features_dict = get_online_features_dict(
+#         environment=environment,
+#         endpoint=endpoint,
+#         features=feature_service,
+#         entity_rows=entity_rows,
+#         full_feature_names=full_feature_names,
+#     )
+#     feature_service_keys = feature_service_online_features_dict.keys()
+#     expected_feature_refs = [
+#         f"{projection.name_to_use()}__{feature.name}"
+#         if full_feature_names
+#         else feature.name
+#         for projection in feature_service.feature_view_projections
+#         for feature in projection.features
+#     ]
+#     assert set(feature_service_keys) == set(expected_feature_refs) | {
+#         "customer_id",
+#         "driver_id",
+#     }
+
+#     tc = unittest.TestCase()
+#     for i, entity_row in enumerate(entity_rows):
+#         df_features = get_latest_feature_values_from_dataframes(
+#             driver_df=drivers_df,
+#             customer_df=customers_df,
+#             orders_df=orders_df,
+#             global_df=global_df,
+#             entity_row=entity_row,
+#         )
+#         tc.assertAlmostEqual(
+#             feature_service_online_features_dict[
+#                 response_feature_name(
+#                     "conv_rate_plus_100", expected_feature_refs, full_feature_names
+#                 )
+#             ][i],
+#             df_features["conv_rate"] + 100,
+#             delta=0.0001,
+#         )
+
+
+# def assert_feature_service_entity_mapping_correctness(
+#     environment,
+#     endpoint,
+#     feature_service,
+#     entity_rows,
+#     full_feature_names,
+#     origins_df,
+#     destinations_df,
+# ):
+#     if full_feature_names:
+#         feature_service_online_features_dict = get_online_features_dict(
+#             environment=environment,
+#             endpoint=endpoint,
+#             features=feature_service,
+#             entity_rows=entity_rows,
+#             full_feature_names=full_feature_names,
+#         )
+#         feature_service_keys = feature_service_online_features_dict.keys()
+
+#         expected_features = [
+#             f"{projection.name_to_use()}__{feature.name}"
+#             if full_feature_names
+#             else feature.name
+#             for projection in feature_service.feature_view_projections
+#             for feature in projection.features
+#         ]
+#         assert set(feature_service_keys) == set(expected_features) | {
+#             "destination_id",
+#             "origin_id",
+#         }
+
+#         for i, entity_row in enumerate(entity_rows):
+#             df_features = get_latest_feature_values_for_location_df(
+#                 origin_df=origins_df,
+#                 destination_df=destinations_df,
+#                 entity_row=entity_row,
+#             )
+#             for feature_name in ["origin__temperature", "destination__temperature"]:
+#                 assert (
+#                     feature_service_online_features_dict[feature_name][i]
+#                     == df_features[feature_name]
+#                 )
+#     else:
+#         # using 2 of the same FeatureView without full_feature_names=True will result in collision
+#         with pytest.raises(FeatureNameCollisionError):
+#             get_online_features_dict(
+#                 environment=environment,
+#                 endpoint=endpoint,
+#                 features=feature_service,
+#                 entity_rows=entity_rows,
+#                 full_feature_names=full_feature_names,
+#             )

From c3075c48e0cf968f1e87d416c0a46c5392e3fef1 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 21 Jun 2022 14:46:10 -0700
Subject: [PATCH 19/30] Fix test

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../online_store/test_universal_online.py     | 960 +++++++++---------
 1 file changed, 480 insertions(+), 480 deletions(-)

diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 3d066e7ba7..9d4db3e03e 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -517,528 +517,528 @@ def test_online_retrieval_with_event_timestamps(
 #             )
 
 
-# @pytest.mark.integration
-# @pytest.mark.universal_online_stores
-# # @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
-# @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
-# def test_online_retrieval(
-#     environment, universal_data_sources, feature_server_endpoint, full_feature_names
-# ):
-#     fs = environment.feature_store
-#     entities, datasets, data_sources = universal_data_sources
-#     feature_views = construct_universal_feature_views(data_sources)
-
-#     feature_service = FeatureService(
-#         "convrate_plus100",
-#         features=[
-#             feature_views.driver[["conv_rate"]],
-#             feature_views.driver_odfv,
-#             feature_views.customer[["current_balance"]],
-#         ],
-#     )
-#     feature_service_entity_mapping = FeatureService(
-#         name="entity_mapping",
-#         features=[
-#             feature_views.location.with_name("origin").with_join_key_map(
-#                 {"location_id": "origin_id"}
-#             ),
-#             feature_views.location.with_name("destination").with_join_key_map(
-#                 {"location_id": "destination_id"}
-#             ),
-#         ],
-#     )
-
-#     feast_objects = []
-#     feast_objects.extend(feature_views.values())
-#     feast_objects.extend(
-#         [
-#             driver(),
-#             customer(),
-#             location(),
-#             feature_service,
-#             feature_service_entity_mapping,
-#         ]
-#     )
-#     fs.apply(feast_objects)
-#     fs.materialize(
-#         environment.start_date - timedelta(days=1),
-#         environment.end_date + timedelta(days=1),
-#     )
-
-#     entity_sample = datasets.orders_df.sample(10)[
-#         ["customer_id", "driver_id", "order_id", "event_timestamp"]
-#     ]
-#     orders_df = datasets.orders_df[
-#         (
-#             datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
-#             & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
-#         )
-#     ]
-
-#     sample_drivers = entity_sample["driver_id"]
-#     drivers_df = datasets.driver_df[
-#         datasets.driver_df["driver_id"].isin(sample_drivers)
-#     ]
-
-#     sample_customers = entity_sample["customer_id"]
-#     customers_df = datasets.customer_df[
-#         datasets.customer_df["customer_id"].isin(sample_customers)
-#     ]
-
-#     location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
-#     sample_location_pairs = location_pairs[
-#         np.random.choice(len(location_pairs), 10)
-#     ].T.tolist()
-#     origins_df = datasets.location_df[
-#         datasets.location_df["location_id"].isin(sample_location_pairs[0])
-#     ]
-#     destinations_df = datasets.location_df[
-#         datasets.location_df["location_id"].isin(sample_location_pairs[1])
-#     ]
-
-#     global_df = datasets.global_df
+@pytest.mark.integration
+@pytest.mark.universal_online_stores
+# @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
+@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
+def test_online_retrieval(
+    environment, universal_data_sources, feature_server_endpoint, full_feature_names
+):
+    fs = environment.feature_store
+    entities, datasets, data_sources = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
 
-#     entity_rows = [
-#         {"driver_id": d, "customer_id": c, "val_to_add": 50}
-#         for (d, c) in zip(sample_drivers, sample_customers)
-#     ]
+    feature_service = FeatureService(
+        "convrate_plus100",
+        features=[
+            feature_views.driver[["conv_rate"]],
+            feature_views.driver_odfv,
+            feature_views.customer[["current_balance"]],
+        ],
+    )
+    feature_service_entity_mapping = FeatureService(
+        name="entity_mapping",
+        features=[
+            feature_views.location.with_name("origin").with_join_key_map(
+                {"location_id": "origin_id"}
+            ),
+            feature_views.location.with_name("destination").with_join_key_map(
+                {"location_id": "destination_id"}
+            ),
+        ],
+    )
 
-#     feature_refs = [
-#         "driver_stats:conv_rate",
-#         "driver_stats:avg_daily_trips",
-#         "customer_profile:current_balance",
-#         "customer_profile:avg_passenger_count",
-#         "customer_profile:lifetime_trip_count",
-#         "conv_rate_plus_100:conv_rate_plus_100",
-#         "conv_rate_plus_100:conv_rate_plus_val_to_add",
-#         "order:order_is_success",
-#         "global_stats:num_rides",
-#         "global_stats:avg_ride_length",
-#     ]
-#     unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
-#     # Remove the on demand feature view output features, since they're not present in the source dataframe
-#     unprefixed_feature_refs.remove("conv_rate_plus_100")
-#     unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")
+    feast_objects = []
+    feast_objects.extend(feature_views.values())
+    feast_objects.extend(
+        [
+            driver(),
+            customer(),
+            location(),
+            feature_service,
+            feature_service_entity_mapping,
+        ]
+    )
+    fs.apply(feast_objects)
+    fs.materialize(
+        environment.start_date - timedelta(days=1),
+        environment.end_date + timedelta(days=1),
+    )
 
-#     online_features_dict = get_online_features_dict(
-#         environment=environment,
-#         endpoint=feature_server_endpoint,
-#         features=feature_refs,
-#         entity_rows=entity_rows,
-#         full_feature_names=full_feature_names,
-#     )
+    entity_sample = datasets.orders_df.sample(10)[
+        ["customer_id", "driver_id", "order_id", "event_timestamp"]
+    ]
+    orders_df = datasets.orders_df[
+        (
+            datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
+            & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
+        )
+    ]
+
+    sample_drivers = entity_sample["driver_id"]
+    drivers_df = datasets.driver_df[
+        datasets.driver_df["driver_id"].isin(sample_drivers)
+    ]
+
+    sample_customers = entity_sample["customer_id"]
+    customers_df = datasets.customer_df[
+        datasets.customer_df["customer_id"].isin(sample_customers)
+    ]
+
+    location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
+    sample_location_pairs = location_pairs[
+        np.random.choice(len(location_pairs), 10)
+    ].T.tolist()
+    origins_df = datasets.location_df[
+        datasets.location_df["location_id"].isin(sample_location_pairs[0])
+    ]
+    destinations_df = datasets.location_df[
+        datasets.location_df["location_id"].isin(sample_location_pairs[1])
+    ]
+
+    global_df = datasets.global_df
+
+    entity_rows = [
+        {"driver_id": d, "customer_id": c, "val_to_add": 50}
+        for (d, c) in zip(sample_drivers, sample_customers)
+    ]
+
+    feature_refs = [
+        "driver_stats:conv_rate",
+        "driver_stats:avg_daily_trips",
+        "customer_profile:current_balance",
+        "customer_profile:avg_passenger_count",
+        "customer_profile:lifetime_trip_count",
+        "conv_rate_plus_100:conv_rate_plus_100",
+        "conv_rate_plus_100:conv_rate_plus_val_to_add",
+        "order:order_is_success",
+        "global_stats:num_rides",
+        "global_stats:avg_ride_length",
+    ]
+    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
+    # Remove the on demand feature view output features, since they're not present in the source dataframe
+    unprefixed_feature_refs.remove("conv_rate_plus_100")
+    unprefixed_feature_refs.remove("conv_rate_plus_val_to_add")
+
+    online_features_dict = get_online_features_dict(
+        environment=environment,
+        endpoint=feature_server_endpoint,
+        features=feature_refs,
+        entity_rows=entity_rows,
+        full_feature_names=full_feature_names,
+    )
 
-#     # Test that the on demand feature views compute properly even if the dependent conv_rate
-#     # feature isn't requested.
-#     online_features_no_conv_rate = get_online_features_dict(
-#         environment=environment,
-#         endpoint=feature_server_endpoint,
-#         features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"],
-#         entity_rows=entity_rows,
-#         full_feature_names=full_feature_names,
-#     )
+    # Test that the on demand feature views compute properly even if the dependent conv_rate
+    # feature isn't requested.
+    online_features_no_conv_rate = get_online_features_dict(
+        environment=environment,
+        endpoint=feature_server_endpoint,
+        features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"],
+        entity_rows=entity_rows,
+        full_feature_names=full_feature_names,
+    )
 
-#     assert online_features_no_conv_rate is not None
+    assert online_features_no_conv_rate is not None
+
+    keys = set(online_features_dict.keys())
+    expected_keys = set(
+        f.replace(":", "__") if full_feature_names else f.split(":")[-1]
+        for f in feature_refs
+    ) | {"customer_id", "driver_id"}
+    assert (
+        keys == expected_keys
+    ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
+
+    tc = unittest.TestCase()
+    for i, entity_row in enumerate(entity_rows):
+        df_features = get_latest_feature_values_from_dataframes(
+            driver_df=drivers_df,
+            customer_df=customers_df,
+            orders_df=orders_df,
+            global_df=global_df,
+            entity_row=entity_row,
+        )
 
-#     keys = set(online_features_dict.keys())
-#     expected_keys = set(
-#         f.replace(":", "__") if full_feature_names else f.split(":")[-1]
-#         for f in feature_refs
-#     ) | {"customer_id", "driver_id"}
-#     assert (
-#         keys == expected_keys
-#     ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
+        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
+        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
+        tc.assertAlmostEqual(
+            online_features_dict[
+                response_feature_name(
+                    "conv_rate_plus_100", feature_refs, full_feature_names
+                )
+            ][i],
+            df_features["conv_rate"] + 100,
+            delta=0.0001,
+        )
+        tc.assertAlmostEqual(
+            online_features_dict[
+                response_feature_name(
+                    "conv_rate_plus_val_to_add", feature_refs, full_feature_names
+                )
+            ][i],
+            df_features["conv_rate"] + df_features["val_to_add"],
+            delta=0.0001,
+        )
+        for unprefixed_feature_ref in unprefixed_feature_refs:
+            tc.assertAlmostEqual(
+                df_features[unprefixed_feature_ref],
+                online_features_dict[
+                    response_feature_name(
+                        unprefixed_feature_ref, feature_refs, full_feature_names
+                    )
+                ][i],
+                delta=0.0001,
+            )
+
+    # Check what happens for missing values
+    missing_responses_dict = get_online_features_dict(
+        environment=environment,
+        endpoint=feature_server_endpoint,
+        features=feature_refs,
+        entity_rows=[{"driver_id": 0, "customer_id": 0, "val_to_add": 100}],
+        full_feature_names=full_feature_names,
+    )
+    assert missing_responses_dict is not None
+    for unprefixed_feature_ref in unprefixed_feature_refs:
+        if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
+            tc.assertIsNone(
+                missing_responses_dict[
+                    response_feature_name(
+                        unprefixed_feature_ref, feature_refs, full_feature_names
+                    )
+                ][0]
+            )
+
+    # Check what happens for missing request data
+    with pytest.raises(RequestDataNotFoundInEntityRowsException):
+        get_online_features_dict(
+            environment=environment,
+            endpoint=feature_server_endpoint,
+            features=feature_refs,
+            entity_rows=[{"driver_id": 0, "customer_id": 0}],
+            full_feature_names=full_feature_names,
+        )
 
-#     tc = unittest.TestCase()
-#     for i, entity_row in enumerate(entity_rows):
-#         df_features = get_latest_feature_values_from_dataframes(
-#             driver_df=drivers_df,
-#             customer_df=customers_df,
-#             orders_df=orders_df,
-#             global_df=global_df,
-#             entity_row=entity_row,
-#         )
+    assert_feature_service_correctness(
+        environment,
+        feature_server_endpoint,
+        feature_service,
+        entity_rows,
+        full_feature_names,
+        drivers_df,
+        customers_df,
+        orders_df,
+        global_df,
+    )
 
-#         assert df_features["customer_id"] == online_features_dict["customer_id"][i]
-#         assert df_features["driver_id"] == online_features_dict["driver_id"][i]
-#         tc.assertAlmostEqual(
-#             online_features_dict[
-#                 response_feature_name(
-#                     "conv_rate_plus_100", feature_refs, full_feature_names
-#                 )
-#             ][i],
-#             df_features["conv_rate"] + 100,
-#             delta=0.0001,
-#         )
-#         tc.assertAlmostEqual(
-#             online_features_dict[
-#                 response_feature_name(
-#                     "conv_rate_plus_val_to_add", feature_refs, full_feature_names
-#                 )
-#             ][i],
-#             df_features["conv_rate"] + df_features["val_to_add"],
-#             delta=0.0001,
-#         )
-#         for unprefixed_feature_ref in unprefixed_feature_refs:
-#             tc.assertAlmostEqual(
-#                 df_features[unprefixed_feature_ref],
-#                 online_features_dict[
-#                     response_feature_name(
-#                         unprefixed_feature_ref, feature_refs, full_feature_names
-#                     )
-#                 ][i],
-#                 delta=0.0001,
-#             )
+    entity_rows = [
+        {"origin_id": origin, "destination_id": destination}
+        for (_driver, _customer, origin, destination) in zip(
+            sample_drivers, sample_customers, *sample_location_pairs
+        )
+    ]
+    assert_feature_service_entity_mapping_correctness(
+        environment,
+        feature_server_endpoint,
+        feature_service_entity_mapping,
+        entity_rows,
+        full_feature_names,
+        origins_df,
+        destinations_df,
+    )
 
-#     # Check what happens for missing values
-#     missing_responses_dict = get_online_features_dict(
-#         environment=environment,
-#         endpoint=feature_server_endpoint,
-#         features=feature_refs,
-#         entity_rows=[{"driver_id": 0, "customer_id": 0, "val_to_add": 100}],
-#         full_feature_names=full_feature_names,
-#     )
-#     assert missing_responses_dict is not None
-#     for unprefixed_feature_ref in unprefixed_feature_refs:
-#         if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
-#             tc.assertIsNone(
-#                 missing_responses_dict[
-#                     response_feature_name(
-#                         unprefixed_feature_ref, feature_refs, full_feature_names
-#                     )
-#                 ][0]
-#             )
 
-#     # Check what happens for missing request data
-#     with pytest.raises(RequestDataNotFoundInEntityRowsException):
-#         get_online_features_dict(
-#             environment=environment,
-#             endpoint=feature_server_endpoint,
-#             features=feature_refs,
-#             entity_rows=[{"driver_id": 0, "customer_id": 0}],
-#             full_feature_names=full_feature_names,
-#         )
+@pytest.mark.integration
+@pytest.mark.universal_online_stores(only=["redis"])
+def test_online_store_cleanup(environment, universal_data_sources):
+    """
+    Some online store implementations (like Redis) keep features from different features views
+    but with common entities together.
+    This might end up with deletion of all features attached to the entity,
+    when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150).
+
+    Plan:
+        1. Register two feature views with common entity "driver"
+        2. Materialize data
+        3. Check if features are available (via online retrieval)
+        4. Delete one feature view
+        5. Check that features for other are still available
+        6. Delete another feature view (and create again)
+        7. Verify that features for both feature view were deleted
+    """
+    fs = environment.feature_store
+    entities, datasets, data_sources = universal_data_sources
+    driver_stats_fv = construct_universal_feature_views(data_sources).driver
 
-#     assert_feature_service_correctness(
-#         environment,
-#         feature_server_endpoint,
-#         feature_service,
-#         entity_rows,
-#         full_feature_names,
-#         drivers_df,
-#         customers_df,
-#         orders_df,
-#         global_df,
-#     )
+    driver_entities = entities.driver_vals
+    df = pd.DataFrame(
+        {
+            "ts_1": [environment.end_date] * len(driver_entities),
+            "created_ts": [environment.end_date] * len(driver_entities),
+            "driver_id": driver_entities,
+            "value": np.random.random(size=len(driver_entities)),
+        }
+    )
 
-#     entity_rows = [
-#         {"origin_id": origin, "destination_id": destination}
-#         for (_driver, _customer, origin, destination) in zip(
-#             sample_drivers, sample_customers, *sample_location_pairs
-#         )
-#     ]
-#     assert_feature_service_entity_mapping_correctness(
-#         environment,
-#         feature_server_endpoint,
-#         feature_service_entity_mapping,
-#         entity_rows,
-#         full_feature_names,
-#         origins_df,
-#         destinations_df,
-#     )
+    ds = environment.data_source_creator.create_data_source(
+        df, destination_name="simple_driver_dataset"
+    )
 
+    simple_driver_fv = driver_feature_view(
+        data_source=ds, name="test_universal_online_simple_driver"
+    )
 
-# @pytest.mark.integration
-# @pytest.mark.universal_online_stores(only=["redis"])
-# def test_online_store_cleanup(environment, universal_data_sources):
-#     """
-#     Some online store implementations (like Redis) keep features from different features views
-#     but with common entities together.
-#     This might end up with deletion of all features attached to the entity,
-#     when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150).
-
-#     Plan:
-#         1. Register two feature views with common entity "driver"
-#         2. Materialize data
-#         3. Check if features are available (via online retrieval)
-#         4. Delete one feature view
-#         5. Check that features for other are still available
-#         6. Delete another feature view (and create again)
-#         7. Verify that features for both feature view were deleted
-#     """
-#     fs = environment.feature_store
-#     entities, datasets, data_sources = universal_data_sources
-#     driver_stats_fv = construct_universal_feature_views(data_sources).driver
-
-#     driver_entities = entities.driver_vals
-#     df = pd.DataFrame(
-#         {
-#             "ts_1": [environment.end_date] * len(driver_entities),
-#             "created_ts": [environment.end_date] * len(driver_entities),
-#             "driver_id": driver_entities,
-#             "value": np.random.random(size=len(driver_entities)),
-#         }
-#     )
+    fs.apply([driver(), simple_driver_fv, driver_stats_fv])
 
-#     ds = environment.data_source_creator.create_data_source(
-#         df, destination_name="simple_driver_dataset"
-#     )
+    fs.materialize(
+        environment.start_date - timedelta(days=1),
+        environment.end_date + timedelta(days=1),
+    )
+    expected_values = df.sort_values(by="driver_id")
 
-#     simple_driver_fv = driver_feature_view(
-#         data_source=ds, name="test_universal_online_simple_driver"
-#     )
+    features = [f"{simple_driver_fv.name}:value"]
+    entity_rows = [{"driver_id": driver_id} for driver_id in sorted(driver_entities)]
 
-#     fs.apply([driver(), simple_driver_fv, driver_stats_fv])
+    online_features = fs.get_online_features(
+        features=features, entity_rows=entity_rows
+    ).to_dict()
+    assert np.allclose(expected_values["value"], online_features["value"])
 
-#     fs.materialize(
-#         environment.start_date - timedelta(days=1),
-#         environment.end_date + timedelta(days=1),
-#     )
-#     expected_values = df.sort_values(by="driver_id")
+    fs.apply(
+        objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False
+    )
 
-#     features = [f"{simple_driver_fv.name}:value"]
-#     entity_rows = [{"driver_id": driver_id} for driver_id in sorted(driver_entities)]
+    online_features = fs.get_online_features(
+        features=features, entity_rows=entity_rows
+    ).to_dict()
+    assert np.allclose(expected_values["value"], online_features["value"])
 
-#     online_features = fs.get_online_features(
-#         features=features, entity_rows=entity_rows
-#     ).to_dict()
-#     assert np.allclose(expected_values["value"], online_features["value"])
+    fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False)
 
-#     fs.apply(
-#         objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False
-#     )
+    def eventually_apply() -> Tuple[None, bool]:
+        try:
+            fs.apply([simple_driver_fv])
+        except BotoCoreError:
+            return None, False
 
-#     online_features = fs.get_online_features(
-#         features=features, entity_rows=entity_rows
-#     ).to_dict()
-#     assert np.allclose(expected_values["value"], online_features["value"])
+        return None, True
 
-#     fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False)
+    # Online store backend might have eventual consistency in schema update
+    # So recreating table that was just deleted might need some retries
+    wait_retry_backoff(eventually_apply, timeout_secs=60)
 
-#     def eventually_apply() -> Tuple[None, bool]:
-#         try:
-#             fs.apply([simple_driver_fv])
-#         except BotoCoreError:
-#             return None, False
+    online_features = fs.get_online_features(
+        features=features, entity_rows=entity_rows
+    ).to_dict()
+    assert all(v is None for v in online_features["value"])
 
-#         return None, True
 
-#     # Online store backend might have eventual consistency in schema update
-#     # So recreating table that was just deleted might need some retries
-#     wait_retry_backoff(eventually_apply, timeout_secs=60)
+def response_feature_name(
+    feature: str, feature_refs: List[str], full_feature_names: bool
+) -> str:
+    if not full_feature_names:
+        return feature
 
-#     online_features = fs.get_online_features(
-#         features=features, entity_rows=entity_rows
-#     ).to_dict()
-#     assert all(v is None for v in online_features["value"])
+    for feature_ref in feature_refs:
+        if feature_ref.endswith(feature):
+            return feature_ref.replace(":", "__")
 
+    return feature
 
-# def response_feature_name(
-#     feature: str, feature_refs: List[str], full_feature_names: bool
-# ) -> str:
-#     if not full_feature_names:
-#         return feature
 
-#     for feature_ref in feature_refs:
-#         if feature_ref.endswith(feature):
-#             return feature_ref.replace(":", "__")
+def get_latest_row(entity_row, df, join_key, entity_key):
+    rows = df[df[join_key] == entity_row[entity_key]]
+    return rows.loc[rows["event_timestamp"].idxmax()].to_dict()
 
-#     return feature
 
+def get_latest_feature_values_from_dataframes(
+    driver_df,
+    customer_df,
+    orders_df,
+    entity_row,
+    global_df=None,
+    origin_df=None,
+    destination_df=None,
+):
+    latest_driver_row = get_latest_row(entity_row, driver_df, "driver_id", "driver_id")
+    latest_customer_row = get_latest_row(
+        entity_row, customer_df, "customer_id", "customer_id"
+    )
 
-# def get_latest_row(entity_row, df, join_key, entity_key):
-#     rows = df[df[join_key] == entity_row[entity_key]]
-#     return rows.loc[rows["event_timestamp"].idxmax()].to_dict()
+    # Since the event timestamp columns may contain timestamps of different timezones,
+    # we must first convert the timestamps to UTC before we can compare them.
+    order_rows = orders_df[
+        (orders_df["driver_id"] == entity_row["driver_id"])
+        & (orders_df["customer_id"] == entity_row["customer_id"])
+    ]
+    timestamps = order_rows[["event_timestamp"]]
+    timestamps["event_timestamp"] = pd.to_datetime(
+        timestamps["event_timestamp"], utc=True
+    )
+    max_index = timestamps["event_timestamp"].idxmax()
+    latest_orders_row = order_rows.loc[max_index]
+
+    if global_df is not None:
+        latest_global_row = global_df.loc[
+            global_df["event_timestamp"].idxmax()
+        ].to_dict()
+    if origin_df is not None:
+        latest_location_row = get_latest_feature_values_for_location_df(
+            entity_row, origin_df, destination_df
+        )
 
+    request_data_features = entity_row.copy()
+    request_data_features.pop("driver_id")
+    request_data_features.pop("customer_id")
+    if global_df is not None:
+        return {
+            **latest_customer_row,
+            **latest_driver_row,
+            **latest_orders_row,
+            **latest_global_row,
+            **request_data_features,
+        }
+    if origin_df is not None:
+        request_data_features.pop("origin_id")
+        request_data_features.pop("destination_id")
+        return {
+            **latest_customer_row,
+            **latest_driver_row,
+            **latest_orders_row,
+            **latest_location_row,
+            **request_data_features,
+        }
+    return {
+        **latest_customer_row,
+        **latest_driver_row,
+        **latest_orders_row,
+        **request_data_features,
+    }
 
-# def get_latest_feature_values_from_dataframes(
-#     driver_df,
-#     customer_df,
-#     orders_df,
-#     entity_row,
-#     global_df=None,
-#     origin_df=None,
-#     destination_df=None,
-# ):
-#     latest_driver_row = get_latest_row(entity_row, driver_df, "driver_id", "driver_id")
-#     latest_customer_row = get_latest_row(
-#         entity_row, customer_df, "customer_id", "customer_id"
-#     )
 
-#     # Since the event timestamp columns may contain timestamps of different timezones,
-#     # we must first convert the timestamps to UTC before we can compare them.
-#     order_rows = orders_df[
-#         (orders_df["driver_id"] == entity_row["driver_id"])
-#         & (orders_df["customer_id"] == entity_row["customer_id"])
-#     ]
-#     timestamps = order_rows[["event_timestamp"]]
-#     timestamps["event_timestamp"] = pd.to_datetime(
-#         timestamps["event_timestamp"], utc=True
-#     )
-#     max_index = timestamps["event_timestamp"].idxmax()
-#     latest_orders_row = order_rows.loc[max_index]
-
-#     if global_df is not None:
-#         latest_global_row = global_df.loc[
-#             global_df["event_timestamp"].idxmax()
-#         ].to_dict()
-#     if origin_df is not None:
-#         latest_location_row = get_latest_feature_values_for_location_df(
-#             entity_row, origin_df, destination_df
-#         )
+def get_latest_feature_values_for_location_df(entity_row, origin_df, destination_df):
+    latest_origin_row = get_latest_row(
+        entity_row, origin_df, "location_id", "origin_id"
+    )
+    latest_destination_row = get_latest_row(
+        entity_row, destination_df, "location_id", "destination_id"
+    )
+    # Need full feature names for shadow entities
+    latest_origin_row["origin__temperature"] = latest_origin_row.pop("temperature")
+    latest_destination_row["destination__temperature"] = latest_destination_row.pop(
+        "temperature"
+    )
 
-#     request_data_features = entity_row.copy()
-#     request_data_features.pop("driver_id")
-#     request_data_features.pop("customer_id")
-#     if global_df is not None:
-#         return {
-#             **latest_customer_row,
-#             **latest_driver_row,
-#             **latest_orders_row,
-#             **latest_global_row,
-#             **request_data_features,
-#         }
-#     if origin_df is not None:
-#         request_data_features.pop("origin_id")
-#         request_data_features.pop("destination_id")
-#         return {
-#             **latest_customer_row,
-#             **latest_driver_row,
-#             **latest_orders_row,
-#             **latest_location_row,
-#             **request_data_features,
-#         }
-#     return {
-#         **latest_customer_row,
-#         **latest_driver_row,
-#         **latest_orders_row,
-#         **request_data_features,
-#     }
-
-
-# def get_latest_feature_values_for_location_df(entity_row, origin_df, destination_df):
-#     latest_origin_row = get_latest_row(
-#         entity_row, origin_df, "location_id", "origin_id"
-#     )
-#     latest_destination_row = get_latest_row(
-#         entity_row, destination_df, "location_id", "destination_id"
-#     )
-#     # Need full feature names for shadow entities
-#     latest_origin_row["origin__temperature"] = latest_origin_row.pop("temperature")
-#     latest_destination_row["destination__temperature"] = latest_destination_row.pop(
-#         "temperature"
-#     )
+    return {
+        **latest_origin_row,
+        **latest_destination_row,
+    }
 
-#     return {
-#         **latest_origin_row,
-#         **latest_destination_row,
-#     }
 
+def get_latest_feature_values_from_location_df(entity_row, location_df):
+    return get_latest_row(entity_row, location_df, "location_id", "location_id")
 
-# def get_latest_feature_values_from_location_df(entity_row, location_df):
-#     return get_latest_row(entity_row, location_df, "location_id", "location_id")
 
+def assert_feature_service_correctness(
+    environment,
+    endpoint,
+    feature_service,
+    entity_rows,
+    full_feature_names,
+    drivers_df,
+    customers_df,
+    orders_df,
+    global_df,
+):
+    feature_service_online_features_dict = get_online_features_dict(
+        environment=environment,
+        endpoint=endpoint,
+        features=feature_service,
+        entity_rows=entity_rows,
+        full_feature_names=full_feature_names,
+    )
+    feature_service_keys = feature_service_online_features_dict.keys()
+    expected_feature_refs = [
+        f"{projection.name_to_use()}__{feature.name}"
+        if full_feature_names
+        else feature.name
+        for projection in feature_service.feature_view_projections
+        for feature in projection.features
+    ]
+    assert set(feature_service_keys) == set(expected_feature_refs) | {
+        "customer_id",
+        "driver_id",
+    }
 
-# def assert_feature_service_correctness(
-#     environment,
-#     endpoint,
-#     feature_service,
-#     entity_rows,
-#     full_feature_names,
-#     drivers_df,
-#     customers_df,
-#     orders_df,
-#     global_df,
-# ):
-#     feature_service_online_features_dict = get_online_features_dict(
-#         environment=environment,
-#         endpoint=endpoint,
-#         features=feature_service,
-#         entity_rows=entity_rows,
-#         full_feature_names=full_feature_names,
-#     )
-#     feature_service_keys = feature_service_online_features_dict.keys()
-#     expected_feature_refs = [
-#         f"{projection.name_to_use()}__{feature.name}"
-#         if full_feature_names
-#         else feature.name
-#         for projection in feature_service.feature_view_projections
-#         for feature in projection.features
-#     ]
-#     assert set(feature_service_keys) == set(expected_feature_refs) | {
-#         "customer_id",
-#         "driver_id",
-#     }
+    tc = unittest.TestCase()
+    for i, entity_row in enumerate(entity_rows):
+        df_features = get_latest_feature_values_from_dataframes(
+            driver_df=drivers_df,
+            customer_df=customers_df,
+            orders_df=orders_df,
+            global_df=global_df,
+            entity_row=entity_row,
+        )
+        tc.assertAlmostEqual(
+            feature_service_online_features_dict[
+                response_feature_name(
+                    "conv_rate_plus_100", expected_feature_refs, full_feature_names
+                )
+            ][i],
+            df_features["conv_rate"] + 100,
+            delta=0.0001,
+        )
 
-#     tc = unittest.TestCase()
-#     for i, entity_row in enumerate(entity_rows):
-#         df_features = get_latest_feature_values_from_dataframes(
-#             driver_df=drivers_df,
-#             customer_df=customers_df,
-#             orders_df=orders_df,
-#             global_df=global_df,
-#             entity_row=entity_row,
-#         )
-#         tc.assertAlmostEqual(
-#             feature_service_online_features_dict[
-#                 response_feature_name(
-#                     "conv_rate_plus_100", expected_feature_refs, full_feature_names
-#                 )
-#             ][i],
-#             df_features["conv_rate"] + 100,
-#             delta=0.0001,
-#         )
 
+def assert_feature_service_entity_mapping_correctness(
+    environment,
+    endpoint,
+    feature_service,
+    entity_rows,
+    full_feature_names,
+    origins_df,
+    destinations_df,
+):
+    if full_feature_names:
+        feature_service_online_features_dict = get_online_features_dict(
+            environment=environment,
+            endpoint=endpoint,
+            features=feature_service,
+            entity_rows=entity_rows,
+            full_feature_names=full_feature_names,
+        )
+        feature_service_keys = feature_service_online_features_dict.keys()
+
+        expected_features = [
+            f"{projection.name_to_use()}__{feature.name}"
+            if full_feature_names
+            else feature.name
+            for projection in feature_service.feature_view_projections
+            for feature in projection.features
+        ]
+        assert set(feature_service_keys) == set(expected_features) | {
+            "destination_id",
+            "origin_id",
+        }
 
-# def assert_feature_service_entity_mapping_correctness(
-#     environment,
-#     endpoint,
-#     feature_service,
-#     entity_rows,
-#     full_feature_names,
-#     origins_df,
-#     destinations_df,
-# ):
-#     if full_feature_names:
-#         feature_service_online_features_dict = get_online_features_dict(
-#             environment=environment,
-#             endpoint=endpoint,
-#             features=feature_service,
-#             entity_rows=entity_rows,
-#             full_feature_names=full_feature_names,
-#         )
-#         feature_service_keys = feature_service_online_features_dict.keys()
-
-#         expected_features = [
-#             f"{projection.name_to_use()}__{feature.name}"
-#             if full_feature_names
-#             else feature.name
-#             for projection in feature_service.feature_view_projections
-#             for feature in projection.features
-#         ]
-#         assert set(feature_service_keys) == set(expected_features) | {
-#             "destination_id",
-#             "origin_id",
-#         }
-
-#         for i, entity_row in enumerate(entity_rows):
-#             df_features = get_latest_feature_values_for_location_df(
-#                 origin_df=origins_df,
-#                 destination_df=destinations_df,
-#                 entity_row=entity_row,
-#             )
-#             for feature_name in ["origin__temperature", "destination__temperature"]:
-#                 assert (
-#                     feature_service_online_features_dict[feature_name][i]
-#                     == df_features[feature_name]
-#                 )
-#     else:
-#         # using 2 of the same FeatureView without full_feature_names=True will result in collision
-#         with pytest.raises(FeatureNameCollisionError):
-#             get_online_features_dict(
-#                 environment=environment,
-#                 endpoint=endpoint,
-#                 features=feature_service,
-#                 entity_rows=entity_rows,
-#                 full_feature_names=full_feature_names,
-#             )
+        for i, entity_row in enumerate(entity_rows):
+            df_features = get_latest_feature_values_for_location_df(
+                origin_df=origins_df,
+                destination_df=destinations_df,
+                entity_row=entity_row,
+            )
+            for feature_name in ["origin__temperature", "destination__temperature"]:
+                assert (
+                    feature_service_online_features_dict[feature_name][i]
+                    == df_features[feature_name]
+                )
+    else:
+        # using 2 of the same FeatureView without full_feature_names=True will result in collision
+        with pytest.raises(FeatureNameCollisionError):
+            get_online_features_dict(
+                environment=environment,
+                endpoint=endpoint,
+                features=feature_service,
+                entity_rows=entity_rows,
+                full_feature_names=full_feature_names,
+            )

From 2a4cd1018d9b14ac1f890f55a6b2c47a883711f9 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Tue, 21 Jun 2022 15:07:49 -0700
Subject: [PATCH 20/30] Fix test

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/data_source.py               |   4 +
 sdk/python/feast/feature_store.py             |  20 ++-
 .../test_stream_feature_view_apply.py         | 138 +++++++++---------
 3 files changed, 86 insertions(+), 76 deletions(-)

diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py
index 3682d84e57..c30145ddce 100644
--- a/sdk/python/feast/data_source.py
+++ b/sdk/python/feast/data_source.py
@@ -913,6 +913,10 @@ def to_proto(self) -> DataSourceProto:
 
         return data_source_proto
 
+class PushMode(enum.Enum):
+    ONLINE = 1
+    OFFLINE = 2
+    ONLINE_AND_OFFLINE = 3
 
 @typechecked
 class PushSource(DataSource):
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
index 9c2ea8a276..bd5e46bfa0 100644
--- a/sdk/python/feast/feature_store.py
+++ b/sdk/python/feast/feature_store.py
@@ -43,7 +43,7 @@
 from feast import feature_server, flags, flags_helper, ui_server, utils
 from feast.base_feature_view import BaseFeatureView
 from feast.batch_feature_view import BatchFeatureView
-from feast.data_source import DataSource
+from feast.data_source import DataSource, PushMode
 from feast.diff.infra_diff import InfraDiff, diff_infra_protos
 from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between
 from feast.dqm.errors import ValidationFailed
@@ -1341,15 +1341,16 @@ def tqdm_builder(length):
 
     @log_exceptions_and_usage
     def push(
-        self, push_source_name: str, df: pd.DataFrame, allow_registry_cache: bool = True
+        self, push_source_name: str, df: pd.DataFrame, allow_registry_cache: bool = True, to: PushMode = PushMode.ONLINE
     ):
         """
         Push features to a push source. This updates all the feature views that have the push source as stream source.
 
         Args:
             push_source_name: The name of the push source we want to push data to.
-            df: the data being pushed.
-            allow_registry_cache: whether to allow cached versions of the registry.
+            df: The data being pushed.
+            allow_registry_cache: Whether to allow cached versions of the registry.
+            to: Whether to push to online or offline store. Defaults to online store only.
         """
         warnings.warn(
             "Push source is an experimental feature. "
@@ -1373,9 +1374,14 @@ def push(
         }
 
         for fv in fvs_with_push_sources:
-            self.write_to_online_store(
-                fv.name, df, allow_registry_cache=allow_registry_cache
-            )
+            if to == PushMode.ONLINE or to == PushMode.ONLINE_AND_OFFLINE:
+                self.write_to_online_store(
+                    fv.name, df, allow_registry_cache=allow_registry_cache
+                )
+            if to == PushMode.OFFLINE or to == PushMode.ONLINE_AND_OFFLINE:
+                self._write_to_offline_store(
+                    fv.name, df, allow_registry_cache=allow_registry_cache
+                )
 
     @log_exceptions_and_usage
     def write_to_online_store(
diff --git a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
index 8e2af031c5..f92fd340f0 100644
--- a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
+++ b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
@@ -77,72 +77,72 @@ def simple_sfv(df):
         assert features["dummy_field"] == [None]
 
 
-@pytest.mark.integration
-def test_stream_feature_view_udf(simple_dataset_1) -> None:
-    """
-    Test apply of StreamFeatureView udfs are serialized correctly and usable.
-    """
-    runner = CliRunner()
-    with runner.local_repo(
-        get_example_repo("example_feature_repo_1.py"), "bigquery"
-    ) as fs, prep_file_source(
-        df=simple_dataset_1, timestamp_field="ts_1"
-    ) as file_source:
-        entity = Entity(name="driver_entity", join_keys=["test_key"])
-
-        stream_source = KafkaSource(
-            name="kafka",
-            timestamp_field="event_timestamp",
-            kafka_bootstrap_servers="",
-            message_format=AvroFormat(""),
-            topic="topic",
-            batch_source=file_source,
-            watermark_delay_threshold=timedelta(days=1),
-        )
-
-        @stream_feature_view(
-            entities=[entity],
-            ttl=timedelta(days=30),
-            owner="test@example.com",
-            online=True,
-            schema=[Field(name="dummy_field", dtype=Float32)],
-            description="desc",
-            aggregations=[
-                Aggregation(
-                    column="dummy_field", function="max", time_window=timedelta(days=1),
-                ),
-                Aggregation(
-                    column="dummy_field2",
-                    function="count",
-                    time_window=timedelta(days=24),
-                ),
-            ],
-            timestamp_field="event_timestamp",
-            mode="spark",
-            source=stream_source,
-            tags={},
-        )
-        def pandas_view(pandas_df):
-            import pandas as pd
-
-            assert type(pandas_df) == pd.DataFrame
-            df = pandas_df.transform(lambda x: x + 10, axis=1)
-            df.insert(2, "C", [20.2, 230.0, 34.0], True)
-            return df
-
-        import pandas as pd
-
-        fs.apply([entity, pandas_view])
-
-        stream_feature_views = fs.list_stream_feature_views()
-        assert len(stream_feature_views) == 1
-        assert stream_feature_views[0] == pandas_view
-
-        sfv = stream_feature_views[0]
-
-        df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
-        new_df = sfv.udf(df)
-        expected_df = pd.DataFrame(
-            {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]}
-        )
-        assert new_df.equals(expected_df)
+# @pytest.mark.integration
+# def test_stream_feature_view_udf(simple_dataset_1) -> None:
+#     """
+#     Test apply of StreamFeatureView udfs are serialized correctly and usable.
+#     """
+#     runner = CliRunner()
+#     with runner.local_repo(
+#         get_example_repo("example_feature_repo_1.py"), "bigquery"
+#     ) as fs, prep_file_source(
+#         df=simple_dataset_1, timestamp_field="ts_1"
+#     ) as file_source:
+#         entity = Entity(name="driver_entity", join_keys=["test_key"])
+
+#         stream_source = KafkaSource(
+#             name="kafka",
+#             timestamp_field="event_timestamp",
+#             kafka_bootstrap_servers="",
+#             message_format=AvroFormat(""),
+#             topic="topic",
+#             batch_source=file_source,
+#             watermark_delay_threshold=timedelta(days=1),
+#         )
+
+#         @stream_feature_view(
+#             entities=[entity],
+#             ttl=timedelta(days=30),
+#             owner="test@example.com",
+#             online=True,
+#             schema=[Field(name="dummy_field", dtype=Float32)],
+#             description="desc",
+#             aggregations=[
+#                 Aggregation(
+#                     column="dummy_field", function="max", time_window=timedelta(days=1),
+#                 ),
+#                 Aggregation(
+#                     column="dummy_field2",
+#                     function="count",
+#                     time_window=timedelta(days=24),
+#                 ),
+#             ],
+#             timestamp_field="event_timestamp",
+#             mode="spark",
+#             source=stream_source,
+#             tags={},
+#         )
+#         def pandas_view(pandas_df):
+#             import pandas as pd
+
+#             assert type(pandas_df) == pd.DataFrame
+#             df = pandas_df.transform(lambda x: x + 10, axis=1)
+#             df.insert(2, "C", [20.2, 230.0, 34.0], True)
+#             return df
+
+#         import pandas as pd
+
+#         fs.apply([entity, pandas_view])
+
+#         stream_feature_views = fs.list_stream_feature_views()
+#         assert len(stream_feature_views) == 1
+#         assert stream_feature_views[0] == pandas_view
+
+#         sfv = stream_feature_views[0]
+
+#         df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
+#         new_df = sfv.udf(df)
+#         expected_df = pd.DataFrame(
+#             {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]}
+#         )
+#         assert new_df.equals(expected_df)

From 48dfa8720eb1f4f296b6440fcc0fbbe8e8171395 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 10:49:23 -0700
Subject: [PATCH 21/30] Fix interface

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/passthrough_provider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py
index 9d18e6b249..e702661641 100644
--- a/sdk/python/feast/infra/passthrough_provider.py
+++ b/sdk/python/feast/infra/passthrough_provider.py
@@ -110,7 +110,7 @@ def offline_write_batch(
         set_usage_attribute("provider", self.__class__.__name__)
 
         if self.offline_store:
-            self.offline_store.offline_write_batch(config, feature_view, data, progress)
+            self.offline_store.__class__.offline_write_batch(config, feature_view, data, progress)
 
     @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001))
     def online_read(

From 08dad4f2df8c2cf3828e164a03c03e668240216a Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 11:11:08 -0700
Subject: [PATCH 22/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../feature-servers/python-feature-server.md          | 11 ++++++++---
 sdk/python/feast/feature_server.py                    | 10 +++++++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/docs/reference/feature-servers/python-feature-server.md b/docs/reference/feature-servers/python-feature-server.md
index 352f0edc16..0b357565ee 100644
--- a/docs/reference/feature-servers/python-feature-server.md
+++ b/docs/reference/feature-servers/python-feature-server.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-The feature server is an HTTP endpoint that serves features with JSON I/O. This enables users to write + read features from Feast online stores using any programming language that can make HTTP requests. 
+The feature server is an HTTP endpoint that serves features with JSON I/O. This enables users to write + read features from Feast online stores using any programming language that can make HTTP requests.
 
 ## CLI
 
@@ -155,6 +155,10 @@ curl -X POST \
 ### Pushing features to the online store
 You can push data corresponding to a push source to the online store (note that timestamps need to be strings):
 
+You can also specify a push mode to control whether the data is pushed to the online store, the offline store, or both. The feature server will return an error if the online/offline
+store doesn't support the push API functionality.
+
+The push mode is set with the string request parameter `to`, whose options are: ["online", "offline", "both"]. It defaults to "online" when omitted.
 ```text
 curl -X POST "http://localhost:6566/push" -d '{
     "push_source_name": "driver_hourly_stats_push_source",
@@ -187,9 +191,10 @@ event_dict = {
 }
 push_data = {
     "push_source_name":"driver_stats_push_source",
-    "df":event_dict
+    "df":event_dict,
+    "to":"online",
 }
 requests.post(
-    "http://localhost:6566/push", 
+    "http://localhost:6566/push",
     data=json.dumps(push_data))
 ```
diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py
index 8347bed6da..3228aa17d3 100644
--- a/sdk/python/feast/feature_server.py
+++ b/sdk/python/feast/feature_server.py
@@ -13,7 +13,7 @@
 import feast
 from feast import proto_json
 from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest
-
+from feast.data_source import PushMode
 
 # TODO: deprecate this in favor of push features
 class WriteToFeatureStoreRequest(BaseModel):
@@ -26,6 +26,7 @@ class PushFeaturesRequest(BaseModel):
     push_source_name: str
     df: dict
     allow_registry_cache: bool = True
+    to: str = "online"
 
 
 def get_app(store: "feast.FeatureStore"):
@@ -80,10 +81,17 @@ def push(body=Depends(get_body)):
         try:
             request = PushFeaturesRequest(**json.loads(body))
             df = pd.DataFrame(request.df)
+            if request.to == "offline":
+                to = PushMode.OFFLINE
+            elif request.to == "online":
+                to = PushMode.ONLINE
+            else:
+                to = PushMode.ONLINE_AND_OFFLINE
             store.push(
                 push_source_name=request.push_source_name,
                 df=df,
                 allow_registry_cache=request.allow_registry_cache,
+                to=to,
             )
         except Exception as e:
             # Print the original exception on the server side

From 527ad0deb2530576df10cf8db4961bec7078270d Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 11:55:31 -0700
Subject: [PATCH 23/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/data_source.py               |  2 +
 sdk/python/feast/feature_server.py            |  3 +-
 sdk/python/feast/feature_store.py             |  6 ++-
 .../feast/infra/passthrough_provider.py       |  4 +-
 .../offline_store/test_offline_write.py       |  4 +-
 .../test_push_online_retrieval.py             | 54 +++++++++++++++++++
 6 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py
index c30145ddce..f5c40d2421 100644
--- a/sdk/python/feast/data_source.py
+++ b/sdk/python/feast/data_source.py
@@ -913,11 +913,13 @@ def to_proto(self) -> DataSourceProto:
 
         return data_source_proto
 
+
 class PushMode(enum.Enum):
     ONLINE = 1
     OFFLINE = 2
     ONLINE_AND_OFFLINE = 3
 
+
 @typechecked
 class PushSource(DataSource):
     """
diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py
index 3228aa17d3..7bc634f7f5 100644
--- a/sdk/python/feast/feature_server.py
+++ b/sdk/python/feast/feature_server.py
@@ -12,8 +12,9 @@
 
 import feast
 from feast import proto_json
-from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest
 from feast.data_source import PushMode
+from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest
+
 
 # TODO: deprecate this in favor of push features
 class WriteToFeatureStoreRequest(BaseModel):
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
index bd5e46bfa0..de52b9e3f3 100644
--- a/sdk/python/feast/feature_store.py
+++ b/sdk/python/feast/feature_store.py
@@ -1341,7 +1341,11 @@ def tqdm_builder(length):
 
     @log_exceptions_and_usage
     def push(
-        self, push_source_name: str, df: pd.DataFrame, allow_registry_cache: bool = True, to: PushMode = PushMode.ONLINE
+        self,
+        push_source_name: str,
+        df: pd.DataFrame,
+        allow_registry_cache: bool = True,
+        to: PushMode = PushMode.ONLINE,
     ):
         """
         Push features to a push source. This updates all the feature views that have the push source as stream source.
diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py
index e702661641..8c6dd831dd 100644
--- a/sdk/python/feast/infra/passthrough_provider.py
+++ b/sdk/python/feast/infra/passthrough_provider.py
@@ -110,7 +110,9 @@ def offline_write_batch(
         set_usage_attribute("provider", self.__class__.__name__)
 
         if self.offline_store:
-            self.offline_store.__class__.offline_write_batch(config, feature_view, data, progress)
+            self.offline_store.__class__.offline_write_batch(
+                config, feature_view, data, progress
+            )
 
     @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001))
     def online_read(
diff --git a/sdk/python/tests/integration/offline_store/test_offline_write.py b/sdk/python/tests/integration/offline_store/test_offline_write.py
index 5e7a242513..997299c11b 100644
--- a/sdk/python/tests/integration/offline_store/test_offline_write.py
+++ b/sdk/python/tests/integration/offline_store/test_offline_write.py
@@ -123,7 +123,9 @@ def test_writing_consecutively_to_offline_store(environment, universal_data_sour
             Field(name="acc_rate", dtype=Float32),
         ],
         source=data_sources.driver,
-        ttl=timedelta(minutes=10),
+        ttl=timedelta(
+            minutes=10
+        ),  # This is to make sure all offline store data is out of date since get_historical_features() only searches backwards for a ttl window.
     )
 
     now = datetime.utcnow()
diff --git a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
index aa7e3e7f53..3d5d716ecb 100644
--- a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
+++ b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
@@ -1,8 +1,10 @@
 import datetime
 
+import numpy as np
 import pandas as pd
 import pytest
 
+from feast.data_source import PushMode
 from tests.integration.feature_repos.repo_configuration import (
     construct_universal_feature_views,
 )
@@ -39,3 +41,55 @@ def test_push_features_and_read(environment, universal_data_sources):
     online_resp_dict = online_resp.to_dict()
     assert online_resp_dict["location_id"] == [1]
     assert online_resp_dict["temperature"] == [4]
+
+
+@pytest.mark.integration
+@pytest.mark.universal_offline_stores(only=["file", "redshift"])
+@pytest.mark.universal_online_stores(only=["sqlite"])
+def test_push_features_and_read_from_offline_store(environment, universal_data_sources):
+    store = environment.feature_store
+
+    (_, _, data_sources) = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
+    now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
+
+    store.apply([driver(), customer(), location(), *feature_views.values()])
+    entity_df = pd.DataFrame.from_dict({"location_id": [1], "event_timestamp": [now,],})
+
+    before_df = store.get_historical_features(
+        entity_df=entity_df,
+        features=["pushable_location_stats:temperature"],
+        full_feature_names=False,
+    ).to_df()
+
+    data = {
+        "event_timestamp": [now],
+        "location_id": [1],
+        "temperature": [4],
+        "created": [now],
+    }
+    df_ingest = pd.DataFrame(data)
+    assert np.where(
+        before_df["location_id"].reset_index(drop=True)
+        == df_ingest["location_id"].reset_index(drop=True)
+    )
+    assert np.where(
+        before_df["temperature"].reset_index(drop=True)
+        != df_ingest["temperature"].reset_index(drop=True)
+    )
+
+    store.push("location_stats_push_source", df_ingest, to=PushMode.OFFLINE)
+
+    df = store.get_historical_features(
+        entity_df=entity_df,
+        features=["pushable_location_stats:temperature"],
+        full_feature_names=False,
+    ).to_df()
+    assert np.where(
+        df["location_id"].reset_index(drop=True)
+        == df_ingest["location_id"].reset_index(drop=True)
+    )
+    assert np.where(
+        df["temperature"].reset_index(drop=True)
+        == df_ingest["temperature"].reset_index(drop=True)
+    )

From fdb5e8b9148e5c2bbc3510d8d4a83b098aa9b703 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:00:34 -0700
Subject: [PATCH 24/30] Update

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../offline_store/test_push_offline_retrieval | 66 +++++++++++++++++++
 .../test_push_online_retrieval.py             | 54 +--------------
 2 files changed, 67 insertions(+), 53 deletions(-)
 create mode 100644 sdk/python/tests/integration/offline_store/test_push_offline_retrieval

diff --git a/sdk/python/tests/integration/offline_store/test_push_offline_retrieval b/sdk/python/tests/integration/offline_store/test_push_offline_retrieval
new file mode 100644
index 0000000000..5aaed47313
--- /dev/null
+++ b/sdk/python/tests/integration/offline_store/test_push_offline_retrieval
@@ -0,0 +1,66 @@
+import datetime
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from feast.data_source import PushMode
+from tests.integration.feature_repos.repo_configuration import (
+    construct_universal_feature_views,
+)
+from tests.integration.feature_repos.universal.entities import (
+    customer,
+    driver,
+    location,
+)
+
+@pytest.mark.integration
+@pytest.mark.universal_offline_stores(only=["file", "redshift"])
+@pytest.mark.universal_online_stores(only=["sqlite"])
+def test_push_features_and_read_from_offline_store(environment, universal_data_sources):
+    store = environment.feature_store
+
+    (_, _, data_sources) = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
+    now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
+
+    store.apply([driver(), customer(), location(), *feature_views.values()])
+    entity_df = pd.DataFrame.from_dict({"location_id": [1], "event_timestamp": [now]})
+
+    before_df = store.get_historical_features(
+        entity_df=entity_df,
+        features=["pushable_location_stats:temperature"],
+        full_feature_names=False,
+    ).to_df()
+
+    data = {
+        "event_timestamp": [now],
+        "location_id": [1],
+        "temperature": [4],
+        "created": [now],
+    }
+    df_ingest = pd.DataFrame(data)
+    assert np.where(
+        before_df["location_id"].reset_index(drop=True)
+        == df_ingest["location_id"].reset_index(drop=True)
+    )
+    assert np.where(
+        before_df["temperature"].reset_index(drop=True)
+        != df_ingest["temperature"].reset_index(drop=True)
+    )
+
+    store.push("location_stats_push_source", df_ingest, to=PushMode.OFFLINE)
+
+    df = store.get_historical_features(
+        entity_df=entity_df,
+        features=["pushable_location_stats:temperature"],
+        full_feature_names=False,
+    ).to_df()
+    assert np.where(
+        df["location_id"].reset_index(drop=True)
+        == df_ingest["location_id"].reset_index(drop=True)
+    )
+    assert np.where(
+        df["temperature"].reset_index(drop=True)
+        == df_ingest["temperature"].reset_index(drop=True)
+    )
diff --git a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
index 3d5d716ecb..6091363411 100644
--- a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
+++ b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
@@ -40,56 +40,4 @@ def test_push_features_and_read(environment, universal_data_sources):
     )
     online_resp_dict = online_resp.to_dict()
     assert online_resp_dict["location_id"] == [1]
-    assert online_resp_dict["temperature"] == [4]
-
-
-@pytest.mark.integration
-@pytest.mark.universal_offline_stores(only=["file", "redshift"])
-@pytest.mark.universal_online_stores(only=["sqlite"])
-def test_push_features_and_read_from_offline_store(environment, universal_data_sources):
-    store = environment.feature_store
-
-    (_, _, data_sources) = universal_data_sources
-    feature_views = construct_universal_feature_views(data_sources)
-    now = pd.Timestamp(datetime.datetime.utcnow()).round("ms")
-
-    store.apply([driver(), customer(), location(), *feature_views.values()])
-    entity_df = pd.DataFrame.from_dict({"location_id": [1], "event_timestamp": [now,],})
-
-    before_df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["pushable_location_stats:temperature"],
-        full_feature_names=False,
-    ).to_df()
-
-    data = {
-        "event_timestamp": [now],
-        "location_id": [1],
-        "temperature": [4],
-        "created": [now],
-    }
-    df_ingest = pd.DataFrame(data)
-    assert np.where(
-        before_df["location_id"].reset_index(drop=True)
-        == df_ingest["location_id"].reset_index(drop=True)
-    )
-    assert np.where(
-        before_df["temperature"].reset_index(drop=True)
-        != df_ingest["temperature"].reset_index(drop=True)
-    )
-
-    store.push("location_stats_push_source", df_ingest, to=PushMode.OFFLINE)
-
-    df = store.get_historical_features(
-        entity_df=entity_df,
-        features=["pushable_location_stats:temperature"],
-        full_feature_names=False,
-    ).to_df()
-    assert np.where(
-        df["location_id"].reset_index(drop=True)
-        == df_ingest["location_id"].reset_index(drop=True)
-    )
-    assert np.where(
-        df["temperature"].reset_index(drop=True)
-        == df_ingest["temperature"].reset_index(drop=True)
-    )
+    assert online_resp_dict["temperature"] == [4]
\ No newline at end of file

From 57318fdb8831b17dd26b7f8a627edf3b95746ab6 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:01:37 -0700
Subject: [PATCH 25/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../integration/online_store/test_push_online_retrieval.py    | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
index 6091363411..aa7e3e7f53 100644
--- a/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
+++ b/sdk/python/tests/integration/online_store/test_push_online_retrieval.py
@@ -1,10 +1,8 @@
 import datetime
 
-import numpy as np
 import pandas as pd
 import pytest
 
-from feast.data_source import PushMode
 from tests.integration.feature_repos.repo_configuration import (
     construct_universal_feature_views,
 )
@@ -40,4 +38,4 @@ def test_push_features_and_read(environment, universal_data_sources):
     )
     online_resp_dict = online_resp.to_dict()
     assert online_resp_dict["location_id"] == [1]
-    assert online_resp_dict["temperature"] == [4]
\ No newline at end of file
+    assert online_resp_dict["temperature"] == [4]

From ad3f608c03b497aacf277a150dbcabca70a1a9a4 Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:15:41 -0700
Subject: [PATCH 26/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/utils/aws_utils.py         |  2 ++
 .../feature_repos/repo_configuration.py           | 15 ---------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index b284d24231..dc5e2f0c11 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -243,6 +243,7 @@ def delete_redshift_table(
         redshift_data_client, cluster_id, database, user, drop_query,
     )
 
+
 def delete_redshift_table(
     redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
 ):
@@ -251,6 +252,7 @@ def delete_redshift_table(
         redshift_data_client, cluster_id, database, user, drop_query,
     )
 
+
 def upload_arrow_table_to_redshift(
     table: Union[pyarrow.Table, Path],
     redshift_data_client,
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index 75835f1c56..f4d5defcad 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -74,26 +74,11 @@
     "connection_string": "127.0.0.1:6001,127.0.0.1:6002,127.0.0.1:6003",
 }
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> fec6cc0b (Lint)
 OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = {
     "file": ("local", FileDataSourceCreator),
     "gcp": ("gcp", BigQueryDataSourceCreator),
     "redshift": ("aws", RedshiftDataSourceCreator),
     "snowflake": ("aws", RedshiftDataSourceCreator),
-<<<<<<< HEAD
-=======
-OFFLINE_STORE_TO_PROVIDER_CONFIG : Dict[
-    str, DataSourceCreator] = {
-        "file": ("local", FileDataSourceCreator),
-        "gcp": ("gcp", BigQueryDataSourceCreator),
-        "redshift": ("aws", RedshiftDataSourceCreator),
-        "snowflake": ("aws", RedshiftDataSourceCreator),
->>>>>>> a1b0c4a6 (Add redshift)
-=======
->>>>>>> fec6cc0b (Lint)
 }
 
 AVAILABLE_OFFLINE_STORES: List[Tuple[str, Type[DataSourceCreator]]] = [

From 4f7ffd8825d8756d17717918e329a13919a956fc Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:16:48 -0700
Subject: [PATCH 27/30] Fix rebase

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 sdk/python/feast/infra/utils/aws_utils.py | 56 -----------------------
 1 file changed, 56 deletions(-)

diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index dc5e2f0c11..7badda9846 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -244,15 +244,6 @@ def delete_redshift_table(
     )
 
 
-def delete_redshift_table(
-    redshift_data_client, cluster_id: str, database: str, user: str, table_name: str,
-):
-    drop_query = f"DROP {table_name} IF EXISTS"
-    execute_redshift_statement(
-        redshift_data_client, cluster_id, database, user, drop_query,
-    )
-
-
 def upload_arrow_table_to_redshift(
     table: Union[pyarrow.Table, Path],
     redshift_data_client,
@@ -436,53 +427,6 @@ def temporarily_upload_arrow_table_to_redshift(
     )
 
 
-@contextlib.contextmanager
-def temporarily_upload_arrow_table_to_redshift(
-    table: Union[pyarrow.Table, Path],
-    redshift_data_client,
-    cluster_id: str,
-    database: str,
-    user: str,
-    s3_resource,
-    iam_role: str,
-    s3_path: str,
-    table_name: str,
-    schema: Optional[pyarrow.Schema] = None,
-    fail_if_exists: bool = True,
-) -> Iterator[None]:
-    """Uploads a Arrow Table to Redshift as a new table with cleanup logic.
-
-    This is essentially the same as upload_arrow_table_to_redshift (check out its docstring for full details),
-    but unlike it this method is a generator and should be used with `with` block. For example:
-
-    >>> with temporarily_upload_arrow_table_to_redshift(...): # doctest: +SKIP
-    >>>     # Use `table_name` table in Redshift here
-    >>> # `table_name` will not exist at this point, since it's cleaned up by the `with` block
-
-    """
-    # Upload the dataframe to Redshift
-    upload_arrow_table_to_redshift(
-        table,
-        redshift_data_client,
-        cluster_id,
-        database,
-        user,
-        s3_resource,
-        s3_path,
-        iam_role,
-        table_name,
-        schema,
-        fail_if_exists,
-    )
-
-    yield
-
-    # Clean up the uploaded Redshift table
-    execute_redshift_statement(
-        redshift_data_client, cluster_id, database, user, f"DROP TABLE {table_name}",
-    )
-
-
 def download_s3_directory(s3_resource, bucket: str, key: str, local_dir: str):
     """Download the S3 directory to a local disk"""
     bucket_obj = s3_resource.Bucket(bucket)

From 7f4f2a12c68d90be31229fcd332cd129006fb39f Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:19:09 -0700
Subject: [PATCH 28/30] Fix naming

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 ...test_push_offline_retrieval => test_push_offline_retrieval.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename sdk/python/tests/integration/offline_store/{test_push_offline_retrieval => test_push_offline_retrieval.py} (100%)

diff --git a/sdk/python/tests/integration/offline_store/test_push_offline_retrieval b/sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py
similarity index 100%
rename from sdk/python/tests/integration/offline_store/test_push_offline_retrieval
rename to sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py

From b48d37711309f76dca6178732f9f0fc03740672a Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:21:35 -0700
Subject: [PATCH 29/30] Fix

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../integration/offline_store/test_push_offline_retrieval.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py b/sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py
index 5aaed47313..b2f91f442e 100644
--- a/sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py
+++ b/sdk/python/tests/integration/offline_store/test_push_offline_retrieval.py
@@ -14,6 +14,7 @@
     location,
 )
 
+
 @pytest.mark.integration
 @pytest.mark.universal_offline_stores(only=["file", "redshift"])
 @pytest.mark.universal_online_stores(only=["sqlite"])

From 2e1ddb1a7dba2d5f1a2e15f40b3a6276c0242a8a Mon Sep 17 00:00:00 2001
From: Kevin Zhang <kzhang@tecton.ai>
Date: Wed, 22 Jun 2022 12:23:44 -0700
Subject: [PATCH 30/30] Uncomment

Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
---
 .../online_store/test_universal_online.py     | 148 +++++++++---------
 .../test_stream_feature_view_apply.py         | 138 ++++++++--------
 2 files changed, 143 insertions(+), 143 deletions(-)

diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 9d4db3e03e..c068e04111 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -441,80 +441,80 @@ def test_online_retrieval_with_event_timestamps(
     )
 
 
-# @pytest.mark.integration
-# @pytest.mark.universal_online_stores
-# # @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
-# @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
-# def test_stream_feature_view_online_retrieval(
-#     environment, universal_data_sources, feature_server_endpoint, full_feature_names
-# ):
-#     """
-#     Tests materialization and online retrieval for stream feature views.
-
-#     This test is separate from test_online_retrieval since combining feature views and
-#     stream feature views into a single test resulted in test flakiness. This is tech
-#     debt that should be resolved soon.
-#     """
-#     # Set up feature store.
-#     fs = environment.feature_store
-#     entities, datasets, data_sources = universal_data_sources
-#     feature_views = construct_universal_feature_views(data_sources)
-#     pushable_feature_view = feature_views.pushed_locations
-#     fs.apply([location(), pushable_feature_view])
-
-#     # Materialize.
-#     fs.materialize(
-#         environment.start_date - timedelta(days=1),
-#         environment.end_date + timedelta(days=1),
-#     )
-
-#     # Get online features by randomly sampling 10 entities that exist in the batch source.
-#     sample_locations = datasets.location_df.sample(10)["location_id"]
-#     entity_rows = [
-#         {"location_id": sample_location} for sample_location in sample_locations
-#     ]
-
-#     feature_refs = [
-#         "pushable_location_stats:temperature",
-#     ]
-#     unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
-
-#     online_features_dict = get_online_features_dict(
-#         environment=environment,
-#         endpoint=feature_server_endpoint,
-#         features=feature_refs,
-#         entity_rows=entity_rows,
-#         full_feature_names=full_feature_names,
-#     )
-
-#     # Check that the response has the expected set of keys.
-#     keys = set(online_features_dict.keys())
-#     expected_keys = set(
-#         f.replace(":", "__") if full_feature_names else f.split(":")[-1]
-#         for f in feature_refs
-#     ) | {"location_id"}
-#     assert (
-#         keys == expected_keys
-#     ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
-
-#     # Check that the feature values match.
-#     tc = unittest.TestCase()
-#     for i, entity_row in enumerate(entity_rows):
-#         df_features = get_latest_feature_values_from_location_df(
-#             entity_row, datasets.location_df
-#         )
-
-#         assert df_features["location_id"] == online_features_dict["location_id"][i]
-#         for unprefixed_feature_ref in unprefixed_feature_refs:
-#             tc.assertAlmostEqual(
-#                 df_features[unprefixed_feature_ref],
-#                 online_features_dict[
-#                     response_feature_name(
-#                         unprefixed_feature_ref, feature_refs, full_feature_names
-#                     )
-#                 ][i],
-#                 delta=0.0001,
-#             )
+@pytest.mark.integration
+@pytest.mark.universal_online_stores
+# @pytest.mark.goserver Disabling because the go fs tests are flaking in CI. TODO(achals): uncomment after fixed.
+@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
+def test_stream_feature_view_online_retrieval(
+    environment, universal_data_sources, feature_server_endpoint, full_feature_names
+):
+    """
+    Tests materialization and online retrieval for stream feature views.
+
+    This test is separate from test_online_retrieval since combining feature views and
+    stream feature views into a single test resulted in test flakiness. This is tech
+    debt that should be resolved soon.
+    """
+    # Set up feature store.
+    fs = environment.feature_store
+    entities, datasets, data_sources = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
+    pushable_feature_view = feature_views.pushed_locations
+    fs.apply([location(), pushable_feature_view])
+
+    # Materialize.
+    fs.materialize(
+        environment.start_date - timedelta(days=1),
+        environment.end_date + timedelta(days=1),
+    )
+
+    # Get online features by randomly sampling 10 entities that exist in the batch source.
+    sample_locations = datasets.location_df.sample(10)["location_id"]
+    entity_rows = [
+        {"location_id": sample_location} for sample_location in sample_locations
+    ]
+
+    feature_refs = [
+        "pushable_location_stats:temperature",
+    ]
+    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
+
+    online_features_dict = get_online_features_dict(
+        environment=environment,
+        endpoint=feature_server_endpoint,
+        features=feature_refs,
+        entity_rows=entity_rows,
+        full_feature_names=full_feature_names,
+    )
+
+    # Check that the response has the expected set of keys.
+    keys = set(online_features_dict.keys())
+    expected_keys = set(
+        f.replace(":", "__") if full_feature_names else f.split(":")[-1]
+        for f in feature_refs
+    ) | {"location_id"}
+    assert (
+        keys == expected_keys
+    ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)"
+
+    # Check that the feature values match.
+    tc = unittest.TestCase()
+    for i, entity_row in enumerate(entity_rows):
+        df_features = get_latest_feature_values_from_location_df(
+            entity_row, datasets.location_df
+        )
+
+        assert df_features["location_id"] == online_features_dict["location_id"][i]
+        for unprefixed_feature_ref in unprefixed_feature_refs:
+            tc.assertAlmostEqual(
+                df_features[unprefixed_feature_ref],
+                online_features_dict[
+                    response_feature_name(
+                        unprefixed_feature_ref, feature_refs, full_feature_names
+                    )
+                ][i],
+                delta=0.0001,
+            )
 
 
 @pytest.mark.integration
diff --git a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
index f92fd340f0..8e2af031c5 100644
--- a/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
+++ b/sdk/python/tests/integration/registration/test_stream_feature_view_apply.py
@@ -77,72 +77,72 @@ def simple_sfv(df):
         assert features["dummy_field"] == [None]
 
 
-# @pytest.mark.integration
-# def test_stream_feature_view_udf(simple_dataset_1) -> None:
-#     """
-#     Test apply of StreamFeatureView udfs are serialized correctly and usable.
-#     """
-#     runner = CliRunner()
-#     with runner.local_repo(
-#         get_example_repo("example_feature_repo_1.py"), "bigquery"
-#     ) as fs, prep_file_source(
-#         df=simple_dataset_1, timestamp_field="ts_1"
-#     ) as file_source:
-#         entity = Entity(name="driver_entity", join_keys=["test_key"])
-
-#         stream_source = KafkaSource(
-#             name="kafka",
-#             timestamp_field="event_timestamp",
-#             kafka_bootstrap_servers="",
-#             message_format=AvroFormat(""),
-#             topic="topic",
-#             batch_source=file_source,
-#             watermark_delay_threshold=timedelta(days=1),
-#         )
-
-#         @stream_feature_view(
-#             entities=[entity],
-#             ttl=timedelta(days=30),
-#             owner="test@example.com",
-#             online=True,
-#             schema=[Field(name="dummy_field", dtype=Float32)],
-#             description="desc",
-#             aggregations=[
-#                 Aggregation(
-#                     column="dummy_field", function="max", time_window=timedelta(days=1),
-#                 ),
-#                 Aggregation(
-#                     column="dummy_field2",
-#                     function="count",
-#                     time_window=timedelta(days=24),
-#                 ),
-#             ],
-#             timestamp_field="event_timestamp",
-#             mode="spark",
-#             source=stream_source,
-#             tags={},
-#         )
-#         def pandas_view(pandas_df):
-#             import pandas as pd
-
-#             assert type(pandas_df) == pd.DataFrame
-#             df = pandas_df.transform(lambda x: x + 10, axis=1)
-#             df.insert(2, "C", [20.2, 230.0, 34.0], True)
-#             return df
-
-#         import pandas as pd
-
-#         fs.apply([entity, pandas_view])
-
-#         stream_feature_views = fs.list_stream_feature_views()
-#         assert len(stream_feature_views) == 1
-#         assert stream_feature_views[0] == pandas_view
-
-#         sfv = stream_feature_views[0]
-
-#         df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
-#         new_df = sfv.udf(df)
-#         expected_df = pd.DataFrame(
-#             {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]}
-#         )
-#         assert new_df.equals(expected_df)
+@pytest.mark.integration
+def test_stream_feature_view_udf(simple_dataset_1) -> None:
+    """
+    Test apply of StreamFeatureView udfs are serialized correctly and usable.
+    """
+    runner = CliRunner()
+    with runner.local_repo(
+        get_example_repo("example_feature_repo_1.py"), "bigquery"
+    ) as fs, prep_file_source(
+        df=simple_dataset_1, timestamp_field="ts_1"
+    ) as file_source:
+        entity = Entity(name="driver_entity", join_keys=["test_key"])
+
+        stream_source = KafkaSource(
+            name="kafka",
+            timestamp_field="event_timestamp",
+            kafka_bootstrap_servers="",
+            message_format=AvroFormat(""),
+            topic="topic",
+            batch_source=file_source,
+            watermark_delay_threshold=timedelta(days=1),
+        )
+
+        @stream_feature_view(
+            entities=[entity],
+            ttl=timedelta(days=30),
+            owner="test@example.com",
+            online=True,
+            schema=[Field(name="dummy_field", dtype=Float32)],
+            description="desc",
+            aggregations=[
+                Aggregation(
+                    column="dummy_field", function="max", time_window=timedelta(days=1),
+                ),
+                Aggregation(
+                    column="dummy_field2",
+                    function="count",
+                    time_window=timedelta(days=24),
+                ),
+            ],
+            timestamp_field="event_timestamp",
+            mode="spark",
+            source=stream_source,
+            tags={},
+        )
+        def pandas_view(pandas_df):
+            import pandas as pd
+
+            assert type(pandas_df) == pd.DataFrame
+            df = pandas_df.transform(lambda x: x + 10, axis=1)
+            df.insert(2, "C", [20.2, 230.0, 34.0], True)
+            return df
+
+        import pandas as pd
+
+        fs.apply([entity, pandas_view])
+
+        stream_feature_views = fs.list_stream_feature_views()
+        assert len(stream_feature_views) == 1
+        assert stream_feature_views[0] == pandas_view
+
+        sfv = stream_feature_views[0]
+
+        df = pd.DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
+        new_df = sfv.udf(df)
+        expected_df = pd.DataFrame(
+            {"A": [11, 12, 13], "B": [20, 30, 40], "C": [20.2, 230.0, 34.0]}
+        )
+        assert new_df.equals(expected_df)