-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor Environment class and DataSourceCreator API, and use fixture…
…s for datasets and data sources (#1790) * Fix API cruft from DataSourceCreator Signed-off-by: Achal Shah <achals@gmail.com> * Remove the need for get_prefixed_table_name Signed-off-by: Achal Shah <achals@gmail.com> * major refactor Signed-off-by: Achal Shah <achals@gmail.com> * move start time Signed-off-by: Achal Shah <achals@gmail.com> * Remove one dimension of variation to be added in later Signed-off-by: Achal Shah <achals@gmail.com> * Fix default Signed-off-by: Achal Shah <achals@gmail.com> * Fixups Signed-off-by: Achal Shah <achals@gmail.com> * Fixups Signed-off-by: Achal Shah <achals@gmail.com> * Fix up tests Signed-off-by: Achal Shah <achals@gmail.com> * Add retries to execute_redshift_statement_async Signed-off-by: Achal Shah <achals@gmail.com> * Add retries to execute_redshift_statement_async Signed-off-by: Achal Shah <achals@gmail.com> * refactoooor Signed-off-by: Achal Shah <achals@gmail.com> * remove retries Signed-off-by: Achal Shah <achals@gmail.com> * Remove provider variation since they don't really play a big role Signed-off-by: Achal Shah <achals@gmail.com> * Session scoped cache for test datasets and skipping older tests whose functionality is present in other universal tests Signed-off-by: Achal Shah <achals@gmail.com> * make format Signed-off-by: Achal Shah <achals@gmail.com> * make format Signed-off-by: Achal Shah <achals@gmail.com> * remove import Signed-off-by: Achal Shah <achals@gmail.com> * fix merge Signed-off-by: Achal Shah <achals@gmail.com> * Use an enum for the stopping procedure instead of the bools Signed-off-by: Achal Shah <achals@gmail.com> * Fix refs Signed-off-by: Achal Shah <achals@gmail.com> * fix step Signed-off-by: Achal Shah <achals@gmail.com> * WIP fixes Signed-off-by: Achal Shah <achals@gmail.com> * Fix for feature inferencing Signed-off-by: Achal Shah <achals@gmail.com> * C901 '_python_value_to_proto_value' is too complex :( Signed-off-by: Achal Shah <achals@gmail.com> * Split out construct_test_repo and 
construct_universal_test_repo Signed-off-by: Achal Shah <achals@gmail.com> * remove import Signed-off-by: Achal Shah <achals@gmail.com> * add unsafe_hash Signed-off-by: Achal Shah <achals@gmail.com> * Update testrepoconfig Signed-off-by: Achal Shah <achals@gmail.com> * Update testrepoconfig Signed-off-by: Achal Shah <achals@gmail.com> * Remove kwargs from construct_universal_test_environment Signed-off-by: Achal Shah <achals@gmail.com> * Remove unneeded method Signed-off-by: Achal Shah <achals@gmail.com> * Docs Signed-off-by: Achal Shah <achals@gmail.com> * Kill skipped tests Signed-off-by: Achal Shah <achals@gmail.com> * reorder Signed-off-by: Achal Shah <achals@gmail.com> * add todo Signed-off-by: Achal Shah <achals@gmail.com> * Split universal vs non data_source_cache Signed-off-by: Achal Shah <achals@gmail.com> * make format Signed-off-by: Achal Shah <achals@gmail.com> * WIP fixtures Signed-off-by: Achal Shah <achals@gmail.com> * WIP Trying fixtures more effectively Signed-off-by: Achal Shah <achals@gmail.com> * fix refs Signed-off-by: Achal Shah <achals@gmail.com> * Fix refs Signed-off-by: Achal Shah <achals@gmail.com> * Fix refs Signed-off-by: Achal Shah <achals@gmail.com> * Fix refs Signed-off-by: Achal Shah <achals@gmail.com> * fix historical tests Signed-off-by: Achal Shah <achals@gmail.com> * renames Signed-off-by: Achal Shah <achals@gmail.com> * CR updates Signed-off-by: Achal Shah <achals@gmail.com> * use the actual ref to data source creators Signed-off-by: Achal Shah <achals@gmail.com> * format Signed-off-by: Achal Shah <achals@gmail.com> * unused imports' Signed-off-by: Achal Shah <achals@gmail.com> * Add ids for pytest params Signed-off-by: Achal Shah <achals@gmail.com>
- Loading branch information
Showing
20 changed files
with
629 additions
and
1,195 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
192 changes: 192 additions & 0 deletions
192
sdk/python/tests/integration/feature_repos/repo_configuration.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
import tempfile
import uuid
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Type, Union
|
||
import pandas as pd | ||
|
||
from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data | ||
from feast.data_source import DataSource | ||
from tests.integration.feature_repos.universal.data_source_creator import ( | ||
DataSourceCreator, | ||
) | ||
from tests.integration.feature_repos.universal.data_sources.bigquery import ( | ||
BigQueryDataSourceCreator, | ||
) | ||
from tests.integration.feature_repos.universal.data_sources.file import ( | ||
FileDataSourceCreator, | ||
) | ||
from tests.integration.feature_repos.universal.data_sources.redshift import ( | ||
RedshiftDataSourceCreator, | ||
) | ||
from tests.integration.feature_repos.universal.feature_views import ( | ||
create_customer_daily_profile_feature_view, | ||
create_driver_hourly_stats_feature_view, | ||
) | ||
|
||
|
||
@dataclass(frozen=True)
class IntegrationTestRepoConfig:
    """
    Immutable bundle of the settings that an individual integration test may vary.
    """

    # Provider name passed through to the repo config.
    provider: str = "local"
    # Online store, given either as a type name or as a config dict.
    online_store: Union[str, Dict] = "sqlite"

    # Class (not an instance) of the creator used for the offline store.
    offline_store_creator: Type[DataSourceCreator] = FileDataSourceCreator

    full_feature_names: bool = True
    infer_event_timestamp_col: bool = True
    infer_features: bool = False
|
||
|
||
# Online store settings referenced by several of the configurations below.
DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"}
REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"}

# Every provider / offline store / online store combination, grouped by provider.
FULL_REPO_CONFIGS: List[IntegrationTestRepoConfig] = [
    # local provider (all defaults, then Redis as the online store)
    IntegrationTestRepoConfig(),
    IntegrationTestRepoConfig(online_store=REDIS_CONFIG),
    # gcp provider with the BigQuery creator
    IntegrationTestRepoConfig(
        online_store="datastore",
        offline_store_creator=BigQueryDataSourceCreator,
        provider="gcp",
    ),
    IntegrationTestRepoConfig(
        online_store=REDIS_CONFIG,
        offline_store_creator=BigQueryDataSourceCreator,
        provider="gcp",
    ),
    # aws provider with the Redshift creator
    IntegrationTestRepoConfig(
        online_store=DYNAMO_CONFIG,
        offline_store_creator=RedshiftDataSourceCreator,
        provider="aws",
    ),
    IntegrationTestRepoConfig(
        online_store=REDIS_CONFIG,
        offline_store_creator=RedshiftDataSourceCreator,
        provider="aws",
    ),
]
|
||
|
||
def construct_universal_entities() -> Dict[str, List[Any]]:
    """Return the fixed customer ids (1001-1109) and driver ids (5001-5109)."""
    customer_ids = [*range(1001, 1110)]
    driver_ids = [*range(5001, 5110)]
    return {"customer": customer_ids, "driver": driver_ids}
|
||
|
||
def construct_universal_datasets(
    entities: Dict[str, List[Any]], start_time: datetime, end_time: datetime
) -> Dict[str, pd.DataFrame]:
    """
    Generate the customer, driver and orders dataframes for the given entities.

    Args:
        entities: Mapping with "customer" and "driver" id lists.
        start_time: Forwarded to the customer and driver generators.
        end_time: Forwarded to the customer and driver generators; the orders
            generator is given the window from 365 days before to 365 days
            after this value.

    Returns:
        Dict keyed by "customer", "driver" and "orders".
    """
    year = timedelta(days=365)
    # Entries are evaluated top to bottom: customer, then driver, then orders.
    return {
        "customer": driver_test_data.create_customer_daily_profile_df(
            entities["customer"], start_time, end_time
        ),
        "driver": driver_test_data.create_driver_hourly_stats_df(
            entities["driver"], start_time, end_time
        ),
        "orders": driver_test_data.create_orders_df(
            customers=entities["customer"],
            drivers=entities["driver"],
            start_date=end_time - year,
            end_date=end_time + year,
            order_count=1000,
        ),
    }
|
||
|
||
def construct_universal_data_sources(
    datasets: Dict[str, pd.DataFrame], data_source_creator: DataSourceCreator
) -> Dict[str, DataSource]:
    """
    Create one data source per dataset through the given creator.

    Args:
        datasets: Mapping with "customer", "driver" and "orders" dataframes.
        data_source_creator: Object whose create_data_source is called once
            per dataset.

    Returns:
        Dict keyed by "customer", "driver" and "orders".
    """
    # (dataset key, destination name); sources are created in this order.
    destinations = (
        ("customer", "customer_profile"),
        ("driver", "driver_hourly"),
        ("orders", "orders"),
    )
    return {
        key: data_source_creator.create_data_source(
            datasets[key],
            destination_name=destination,
            event_timestamp_column="event_timestamp",
            created_timestamp_column="created",
        )
        for key, destination in destinations
    }
|
||
|
||
def construct_universal_feature_views(
    data_sources: Dict[str, DataSource],
) -> Dict[str, FeatureView]:
    """
    Build the customer and driver feature views from their data sources.

    Only the "customer" and "driver" entries of data_sources are read.
    """
    customer_view = create_customer_daily_profile_feature_view(
        data_sources["customer"]
    )
    driver_view = create_driver_hourly_stats_feature_view(data_sources["driver"])
    return {"customer": customer_view, "driver": driver_view}
|
||
|
||
@dataclass
class Environment:
    """
    Everything a test needs to work against one constructed feature repo.

    start_date is not a constructor argument; it is set in __post_init__ to
    seven days before end_date.
    """

    name: str
    test_repo_config: IntegrationTestRepoConfig
    feature_store: FeatureStore
    data_source_creator: DataSourceCreator

    # default_factory (rather than default) so the current hour is read when
    # each Environment is created, instead of once when the module is imported.
    end_date: datetime = field(
        default_factory=lambda: datetime.now().replace(
            microsecond=0, second=0, minute=0
        )
    )

    def __post_init__(self):
        # Derived from end_date, so an explicitly passed end_date is honoured.
        self.start_date: datetime = self.end_date - timedelta(days=7)
|
||
|
||
def table_name_from_data_source(ds: DataSource) -> Optional[str]:
    """
    Return ds.table_ref if the attribute exists, otherwise ds.table if that
    exists, otherwise None.
    """
    # Checked in priority order: table_ref wins when both are present.
    for attribute in ("table_ref", "table"):
        if hasattr(ds, attribute):
            return getattr(ds, attribute)
    return None
|
||
|
||
@contextmanager
def construct_test_environment(
    test_repo_config: IntegrationTestRepoConfig,
    test_suite_name: str = "integration_test",
) -> Iterator[Environment]:
    """
    Context manager yielding an Environment built from test_repo_config.

    The project name is test_suite_name plus a random 8-character hex suffix.
    The registry file and repo path live in a temporary directory that exists
    only for the duration of the with-block.

    Args:
        test_repo_config: Provider, online store and offline store creator
            class to use.
        test_suite_name: Prefix for the generated project name.

    Yields:
        The constructed Environment. The feature store's teardown() is called
        when the with-block exits, including when it exits with an exception.
    """
    project = f"{test_suite_name}_{uuid.uuid4().hex[:8]}"

    offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project)

    offline_store_config = offline_creator.create_offline_store_config()
    online_store = test_repo_config.online_store

    with tempfile.TemporaryDirectory() as repo_dir_name:
        config = RepoConfig(
            registry=str(Path(repo_dir_name) / "registry.db"),
            project=project,
            provider=test_repo_config.provider,
            offline_store=offline_store_config,
            online_store=online_store,
            repo_path=repo_dir_name,
        )
        fs = FeatureStore(config=config)
        environment = Environment(
            name=project,
            test_repo_config=test_repo_config,
            feature_store=fs,
            data_source_creator=offline_creator,
        )

        try:
            yield environment
        finally:
            # Runs before the temporary directory is removed.
            fs.teardown()
Oops, something went wrong.