-
Notifications
You must be signed in to change notification settings - Fork 996
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for DynamoDB and S3 registry (#1483)
* Add support for DynamoDB and S3 registry
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* rcu and wcu as a parameter of dynamodb online store
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* fix linter
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* aws dependency to extras
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* FEAST_S3_ENDPOINT_URL
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* tests
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* fix signature, after merge
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* aws default region name configurable
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* add offlinestore config type to test
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* review changes
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* review requested changes
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* integration test for Dynamo
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* change the rest of table_name to table_instance (where table_name is actually an instance of DynamoDB Table object)
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* fix DynamoDBOnlineStore commit
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* move client to _initialize_dynamodb
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* rename document_id to entity_id and Row to entity_id
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* The default value is None
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* Remove Datastore from the docstring.
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* get rid of the return call from S3RegistryStore
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* merge two exceptions
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* For ci requirement
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* remove configuration from test
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* feast-integration-tests for tests
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* change test path
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* add fixture feature_store_with_s3_registry to test
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* region required
  Signed-off-by: lblokhin <lenin133@yandex.ru>
* Address the rest of the comments
  Signed-off-by: Tsotne Tabidze <tsotne@tecton.ai>
* Update to_table to to_arrow
  Signed-off-by: Tsotne Tabidze <tsotne@tecton.ai>

Co-authored-by: Tsotne Tabidze <tsotne@tecton.ai>
- Loading branch information
Showing
20 changed files
with
678 additions
and
21 deletions.
There are no files selected for viewing
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
from datetime import datetime | ||
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union | ||
|
||
import pandas | ||
from tqdm import tqdm | ||
|
||
from feast import FeatureTable | ||
from feast.entity import Entity | ||
from feast.feature_view import FeatureView | ||
from feast.infra.offline_stores.helpers import get_offline_store_from_config | ||
from feast.infra.online_stores.helpers import get_online_store_from_config | ||
from feast.infra.provider import ( | ||
Provider, | ||
RetrievalJob, | ||
_convert_arrow_to_proto, | ||
_get_column_names, | ||
_run_field_mapping, | ||
) | ||
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto | ||
from feast.protos.feast.types.Value_pb2 import Value as ValueProto | ||
from feast.registry import Registry | ||
from feast.repo_config import RepoConfig | ||
|
||
|
||
class AwsProvider(Provider):
    """Provider that forwards work to the configured offline and online stores.

    Both stores are resolved once, at construction time, from the
    ``offline_store`` and ``online_store`` sections of the repo config.
    Every method is a thin delegation to one of them.
    """

    def __init__(self, config: RepoConfig):
        """Keep ``config`` and build the store objects it names."""
        self.repo_config = config
        self.offline_store = get_offline_store_from_config(config.offline_store)
        self.online_store = get_online_store_from_config(config.online_store)

    def update_infra(
        self,
        project: str,
        tables_to_delete: Sequence[Union[FeatureTable, FeatureView]],
        tables_to_keep: Sequence[Union[FeatureTable, FeatureView]],
        entities_to_delete: Sequence[Entity],
        entities_to_keep: Sequence[Entity],
        partial: bool,
    ):
        """Forward an infrastructure update to the online store.

        ``project`` is accepted as part of the method signature but is not
        passed on; the online store receives ``self.repo_config`` instead.
        """
        self.online_store.update(
            config=self.repo_config,
            tables_to_delete=tables_to_delete,
            tables_to_keep=tables_to_keep,
            entities_to_keep=entities_to_keep,
            entities_to_delete=entities_to_delete,
            partial=partial,
        )

    def teardown_infra(
        self,
        project: str,
        tables: Sequence[Union[FeatureTable, FeatureView]],
        entities: Sequence[Entity],
    ) -> None:
        """Forward a teardown request to the online store.

        ``project`` is not passed on; the online store receives
        ``self.repo_config`` together with ``tables`` and ``entities``.
        """
        self.online_store.teardown(self.repo_config, tables, entities)

    def online_write_batch(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        """Hand ``data`` for ``table`` to the online store, unchanged."""
        self.online_store.online_write_batch(config, table, data, progress)

    def online_read(
        self,
        config: RepoConfig,
        table: Union[FeatureTable, FeatureView],
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Return whatever the online store reads for ``entity_keys``.

        NOTE(review): ``requested_features`` is accepted but not forwarded to
        the online store - confirm whether the store is expected to filter.
        """
        return self.online_store.online_read(config, table, entity_keys)

    def materialize_single_feature_view(
        self,
        config: RepoConfig,
        feature_view: FeatureView,
        start_date: datetime,
        end_date: datetime,
        registry: Registry,
        project: str,
        tqdm_builder: Callable[[int], tqdm],
    ) -> None:
        """Copy one feature view's rows from the offline store to the online store.

        The rows are pulled for the ``start_date``..``end_date`` window,
        renamed through the input's field mapping when one is set, converted
        to protos, and written in a single batch. ``tqdm_builder`` is called
        with the row count and its result is used as the progress bar.
        """
        entities = [
            registry.get_entity(entity_name, project)
            for entity_name in feature_view.entities
        ]

        (
            join_key_columns,
            feature_name_columns,
            event_timestamp_column,
            created_timestamp_column,
        ) = _get_column_names(feature_view, entities)

        offline_job = self.offline_store.pull_latest_from_table_or_query(
            config=config,
            data_source=feature_view.input,
            join_key_columns=join_key_columns,
            feature_name_columns=feature_name_columns,
            event_timestamp_column=event_timestamp_column,
            created_timestamp_column=created_timestamp_column,
            start_date=start_date,
            end_date=end_date,
        )

        table = offline_job.to_arrow()

        if feature_view.input.field_mapping is not None:
            table = _run_field_mapping(table, feature_view.input.field_mapping)

        join_keys = [entity.join_key for entity in entities]
        rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)

        # NOTE(review): the write uses self.repo_config while the read above
        # uses the ``config`` argument - confirm the two are always the same.
        with tqdm_builder(len(rows_to_write)) as pbar:
            self.online_write_batch(
                self.repo_config, feature_view, rows_to_write, lambda x: pbar.update(x)
            )

    def get_historical_features(
        self,
        config: RepoConfig,
        feature_views: List[FeatureView],
        feature_refs: List[str],
        entity_df: Union[pandas.DataFrame, str],
        registry: Registry,
        project: str,
    ) -> RetrievalJob:
        """Return the offline store's retrieval job for the given request."""
        return self.offline_store.get_historical_features(
            config=config,
            feature_views=feature_views,
            feature_refs=feature_refs,
            entity_df=entity_df,
            registry=registry,
            project=project,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.