From e3a4cb70721cdd79f470286f570337e9fe36d363 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 24 Nov 2021 12:35:37 -0800 Subject: [PATCH 1/4] Refactor feast registry-dump cli command Signed-off-by: Felix Wang --- sdk/python/feast/registry.py | 36 ++++++++++++++++++++++++++++- sdk/python/feast/repo_operations.py | 26 ++------------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 8cb646f656..05c97261ca 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import defaultdict from datetime import datetime, timedelta from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from urllib.parse import urlparse from google.protobuf.internal.containers import RepeatedCompositeFieldContainer +from google.protobuf.json_format import MessageToDict from proto import Message from feast import importer @@ -671,6 +673,38 @@ def teardown(self): """Tears down (removes) the registry.""" self._registry_store.teardown() + def to_dict(self, project: str) -> Dict[str, Any]: + """Returns a dictionary representation of the registry contents for the specified project. + + Args: + project: Feast project to convert to a dict + """ + registry_dict = defaultdict(list) + + for entity in self.list_entities(project=project): + registry_dict["entities"].append(MessageToDict(entity.to_proto())) + for feature_view in self.list_feature_views(project=project): + registry_dict["featureViews"].append(MessageToDict(feature_view.to_proto())) + for feature_table in self.list_feature_tables(project=project): + registry_dict["featureTables"].append( + MessageToDict(feature_table.to_proto()) + ) + for feature_service in self.list_feature_services(project=project): + registry_dict["featureServices"].append( + MessageToDict(feature_service.to_proto()) + ) + for on_demand_feature_view in self.list_on_demand_feature_views( + project=project + ): + registry_dict["onDemandFeatureViews"].append( + MessageToDict(on_demand_feature_view.to_proto()) + ) + for request_feature_view in self.list_request_feature_views(project=project): + registry_dict["requestFeatureViews"].append( + MessageToDict(request_feature_view.to_proto()) + ) + return registry_dict + def _prepare_registry_for_changes(self): """Prepares the Registry for changes by refreshing the cache if necessary.""" try: diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index a4ead6d0e2..fb5f9badea 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -4,14 +4,12 @@ import random import re import sys -from collections import defaultdict from importlib.abc import Loader from pathlib import Path from typing import List, NamedTuple, Set, Tuple, Union, cast import click from click.exceptions import BadParameter -from google.protobuf.json_format import MessageToDict from feast import Entity, FeatureTable from feast.base_feature_view import BaseFeatureView @@ -339,28 +337,8 @@ def registry_dump(repo_config: RepoConfig, repo_path: Path): registry_config = repo_config.get_registry_config() project = repo_config.project registry = Registry(registry_config=registry_config, repo_path=repo_path) - registry_dict = defaultdict(list) - - for entity in registry.list_entities(project=project): - registry_dict["entities"].append(MessageToDict(entity.to_proto())) - for feature_view in registry.list_feature_views(project=project): - registry_dict["featureViews"].append(MessageToDict(feature_view.to_proto())) - for feature_table in registry.list_feature_tables(project=project): - registry_dict["featureTables"].append(MessageToDict(feature_table.to_proto())) - for feature_service in registry.list_feature_services(project=project): - registry_dict["featureServices"].append( - MessageToDict(feature_service.to_proto()) - ) - for on_demand_feature_view in registry.list_on_demand_feature_views( - project=project - ): - registry_dict["onDemandFeatureViews"].append( - MessageToDict(on_demand_feature_view.to_proto()) - ) - for request_feature_view in registry.list_request_feature_views(project=project): - registry_dict["requestFeatureViews"].append( - MessageToDict(request_feature_view.to_proto()) - ) + registry_dict = registry.to_dict(project=project) + warning = ( "Warning: The registry-dump command is for debugging only and may contain " "breaking changes in the future. No guarantees are made on this interface." From b9b64f45caeeb57bfaa304cce946cc0cf2c0bfc1 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 24 Nov 2021 12:58:05 -0800 Subject: [PATCH 2/4] Test that running feast apply twice does not change registry contents Signed-off-by: Felix Wang --- sdk/python/tests/integration/registration/test_cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index 38663685e2..de0e5d5629 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -39,6 +39,10 @@ def test_universal_cli(test_repo_config) -> None: result = runner.run(["apply"], cwd=repo_path) assertpy.assert_that(result.returncode).is_equal_to(0) + # Store registry contents, to be compared later. + fs = FeatureStore(repo_path=str(repo_path)) + registry_dict = fs.registry.to_dict(project=project) + # entity & feature view list commands should succeed result = runner.run(["entities", "list"], cwd=repo_path) assertpy.assert_that(result.returncode).is_equal_to(0) @@ -58,8 +62,6 @@ def test_universal_cli(test_repo_config) -> None: ["feature-services", "describe", "driver_locations_service"], cwd=repo_path ) assertpy.assert_that(result.returncode).is_equal_to(0) - - fs = FeatureStore(repo_path=str(repo_path)) assertpy.assert_that(fs.list_feature_views()).is_length(3) # entity & feature view describe commands should fail when objects don't exist @@ -78,6 +80,11 @@ def test_universal_cli(test_repo_config) -> None: view_name="driver_locations", ) + # Confirm that registry contents have not changed. + assertpy.assert_that(registry_dict).is_equal_to( + fs.registry.to_dict(project=project) + ) + result = runner.run(["teardown"], cwd=repo_path) assertpy.assert_that(result.returncode).is_equal_to(0) From 719134e179f12b71c8ba6dc9de328291a6d3ddb1 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 24 Nov 2021 13:52:24 -0800 Subject: [PATCH 3/4] Ensure that Registry.to_dict sorts contents Signed-off-by: Felix Wang --- sdk/python/feast/registry.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 05c97261ca..7b523c9274 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -673,33 +673,51 @@ def teardown(self): """Tears down (removes) the registry.""" self._registry_store.teardown() - def to_dict(self, project: str) -> Dict[str, Any]: + def to_dict(self, project: str) -> Dict[str, List[Any]]: """Returns a dictionary representation of the registry contents for the specified project. + For each list in the dictionary, the elements are sorted by name, so this + method can be used to compare two registries. + Args: project: Feast project to convert to a dict """ registry_dict = defaultdict(list) - for entity in self.list_entities(project=project): + for entity in sorted( + self.list_entities(project=project), key=lambda entity: entity.name + ): registry_dict["entities"].append(MessageToDict(entity.to_proto())) - for feature_view in self.list_feature_views(project=project): + for feature_view in sorted( + self.list_feature_views(project=project), + key=lambda feature_view: feature_view.name, + ): registry_dict["featureViews"].append(MessageToDict(feature_view.to_proto())) - for feature_table in self.list_feature_tables(project=project): + for feature_table in sorted( + self.list_feature_tables(project=project), + key=lambda feature_table: feature_table.name, + ): registry_dict["featureTables"].append( MessageToDict(feature_table.to_proto()) ) - for feature_service in self.list_feature_services(project=project): + for feature_service in sorted( + self.list_feature_services(project=project), + key=lambda feature_service: feature_service.name, + ): registry_dict["featureServices"].append( MessageToDict(feature_service.to_proto()) ) - for on_demand_feature_view in self.list_on_demand_feature_views( - project=project + for on_demand_feature_view in sorted( + self.list_on_demand_feature_views(project=project), + key=lambda on_demand_feature_view: on_demand_feature_view.name, ): registry_dict["onDemandFeatureViews"].append( MessageToDict(on_demand_feature_view.to_proto()) ) - for request_feature_view in self.list_request_feature_views(project=project): + for request_feature_view in sorted( + self.list_request_feature_views(project=project), + key=lambda request_feature_view: request_feature_view.name, + ): registry_dict["requestFeatureViews"].append( MessageToDict(request_feature_view.to_proto()) ) From 0387e2a133fd79a7400800b3445a776460bb646a Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 24 Nov 2021 13:53:11 -0800 Subject: [PATCH 4/4] Do not remove dummy entity Signed-off-by: Felix Wang --- sdk/python/feast/repo_operations.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index fb5f9badea..a45d3dfd66 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -15,7 +15,7 @@ from feast.base_feature_view import BaseFeatureView from feast.feature_service import FeatureService from feast.feature_store import FeatureStore -from feast.feature_view import FeatureView +from feast.feature_view import DUMMY_ENTITY_NAME, FeatureView from feast.names import adjectives, animals from feast.on_demand_feature_view import OnDemandFeatureView from feast.registry import Registry @@ -265,7 +265,11 @@ def _tag_registry_entities_for_keep_delete( entities_to_delete: Set[Entity] = set() repo_entities_names = set([e.name for e in repo.entities]) for registry_entity in registry.list_entities(project=project): - if registry_entity.name not in repo_entities_names: + # Do not delete dummy entity. + if ( + registry_entity.name not in repo_entities_names + and registry_entity.name != DUMMY_ENTITY_NAME + ): entities_to_delete.add(registry_entity) return entities_to_keep, entities_to_delete