diff --git a/CODEOWNERS b/CODEOWNERS index 75ede8b6aab..e4eb72958c2 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,6 +1,15 @@ # See https://help.github.com/articles/about-codeowners/ # for more info about CODEOWNERS file +/docs/ @feast-dev/reviewers-and-approvers +/examples/ @feast-dev/reviewers-and-approvers +/go/ @feast-dev/reviewers-and-approvers +/infra/ @feast-dev/reviewers-and-approvers +/java/ @feast-dev/reviewers-and-approvers +/protos/ @feast-dev/reviewers-and-approvers +/sdk/ @feast-dev/reviewers-and-approvers +/ui/ @feast-dev/reviewers-and-approvers + # Core Interfaces /sdk/python/feast/infra/offline_stores/offline_store.py @feast-dev/maintainers /sdk/python/feast/infra/online_stores/online_store.py @feast-dev/maintainers diff --git a/OWNERS b/OWNERS deleted file mode 100644 index 1072fc2187b..00000000000 --- a/OWNERS +++ /dev/null @@ -1,47 +0,0 @@ -# This file is different from the CODEOWNERS file. -# OWNERS is used by feast-ci-bot to accept commands like `/ok-to-test` and `/lgtm` -# More info at https://www.kubernetes.dev/docs/guide/owners/ -approvers: - - woop - - achals - - adchia - - felixwang9817 - - MattDelac - - kevjumba - - chhabrakadabra - - gbmarc1 - - sfc-gh-madkins - - zhilingc - - whoahbot - - niklasvm - - toping4445 - - DvirDukhan - - hemidactylus - - franciscojavierarceo - - haoxuai - - jeremyary - - shuchu - - tokoko - -reviewers: - - woop - - achals - - tedhtchang - - adchia - - felixwang9817 - - MattDelac - - kevjumba - - chhabrakadabra - - gbmarc1 - - sfc-gh-madkins - - zhilingc - - whoahbot - - niklasvm - - toping4445 - - DvirDukhan - - hemidactylus - - franciscojavierarceo - - haoxuai - - jeremyary - - shuchu - - tokoko diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index a9f5c094044..b4ed591b04c 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -307,7 +307,7 @@ def load_config(self): if key.lower() in self.cfg.settings and value is not None: self.cfg.set(key.lower(), value) - self.cfg.set("worker_class", "uvicorn.workers.UvicornWorker") + self.cfg.set("worker_class", "uvicorn_worker.UvicornWorker") def load(self): return self._app diff --git a/sdk/python/feast/infra/online_stores/contrib/faiss_online_store.py b/sdk/python/feast/infra/online_stores/contrib/faiss_online_store.py new file mode 100644 index 00000000000..cc2e75800e6 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/faiss_online_store.py @@ -0,0 +1,241 @@ +import logging +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + +import faiss +import numpy as np +from google.protobuf.timestamp_pb2 import Timestamp + +from feast import Entity, FeatureView, RepoConfig +from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel + + +class FaissOnlineStoreConfig(FeastConfigBaseModel): + dimension: int + index_path: str + index_type: str = "IVFFlat" + nlist: int = 100 + + +class InMemoryStore: + def __init__(self): + self.feature_names: List[str] = [] + self.entity_keys: Dict[str, int] = {} + + def update(self, feature_names: List[str], entity_keys: Dict[str, int]): + self.feature_names = feature_names + self.entity_keys = entity_keys + + def delete(self, entity_keys: List[str]): + for entity_key in entity_keys: + if entity_key in self.entity_keys: + del self.entity_keys[entity_key] + + def read(self, entity_keys: List[str]) -> List[Optional[int]]: + return [self.entity_keys.get(entity_key) for entity_key in entity_keys] + + def teardown(self): + self.feature_names = [] + self.entity_keys = {} + + +class FaissOnlineStore(OnlineStore): + _index: Optional[faiss.IndexIVFFlat] = None + _in_memory_store: InMemoryStore = InMemoryStore() + _config: Optional[FaissOnlineStoreConfig] = None + _logger: logging.Logger = logging.getLogger(__name__) + + def _get_index(self, config: RepoConfig) -> faiss.IndexIVFFlat: + if self._index is None or self._config is None: + raise ValueError("Index is not initialized") + return self._index + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + feature_views = tables_to_keep + if not feature_views: + return + + feature_names = [f.name for f in feature_views[0].features] + dimension = len(feature_names) + + self._config = FaissOnlineStoreConfig(**config.online_store.dict()) + if self._index is None or not partial: + quantizer = faiss.IndexFlatL2(dimension) + self._index = faiss.IndexIVFFlat(quantizer, dimension, self._config.nlist) + self._index.train( + np.random.rand(self._config.nlist * 100, dimension).astype(np.float32) + ) + self._in_memory_store = InMemoryStore() + + self._in_memory_store.update(feature_names, {}) + + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): + self._index = None + self._in_memory_store.teardown() + + def online_read( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + if self._index is None: + return [(None, None)] * len(entity_keys) + + results: List[Tuple[Optional[datetime], Optional[Dict[str, Any]]]] = [] + for entity_key in entity_keys: + serialized_key = serialize_entity_key( + entity_key, config.entity_key_serialization_version + ).hex() + idx = self._in_memory_store.entity_keys.get(serialized_key, -1) + if idx == -1: + results.append((None, None)) + else: + feature_vector = self._index.reconstruct(int(idx)) + feature_dict = { + name: ValueProto(double_val=value) + for name, value in zip( + self._in_memory_store.feature_names, feature_vector + ) + } + results.append((None, feature_dict)) + return results + + def online_write_batch( + self, + config: RepoConfig, + table: FeatureView, + data: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], + progress: Optional[Callable[[int], Any]], + ) -> None: + if self._index is None: + self._logger.warning("Index is not initialized. Skipping write operation.") + return + + feature_vectors = [] + serialized_keys = [] + + for entity_key, feature_dict, _, _ in data: + serialized_key = serialize_entity_key( + entity_key, config.entity_key_serialization_version + ).hex() + feature_vector = np.array( + [ + feature_dict[name].double_val + for name in self._in_memory_store.feature_names + ], + dtype=np.float32, + ) + + feature_vectors.append(feature_vector) + serialized_keys.append(serialized_key) + + feature_vectors_array = np.array(feature_vectors) + + existing_indices = [ + self._in_memory_store.entity_keys.get(sk, -1) for sk in serialized_keys + ] + mask = np.array(existing_indices) != -1 + if np.any(mask): + self._index.remove_ids( + np.array([idx for idx in existing_indices if idx != -1]) + ) + + new_indices = np.arange( + self._index.ntotal, self._index.ntotal + len(feature_vectors_array) + ) + self._index.add(feature_vectors_array) + + for sk, idx in zip(serialized_keys, new_indices): + self._in_memory_store.entity_keys[sk] = idx + + if progress: + progress(len(data)) + + def retrieve_online_documents( + self, + config: RepoConfig, + table: FeatureView, + requested_feature: str, + embedding: List[float], + top_k: int, + distance_metric: Optional[str] = None, + ) -> List[ + Tuple[ + Optional[datetime], + Optional[EntityKeyProto], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ]: + if self._index is None: + self._logger.warning("Index is not initialized. Returning empty result.") + return [] + + query_vector = np.array(embedding, dtype=np.float32).reshape(1, -1) + distances, indices = self._index.search(query_vector, top_k) + + results: List[ + Tuple[ + Optional[datetime], + Optional[EntityKeyProto], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ] = [] + for i, idx in enumerate(indices[0]): + if idx == -1: + continue + + feature_vector = self._index.reconstruct(int(idx)) + + timestamp = Timestamp() + timestamp.GetCurrentTime() + entity_value = EntityKeyProto() + feature_value = ValueProto(string_val=",".join(map(str, feature_vector))) + vector_value = ValueProto(string_val=",".join(map(str, feature_vector))) + distance_value = ValueProto(float_val=distances[0][i]) + + results.append( + ( + timestamp.ToDatetime(), + entity_value, + feature_value, + vector_value, + distance_value, + ) + ) + + return results + + async def online_read_async( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + # Implement async read if needed + raise NotImplementedError("Async read is not implemented for FaissOnlineStore") diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 6629768375a..4db0bbc6fdb 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -468,6 +468,10 @@ def init_repo(repo_name: str, template: str): raise IOError(f"Could not find template {template}") copytree(template_path, str(repo_path), dirs_exist_ok=True) + # Rename gitignore files back to .gitignore + for gitignore_path in repo_path.rglob("gitignore"): + gitignore_path.rename(gitignore_path.with_name(".gitignore")) + # Seed the repository bootstrap_path = repo_path / "bootstrap.py" if os.path.exists(bootstrap_path): diff --git a/sdk/python/feast/templates/athena/.gitignore b/sdk/python/feast/templates/athena/gitignore similarity index 100% rename from sdk/python/feast/templates/athena/.gitignore rename to sdk/python/feast/templates/athena/gitignore diff --git a/sdk/python/feast/templates/aws/.gitignore b/sdk/python/feast/templates/aws/gitignore similarity index 100% rename from sdk/python/feast/templates/aws/.gitignore rename to sdk/python/feast/templates/aws/gitignore diff --git a/sdk/python/feast/templates/cassandra/.gitignore b/sdk/python/feast/templates/cassandra/gitignore similarity index 100% rename from sdk/python/feast/templates/cassandra/.gitignore rename to sdk/python/feast/templates/cassandra/gitignore diff --git a/sdk/python/feast/templates/gcp/.gitignore b/sdk/python/feast/templates/gcp/gitignore similarity index 100% rename from sdk/python/feast/templates/gcp/.gitignore rename to sdk/python/feast/templates/gcp/gitignore diff --git a/sdk/python/feast/templates/hazelcast/.gitignore b/sdk/python/feast/templates/hazelcast/gitignore similarity index 100% rename from sdk/python/feast/templates/hazelcast/.gitignore rename to sdk/python/feast/templates/hazelcast/gitignore diff --git a/sdk/python/feast/templates/hbase/.gitignore b/sdk/python/feast/templates/hbase/gitignore similarity index 100% rename from sdk/python/feast/templates/hbase/.gitignore rename to sdk/python/feast/templates/hbase/gitignore diff --git a/sdk/python/feast/templates/minimal/.gitignore b/sdk/python/feast/templates/minimal/gitignore similarity index 100% rename from sdk/python/feast/templates/minimal/.gitignore rename to sdk/python/feast/templates/minimal/gitignore diff --git a/sdk/python/feast/templates/postgres/.gitignore b/sdk/python/feast/templates/postgres/gitignore similarity index 100% rename from sdk/python/feast/templates/postgres/.gitignore rename to sdk/python/feast/templates/postgres/gitignore diff --git a/sdk/python/feast/templates/snowflake/.gitignore b/sdk/python/feast/templates/snowflake/gitignore similarity index 100% rename from sdk/python/feast/templates/snowflake/.gitignore rename to sdk/python/feast/templates/snowflake/gitignore diff --git a/sdk/python/feast/templates/spark/.gitignore b/sdk/python/feast/templates/spark/gitignore similarity index 100% rename from sdk/python/feast/templates/spark/.gitignore rename to sdk/python/feast/templates/spark/gitignore diff --git a/sdk/python/feast/utils.py b/sdk/python/feast/utils.py index ec2da79782c..32cd2f606c2 100644 --- a/sdk/python/feast/utils.py +++ b/sdk/python/feast/utils.py @@ -42,7 +42,7 @@ from feast.protos.feast.types.Value_pb2 import RepeatedValue as RepeatedValueProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.type_map import python_values_to_proto_values -from feast.types import from_feast_to_pyarrow_type +from feast.types import ComplexFeastType, PrimitiveFeastType, from_feast_to_pyarrow_type from feast.value_type import ValueType from feast.version import get_version @@ -552,13 +552,27 @@ def _augment_response_with_on_demand_transforms( selected_subset = [f for f in transformed_columns if f in _feature_refs] proto_values = [] + schema_dict = {k.name: k.dtype for k in odfv.schema} for selected_feature in selected_subset: feature_vector = transformed_features[selected_feature] + selected_feature_type = schema_dict.get(selected_feature, None) + feature_type: ValueType = ValueType.UNKNOWN + if selected_feature_type is not None: + if isinstance( + selected_feature_type, (ComplexFeastType, PrimitiveFeastType) + ): + feature_type = selected_feature_type.to_value_type() + elif not isinstance(selected_feature_type, ValueType): + raise TypeError( + f"Unexpected type for feature_type: {type(feature_type)}" + ) + proto_values.append( - python_values_to_proto_values(feature_vector, ValueType.UNKNOWN) - if odfv.mode == "python" - else python_values_to_proto_values( - feature_vector.to_numpy(), ValueType.UNKNOWN + python_values_to_proto_values( + feature_vector + if odfv.mode == "python" + else feature_vector.to_numpy(), + feature_type, ) ) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index be13d71b829..57a21cd6d95 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -167,7 +167,7 @@ execnet==2.1.1 # via pytest-xdist executing==2.1.0 # via stack-data -fastapi==0.115.0 +fastapi==0.115.2 fastjsonschema==2.20.0 # via nbformat filelock==3.16.1 @@ -889,6 +889,7 @@ urllib3==2.2.3 # responses # testcontainers uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 8d34dcdcf33..c5549401ea7 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -29,7 +29,7 @@ dask-expr==1.1.14 dill==0.3.8 exceptiongroup==1.2.2 # via anyio -fastapi==0.115.0 +fastapi==0.115.2 fsspec==2024.9.0 # via dask greenlet==3.1.0 @@ -136,6 +136,7 @@ tzdata==2024.1 urllib3==2.2.3 # via requests uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn watchfiles==0.24.0 diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index 1c0d09139a0..ed6dc239d37 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -160,7 +160,7 @@ execnet==2.1.1 # via pytest-xdist executing==2.1.0 # via stack-data -fastapi==0.115.0 +fastapi==0.115.2 fastjsonschema==2.20.0 # via nbformat filelock==3.16.1 @@ -866,6 +866,7 @@ urllib3==2.2.3 # responses # testcontainers uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.11-requirements.txt b/sdk/python/requirements/py3.11-requirements.txt index 649b08f492a..d7ed97723f6 100644 --- a/sdk/python/requirements/py3.11-requirements.txt +++ b/sdk/python/requirements/py3.11-requirements.txt @@ -27,7 +27,7 @@ dask[dataframe]==2024.9.0 dask-expr==1.1.14 # via dask dill==0.3.8 -fastapi==0.115.0 +fastapi==0.115.2 fsspec==2024.9.0 # via dask greenlet==3.1.0 @@ -130,6 +130,7 @@ tzdata==2024.1 urllib3==2.2.3 # via requests uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn watchfiles==0.24.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 3dba480af68..e7d6686b4d2 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -169,7 +169,7 @@ execnet==2.1.1 # via pytest-xdist executing==2.1.0 # via stack-data -fastapi==0.115.0 +fastapi==0.115.2 fastjsonschema==2.20.0 # via nbformat filelock==3.16.1 @@ -900,6 +900,7 @@ urllib3==1.26.20 # snowflake-connector-python # testcontainers uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn virtualenv==20.23.0 diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index ba30a4ecf52..16afecdfb5e 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -29,7 +29,7 @@ dask-expr==1.1.10 dill==0.3.8 exceptiongroup==1.2.2 # via anyio -fastapi==0.115.0 +fastapi==0.115.2 fsspec==2024.9.0 # via dask greenlet==3.1.0 @@ -139,6 +139,7 @@ tzdata==2024.1 urllib3==2.2.3 # via requests uvicorn[standard]==0.30.6 +uvicorn-worker uvloop==0.20.0 # via uvicorn watchfiles==0.24.0 diff --git a/sdk/python/tests/unit/test_on_demand_python_transformation.py b/sdk/python/tests/unit/test_on_demand_python_transformation.py index b7ddfb9e75c..530bf1fa0ab 100644 --- a/sdk/python/tests/unit/test_on_demand_python_transformation.py +++ b/sdk/python/tests/unit/test_on_demand_python_transformation.py @@ -609,19 +609,19 @@ def pandas_view(features_df: pd.DataFrame) -> pd.DataFrame: "val_to_add", "val_to_add_2", ] - with pytest.raises(TypeError): - _ = self.store.get_online_features( - entity_rows=[ - {"driver_id": 1234567890, "val_to_add": 0, "val_to_add_2": 1} - ], - features=[ - "driver_hourly_stats:conv_rate", - "driver_hourly_stats:acc_rate", - "driver_hourly_stats:avg_daily_trips", - "pandas_view:conv_rate_plus_val1", - "pandas_view:conv_rate_plus_val2", - ], - ) + resp_online_missing_entity = self.store.get_online_features( + entity_rows=[ + {"driver_id": 1234567890, "val_to_add": 0, "val_to_add_2": 1} + ], + features=[ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + "pandas_view:conv_rate_plus_val1", + "pandas_view:conv_rate_plus_val2", + ], + ) + assert resp_online_missing_entity is not None resp_online = self.store.get_online_features( entity_rows=[{"driver_id": 1001, "val_to_add": 0, "val_to_add_2": 1}], features=[ diff --git a/setup.py b/setup.py index 1cf51e32896..7c75625d302 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "typeguard>=4.0.0", "fastapi>=0.68.0", "uvicorn[standard]>=0.14.0,<1", + "uvicorn-worker", "gunicorn; platform_system != 'Windows'", "dask[dataframe]>=2024.2.1", "prometheus_client", @@ -143,6 +144,8 @@ MSSQL_REQUIRED = ["ibis-framework[mssql]>=9.0.0,<10"] +FAISS_REQUIRED = ["faiss-cpu>=1.7.0,<2"] + CI_REQUIRED = ( [ "build", @@ -209,6 +212,7 @@ + SQLITE_VEC_REQUIRED + SINGLESTORE_REQUIRED + OPENTELEMETRY + + FAISS_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED @@ -278,6 +282,7 @@ "sqlite_vec": SQLITE_VEC_REQUIRED, "singlestore": SINGLESTORE_REQUIRED, "opentelemetry": OPENTELEMETRY, + "faiss": FAISS_REQUIRED, }, include_package_data=True, license="Apache", diff --git a/ui/package.json b/ui/package.json index f793cd06e64..9a1876809b0 100644 --- a/ui/package.json +++ b/ui/package.json @@ -107,5 +107,6 @@ "license": "Apache-2.0", "bugs": { "url": "https://github.com/feast-dev/feast/issues" - } + }, + "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e" }