Skip to content

Commit

Permalink
Merge branch 'feast-dev:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
emgeee authored Oct 23, 2024
2 parents 31857e6 + 5291289 commit a928510
Show file tree
Hide file tree
Showing 25 changed files with 306 additions and 73 deletions.
9 changes: 9 additions & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# See https://help.github.com/articles/about-codeowners/
# for more info about CODEOWNERS file

/docs/ @feast-dev/reviewers-and-approvers
/examples/ @feast-dev/reviewers-and-approvers
/go/ @feast-dev/reviewers-and-approvers
/infra/ @feast-dev/reviewers-and-approvers
/java/ @feast-dev/reviewers-and-approvers
/protos/ @feast-dev/reviewers-and-approvers
/sdk/ @feast-dev/reviewers-and-approvers
/ui/ @feast-dev/reviewers-and-approvers

# Core Interfaces
/sdk/python/feast/infra/offline_stores/offline_store.py @feast-dev/maintainers
/sdk/python/feast/infra/online_stores/online_store.py @feast-dev/maintainers
Expand Down
47 changes: 0 additions & 47 deletions OWNERS

This file was deleted.

2 changes: 1 addition & 1 deletion sdk/python/feast/feature_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def load_config(self):
if key.lower() in self.cfg.settings and value is not None:
self.cfg.set(key.lower(), value)

self.cfg.set("worker_class", "uvicorn.workers.UvicornWorker")
self.cfg.set("worker_class", "uvicorn_worker.UvicornWorker")

def load(self):
return self._app
Expand Down
241 changes: 241 additions & 0 deletions sdk/python/feast/infra/online_stores/contrib/faiss_online_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
import logging
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

import faiss
import numpy as np
from google.protobuf.timestamp_pb2 import Timestamp

from feast import Entity, FeatureView, RepoConfig
from feast.infra.key_encoding_utils import serialize_entity_key
from feast.infra.online_stores.online_store import OnlineStore
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.repo_config import FeastConfigBaseModel


class FaissOnlineStoreConfig(FeastConfigBaseModel):
    """Configuration model for the FAISS-backed online store.

    ``dimension`` and ``index_path`` declare no default and therefore must be
    supplied; ``index_type`` and ``nlist`` fall back to the defaults below.
    """

    # NOTE(review): not read by the visible store code, which derives the
    # vector size from the feature count instead -- confirm intended use.
    dimension: int
    # NOTE(review): not read by the visible store code -- presumably where an
    # index would be persisted; confirm.
    index_path: str
    # NOTE(review): not read by the visible store code, which always builds a
    # faiss.IndexIVFFlat.
    index_type: str = "IVFFlat"
    # Passed as the list-count argument of faiss.IndexIVFFlat and used to size
    # the training sample (nlist * 100 vectors) in FaissOnlineStore.update.
    nlist: int = 100


class InMemoryStore:
    """Plain in-process bookkeeping: a feature-name list plus a key -> int map."""

    def __init__(self):
        self.feature_names: List[str] = []
        self.entity_keys: Dict[str, int] = {}

    def update(self, feature_names: List[str], entity_keys: Dict[str, int]):
        """Replace both attributes with the given objects (stored by reference)."""
        self.feature_names, self.entity_keys = feature_names, entity_keys

    def delete(self, entity_keys: List[str]):
        """Drop each listed key from the map; keys that are absent are ignored."""
        for key in entity_keys:
            self.entity_keys.pop(key, None)

    def read(self, entity_keys: List[str]) -> List[Optional[int]]:
        """Return the mapped value for every key, in order, or None when absent."""
        lookup = self.entity_keys.get
        return [lookup(key) for key in entity_keys]

    def teardown(self):
        """Reset the store to a fresh empty list and a fresh empty map."""
        self.feature_names, self.entity_keys = [], {}


class FaissOnlineStore(OnlineStore):
    """Online store that keeps feature vectors in an in-process FAISS IVF-Flat index.

    Every entity is stored as one float32 vector whose components follow the
    feature-name order captured by ``update``. An ``InMemoryStore`` maps the
    hex-encoded serialized entity key to the integer id of that vector.
    This class performs no file I/O; ``index_path`` from the config is not read.
    """

    _index: Optional[faiss.IndexIVFFlat] = None
    # Class-level default only; ``update`` swaps in a per-instance store every
    # time it (re)builds the index.
    _in_memory_store: InMemoryStore = InMemoryStore()
    _config: Optional[FaissOnlineStoreConfig] = None
    _logger: logging.Logger = logging.getLogger(__name__)
    # Next unused vector id. Ids are handed out monotonically and never reused,
    # so removing a vector can never make a later write collide with a live id.
    _next_id: int = 0

    def _get_index(self, config: RepoConfig) -> faiss.IndexIVFFlat:
        """Return the current index.

        Raises:
            ValueError: if ``update`` has not yet built the index / parsed the config.
        """
        if self._index is None or self._config is None:
            raise ValueError("Index is not initialized")
        return self._index

    def update(
        self,
        config: RepoConfig,
        tables_to_delete: Sequence[FeatureView],
        tables_to_keep: Sequence[FeatureView],
        entities_to_delete: Sequence[Entity],
        entities_to_keep: Sequence[Entity],
        partial: bool,
    ):
        """Build (or keep) the index for the first feature view in ``tables_to_keep``.

        Does nothing when ``tables_to_keep`` is empty. Only the first feature
        view is considered: its feature names define the vector layout and their
        count defines the index dimension.

        The existing index and key map are kept untouched only when an index
        already exists, ``partial`` is true and the feature layout is unchanged.
        In every other case a fresh index is trained on uniformly random vectors
        and the key map starts empty, so the index and the map always describe
        the same set of vectors.
        """
        feature_views = tables_to_keep
        if not feature_views:
            return

        feature_names = [f.name for f in feature_views[0].features]
        dimension = len(feature_names)

        self._config = FaissOnlineStoreConfig(**config.online_store.dict())

        layout_unchanged = feature_names == self._in_memory_store.feature_names
        if self._index is not None and partial and layout_unchanged:
            # Previously the key map was wiped here while the index kept its
            # vectors, orphaning every stored row. Keep both as they are.
            return

        quantizer = faiss.IndexFlatL2(dimension)
        index = faiss.IndexIVFFlat(quantizer, dimension, self._config.nlist)
        index.train(
            np.random.rand(self._config.nlist * 100, dimension).astype(np.float32)
        )
        # IVF indexes resolve ids for ``reconstruct`` through a direct map; the
        # hashtable variant also works with ``add_with_ids`` and ``remove_ids``.
        index.set_direct_map_type(faiss.DirectMap.Hashtable)

        self._index = index
        self._in_memory_store = InMemoryStore()
        self._in_memory_store.update(feature_names, {})
        self._next_id = 0

    def teardown(
        self,
        config: RepoConfig,
        tables: Sequence[FeatureView],
        entities: Sequence[Entity],
    ):
        """Drop the index and clear the feature-name list and key map."""
        self._index = None
        self._in_memory_store.teardown()
        self._next_id = 0

    def online_read(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Read the stored feature values for each entity key.

        Returns one tuple per entity key, in order. Unknown keys (and every key
        when no index exists) yield ``(None, None)``. The timestamp slot is
        always ``None`` because no event timestamp is stored. Values are
        returned as ``double_val`` protos; when ``requested_features`` is given
        only those feature names are included.
        """
        if self._index is None:
            return [(None, None)] * len(entity_keys)

        wanted = None if requested_features is None else set(requested_features)
        feature_names = self._in_memory_store.feature_names
        known_ids = self._in_memory_store.entity_keys

        results: List[Tuple[Optional[datetime], Optional[Dict[str, Any]]]] = []
        for entity_key in entity_keys:
            serialized_key = serialize_entity_key(
                entity_key, config.entity_key_serialization_version
            ).hex()
            idx = known_ids.get(serialized_key)
            if idx is None:
                results.append((None, None))
                continue

            feature_vector = self._index.reconstruct(int(idx))
            feature_dict = {
                # float() turns the numpy float32 scalar into a plain Python float.
                name: ValueProto(double_val=float(value))
                for name, value in zip(feature_names, feature_vector)
                if wanted is None or name in wanted
            }
            results.append((None, feature_dict))
        return results

    def online_write_batch(
        self,
        config: RepoConfig,
        table: FeatureView,
        data: List[
            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
        ],
        progress: Optional[Callable[[int], Any]],
    ) -> None:
        """Insert or overwrite the vectors for a batch of entities.

        Each row's vector is built from the ``double_val`` of every feature
        name captured by ``update`` (a missing feature raises ``KeyError``).
        Timestamps in ``data`` are ignored. When the same entity key appears
        more than once in the batch, the last row wins. If no index exists the
        write is skipped with a warning; an empty batch is a no-op.
        """
        if self._index is None:
            self._logger.warning("Index is not initialized. Skipping write operation.")
            return
        if not data:
            # Nothing to add; an empty array would not have the (n, d) shape
            # the index expects.
            return

        feature_names = self._in_memory_store.feature_names

        # Keyed by serialized entity key so duplicates collapse to the last row.
        pending: Dict[str, np.ndarray] = {}
        for entity_key, feature_dict, _, _ in data:
            serialized_key = serialize_entity_key(
                entity_key, config.entity_key_serialization_version
            ).hex()
            pending[serialized_key] = np.array(
                [feature_dict[name].double_val for name in feature_names],
                dtype=np.float32,
            )

        known_ids = self._in_memory_store.entity_keys

        # Remove the previous vectors of entities that are being overwritten.
        stale_ids = [int(known_ids[sk]) for sk in pending if sk in known_ids]
        if stale_ids:
            self._index.remove_ids(np.array(stale_ids, dtype=np.int64))

        # Allocate ids from the monotonic counter rather than from ``ntotal``:
        # ``ntotal`` shrinks after removals, so ids derived from it could
        # collide with ids still held by other entities.
        new_ids = np.arange(
            self._next_id, self._next_id + len(pending), dtype=np.int64
        )
        self._index.add_with_ids(np.stack(list(pending.values())), new_ids)
        self._next_id += len(pending)

        for sk, idx in zip(pending, new_ids):
            known_ids[sk] = int(idx)

        if progress:
            progress(len(data))

    def retrieve_online_documents(
        self,
        config: RepoConfig,
        table: FeatureView,
        requested_feature: str,
        embedding: List[float],
        top_k: int,
        distance_metric: Optional[str] = None,
    ) -> List[
        Tuple[
            Optional[datetime],
            Optional[EntityKeyProto],
            Optional[ValueProto],
            Optional[ValueProto],
            Optional[ValueProto],
        ]
    ]:
        """Return up to ``top_k`` stored vectors nearest to ``embedding``.

        Each result is ``(timestamp, entity_key, feature, vector, distance)``
        where ``timestamp`` is the time of the query (not of the write),
        ``entity_key`` is an empty ``EntityKeyProto``, ``feature`` and
        ``vector`` both carry the stored vector as a comma-joined string and
        ``distance`` is the distance reported by the index search.
        ``requested_feature`` and ``distance_metric`` are not used. Returns an
        empty list when no index exists or ``top_k`` is not positive.
        """
        if self._index is None:
            self._logger.warning("Index is not initialized. Returning empty result.")
            return []
        if top_k <= 0:
            return []

        query_vector = np.array(embedding, dtype=np.float32).reshape(1, -1)
        distances, indices = self._index.search(query_vector, top_k)

        results: List[
            Tuple[
                Optional[datetime],
                Optional[EntityKeyProto],
                Optional[ValueProto],
                Optional[ValueProto],
                Optional[ValueProto],
            ]
        ] = []
        for i, idx in enumerate(indices[0]):
            # The search pads missing neighbours with id -1.
            if idx == -1:
                continue

            feature_vector = self._index.reconstruct(int(idx))
            joined_vector = ",".join(map(str, feature_vector))

            timestamp = Timestamp()
            timestamp.GetCurrentTime()
            entity_value = EntityKeyProto()
            feature_value = ValueProto(string_val=joined_vector)
            vector_value = ValueProto(string_val=joined_vector)
            distance_value = ValueProto(float_val=float(distances[0][i]))

            results.append(
                (
                    timestamp.ToDatetime(),
                    entity_value,
                    feature_value,
                    vector_value,
                    distance_value,
                )
            )

        return results

    async def online_read_async(
        self,
        config: RepoConfig,
        table: FeatureView,
        entity_keys: List[EntityKeyProto],
        requested_features: Optional[List[str]] = None,
    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
        """Async read is unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Async read is not implemented for FaissOnlineStore")
4 changes: 4 additions & 0 deletions sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,10 @@ def init_repo(repo_name: str, template: str):
raise IOError(f"Could not find template {template}")
copytree(template_path, str(repo_path), dirs_exist_ok=True)

# Rename gitignore files back to .gitignore
for gitignore_path in repo_path.rglob("gitignore"):
gitignore_path.rename(gitignore_path.with_name(".gitignore"))

# Seed the repository
bootstrap_path = repo_path / "bootstrap.py"
if os.path.exists(bootstrap_path):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
24 changes: 19 additions & 5 deletions sdk/python/feast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from feast.protos.feast.types.Value_pb2 import RepeatedValue as RepeatedValueProto
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
from feast.type_map import python_values_to_proto_values
from feast.types import from_feast_to_pyarrow_type
from feast.types import ComplexFeastType, PrimitiveFeastType, from_feast_to_pyarrow_type
from feast.value_type import ValueType
from feast.version import get_version

Expand Down Expand Up @@ -552,13 +552,27 @@ def _augment_response_with_on_demand_transforms(
selected_subset = [f for f in transformed_columns if f in _feature_refs]

proto_values = []
schema_dict = {k.name: k.dtype for k in odfv.schema}
for selected_feature in selected_subset:
feature_vector = transformed_features[selected_feature]
selected_feature_type = schema_dict.get(selected_feature, None)
feature_type: ValueType = ValueType.UNKNOWN
if selected_feature_type is not None:
if isinstance(
selected_feature_type, (ComplexFeastType, PrimitiveFeastType)
):
feature_type = selected_feature_type.to_value_type()
elif not isinstance(selected_feature_type, ValueType):
raise TypeError(
f"Unexpected type for feature_type: {type(feature_type)}"
)

proto_values.append(
python_values_to_proto_values(feature_vector, ValueType.UNKNOWN)
if odfv.mode == "python"
else python_values_to_proto_values(
feature_vector.to_numpy(), ValueType.UNKNOWN
python_values_to_proto_values(
feature_vector
if odfv.mode == "python"
else feature_vector.to_numpy(),
feature_type,
)
)

Expand Down
3 changes: 2 additions & 1 deletion sdk/python/requirements/py3.10-ci-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ execnet==2.1.1
# via pytest-xdist
executing==2.1.0
# via stack-data
fastapi==0.115.0
fastapi==0.115.2
fastjsonschema==2.20.0
# via nbformat
filelock==3.16.1
Expand Down Expand Up @@ -889,6 +889,7 @@ urllib3==2.2.3
# responses
# testcontainers
uvicorn[standard]==0.30.6
uvicorn-worker
uvloop==0.20.0
# via uvicorn
virtualenv==20.23.0
Expand Down
Loading

0 comments on commit a928510

Please sign in to comment.