fix: Add new value types to types.ts for web ui (feast-dev#2463)
* add new value-types

Signed-off-by: Miray Yuce <myuce@twitter.com>

* auto formatted files

Signed-off-by: Miray Yuce <myuce@twitter.com>

* make format-python

Signed-off-by: Achal Shah <achals@gmail.com>

Co-authored-by: Miray Yuce <myuce@twitter.com>
Co-authored-by: Achal Shah <achals@gmail.com>
Signed-off-by: joostvan <joost.vaningen@adyen.com>
3 people authored and joostvan committed Mar 30, 2022
1 parent 3fdfcf2 commit 081bc63
Showing 20 changed files with 121 additions and 111 deletions.
90 changes: 45 additions & 45 deletions sdk/python/feast/feature_store.py
@@ -399,14 +399,14 @@ def delete_feature_view(self, name: str):
    @log_exceptions_and_usage
    def delete_feature_service(self, name: str):
        """
        Deletes a feature service.

        Args:
            name: Name of feature service.

        Raises:
            FeatureServiceNotFoundException: The feature view could not be found.
        """
        return self._registry.delete_feature_service(name, self.project)

    def _get_features(
@@ -903,17 +903,17 @@ def create_saved_dataset(
        feature_service: Optional[FeatureService] = None,
    ) -> SavedDataset:
        """
        Execute provided retrieval job and persist its outcome in given storage.
        Storage type (eg, BigQuery or Redshift) must be the same as globally configured offline store.
        After data successfully persisted saved dataset object with dataset metadata is committed to the registry.
        Name for the saved dataset should be unique within project, since it's possible to overwrite previously stored dataset
        with the same name.

        Returns:
            SavedDataset object with attached RetrievalJob

        Raises:
            ValueError if given retrieval job doesn't have metadata
        """
        warnings.warn(
            "Saving dataset is an experimental feature. "
@@ -1589,11 +1589,11 @@ def _get_unique_entities(
        join_key_values: Dict[str, List[Value]],
        entity_name_to_join_key_map: Dict[str, str],
    ) -> Tuple[Tuple[Dict[str, Value], ...], Tuple[List[int], ...]]:
-        """ Return the set of unique composite Entities for a Feature View and the indexes at which they appear.
+        """Return the set of unique composite Entities for a Feature View and the indexes at which they appear.

        This method allows us to query the OnlineStore for data we need only once
        rather than requesting and processing data for the same combination of
        Entities multiple times.
        """
        # Get the correct set of entity values with the correct join keys.
        table_entity_values = self._get_table_entity_values(
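As a rough illustration of the de-duplication described above (not Feast's actual implementation), unique composite entity keys can be collected together with the row indexes at which each appears, so the online store is queried once per unique key:

from collections import defaultdict
from typing import Dict, List, Tuple

def unique_entities_with_indexes(
    rows: List[Dict[str, str]]
) -> Tuple[List[Dict[str, str]], List[List[int]]]:
    """Group duplicate composite entity rows so each unique key is looked up only once."""
    index_map: Dict[Tuple, List[int]] = defaultdict(list)
    for i, row in enumerate(rows):
        index_map[tuple(sorted(row.items()))].append(i)
    uniques = [dict(key) for key in index_map]
    indexes = list(index_map.values())
    return uniques, indexes

rows = [{"driver_id": "1001"}, {"driver_id": "1002"}, {"driver_id": "1001"}]
uniques, indexes = unique_entities_with_indexes(rows)
# uniques  -> [{'driver_id': '1001'}, {'driver_id': '1002'}]
# indexes  -> [[0, 2], [1]]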
@@ -1629,14 +1629,14 @@ def _read_from_online_store(
        requested_features: List[str],
        table: FeatureView,
    ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]:
-        """ Read and process data from the OnlineStore for a given FeatureView.
+        """Read and process data from the OnlineStore for a given FeatureView.

        This method guarantees that the order of the data in each element of the
        List returned is the same as the order of `requested_features`.

        This method assumes that `provider.online_read` returns data for each
        combination of Entities in `entity_rows` in the same order as they
        are provided.
        """
        # Instantiate one EntityKeyProto per Entity.
        entity_key_protos = [
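A toy sketch of the ordering guarantee stated above, illustrative only: values are re-emitted in the order of `requested_features`, regardless of the order in which the store returned them:

from typing import Any, Dict, List

def order_values(requested_features: List[str], read_row: Dict[str, Any]) -> List[Any]:
    """Emit values in the order the caller requested, not the order the store returned."""
    return [read_row.get(feature) for feature in requested_features]

row = {"acc_rate": 0.91, "conv_rate": 0.37}          # order as returned by the store
print(order_values(["conv_rate", "acc_rate"], row))  # [0.37, 0.91]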
@@ -1693,23 +1693,23 @@ def _populate_response_from_feature_data(
        requested_features: Iterable[str],
        table: FeatureView,
    ):
-        """ Populate the GetOnlineFeaturesResponse with feature data.
+        """Populate the GetOnlineFeaturesResponse with feature data.

        This method assumes that `_read_from_online_store` returns data for each
        combination of Entities in `entity_rows` in the same order as they
        are provided.

        Args:
            feature_data: A list of data in Protobuf form which was retrieved from the OnlineStore.
            indexes: A list of indexes which should be the same length as `feature_data`. Each list
                of indexes corresponds to a set of result rows in `online_features_response`.
            online_features_response: The object to populate.
            full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names,
                changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to
                "customer_fv__daily_transactions").
            requested_features: The names of the features in `feature_data`. This should be ordered in the same way as the
                data in `feature_data`.
            table: The FeatureView that `feature_data` was retrieved from.
        """
        # Add the feature names to the response.
        requested_feature_refs = [
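The `full_feature_names` flag described above simply prefixes each feature with its feature view name and a double underscore; a minimal illustration (not Feast's internal code):

from typing import List

def to_full_feature_names(feature_view: str, features: List[str]) -> List[str]:
    """Prefix plain feature names with their feature view, e.g. 'customer_fv__daily_transactions'."""
    return [f"{feature_view}__{feature}" for feature in features]

print(to_full_feature_names("customer_fv", ["daily_transactions"]))
# ['customer_fv__daily_transactions']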
@@ -1973,7 +1973,7 @@ def _group_feature_refs(
        List[Tuple[RequestFeatureView, List[str]]],
        Set[str],
    ]:
-    """ Get list of feature views and corresponding feature names based on feature references"""
+    """Get list of feature views and corresponding feature names based on feature references"""

    # view name to view proto
    view_index = {view.projection.name_to_use(): view for view in all_feature_views}
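Feature references take the form "feature_view:feature"; grouping them per view, as the helper above does, can be sketched as follows (illustrative only):

from collections import defaultdict
from typing import Dict, List

def group_feature_refs(feature_refs: List[str]) -> Dict[str, List[str]]:
    """Group 'view:feature' references by their feature view name."""
    grouped: Dict[str, List[str]] = defaultdict(list)
    for ref in feature_refs:
        view_name, feature_name = ref.split(":", 1)
        grouped[view_name].append(feature_name)
    return dict(grouped)

refs = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate", "customer_fv:daily_transactions"]
print(group_feature_refs(refs))
# {'driver_hourly_stats': ['conv_rate', 'acc_rate'], 'customer_fv': ['daily_transactions']}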
@@ -2046,7 +2046,7 @@ def _print_materialization_log(


def _validate_feature_views(feature_views: List[BaseFeatureView]):
-    """ Verify feature views have case-insensitively unique names"""
+    """Verify feature views have case-insensitively unique names"""
    fv_names = set()
    for fv in feature_views:
        case_insensitive_fv_name = fv.name.lower()
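A small sketch of the case-insensitive uniqueness rule enforced above, with hypothetical names and a simplified error message:

def validate_unique_names(names):
    """Raise if two names collide when compared case-insensitively."""
    seen = set()
    for name in names:
        lowered = name.lower()
        if lowered in seen:
            raise ValueError(f"More than one object with name {lowered} found.")
        seen.add(lowered)

validate_unique_names(["driver_stats", "customer_profile"])   # passes
# validate_unique_names(["driver_stats", "Driver_Stats"])     # would raise ValueError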
Expand All @@ -2061,7 +2061,7 @@ def _validate_feature_views(feature_views: List[BaseFeatureView]):


def _validate_data_sources(data_sources: List[DataSource]):
""" Verify data sources have case-insensitively unique names"""
"""Verify data sources have case-insensitively unique names"""
ds_names = set()
for fv in data_sources:
case_insensitive_ds_name = fv.name.lower()
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/offline_stores/bigquery.py
@@ -56,7 +56,7 @@


class BigQueryOfflineStoreConfig(FeastConfigBaseModel):
-    """ Offline store config for GCP BigQuery """
+    """Offline store config for GCP BigQuery"""

    type: Literal["bigquery"] = "bigquery"
    """ Offline store type selector"""
28 changes: 14 additions & 14 deletions sdk/python/feast/infra/offline_stores/bigquery_source.py
@@ -27,20 +27,20 @@ def __init__(
    ):
        """Create a BigQuerySource from an existing table or query.

        Args:
            table (optional): The BigQuery table where features can be found.
            table_ref (optional): (Deprecated) The BigQuery table where features can be found.
            event_timestamp_column: Event timestamp column used for point in time joins of feature values.
            created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
            field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
                or view. Only used for feature columns, not entities or timestamp columns.
            date_partition_column (optional): Timestamp column used for partitioning.
            query (optional): SQL query to execute to generate data for this data source.
            name (optional): Name for the source. Defaults to the table_ref if not specified.

        Example:
            >>> from feast import BigQuerySource
            >>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table")
        """
        if table is None and table_ref is None and query is None:
            raise ValueError('No "table" or "query" argument provided.')
        if not table and table_ref:
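Besides an existing table, the docstring above allows a query-backed source. A hedged sketch; the source name, SQL, and column names are made up:

from feast import BigQuerySource

# Derive the source from a SQL query instead of a table reference.
driver_stats_source = BigQuerySource(
    name="driver_stats_query_source",
    query="SELECT driver_id, conv_rate, event_timestamp FROM bq_dataset.driver_stats",
    event_timestamp_column="event_timestamp",
)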
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/offline_stores/file.py
@@ -31,7 +31,7 @@


class FileOfflineStoreConfig(FeastConfigBaseModel):
-    """ Offline store config for local (file-based) store """
+    """Offline store config for local (file-based) store"""

    type: Literal["file"] = "file"
    """ Offline store type selector"""
6 changes: 3 additions & 3 deletions sdk/python/feast/infra/offline_stores/redshift.py
@@ -39,7 +39,7 @@


class RedshiftOfflineStoreConfig(FeastConfigBaseModel):
-    """ Offline store config for AWS Redshift """
+    """Offline store config for AWS Redshift"""

    type: Literal["redshift"] = "redshift"
    """ Offline store type selector"""
@@ -341,7 +341,7 @@ def _to_arrow_internal(self) -> pa.Table:

    @log_exceptions_and_usage
    def to_s3(self) -> str:
-        """ Export dataset to S3 in Parquet format and return path """
+        """Export dataset to S3 in Parquet format and return path"""
        if self.on_demand_feature_views:
            transformed_df = self.to_df()
            aws_utils.upload_df_to_s3(self._s3_resource, self._s3_path, transformed_df)
@@ -361,7 +361,7 @@ def to_s3(self) -> str:

    @log_exceptions_and_usage
    def to_redshift(self, table_name: str) -> None:
-        """ Save dataset as a new Redshift table """
+        """Save dataset as a new Redshift table"""
        if self.on_demand_feature_views:
            transformed_df = self.to_df()
            aws_utils.upload_df_to_redshift(
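A hedged usage sketch of the two export helpers touched above, assuming a repository configured with the Redshift offline store, an existing `entity_df` DataFrame, and a `driver_hourly_stats` feature view (all assumed names):

from feast import FeatureStore

store = FeatureStore(repo_path=".")
job = store.get_historical_features(
    entity_df=entity_df,  # assumed pandas DataFrame of entity keys and event timestamps
    features=["driver_hourly_stats:conv_rate"],
)

# With a Redshift offline store, the returned job exposes Redshift-specific exports:
parquet_path = job.to_s3()            # unload the results to S3 as Parquet and return the path
# job.to_redshift("driver_training")  # or materialize them into a new Redshift table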
4 changes: 2 additions & 2 deletions sdk/python/feast/infra/offline_stores/snowflake.py
@@ -54,7 +54,7 @@


class SnowflakeOfflineStoreConfig(FeastConfigBaseModel):
-    """ Offline store config for Snowflake """
+    """Offline store config for Snowflake"""

    type: Literal["snowflake.offline"] = "snowflake.offline"
    """ Offline store type selector"""
@@ -336,7 +336,7 @@ def _to_arrow_internal(self) -> pa.Table:
        )

    def to_snowflake(self, table_name: str) -> None:
-        """ Save dataset as a new Snowflake table """
+        """Save dataset as a new Snowflake table"""
        if self.on_demand_feature_views is not None:
            transformed_df = self.to_df()

2 changes: 1 addition & 1 deletion sdk/python/feast/infra/online_stores/datastore.py
@@ -55,7 +55,7 @@


class DatastoreOnlineStoreConfig(FeastConfigBaseModel):
-    """ Online store config for GCP Datastore """
+    """Online store config for GCP Datastore"""

    type: Literal["datastore"] = "datastore"
    """ Online store type selector"""
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/online_stores/sqlite.py
@@ -37,7 +37,7 @@


class SqliteOnlineStoreConfig(FeastConfigBaseModel):
-    """ Online store config for local (SQLite-based) store """
+    """Online store config for local (SQLite-based) store"""

    type: Literal[
        "sqlite", "feast.infra.online_stores.sqlite.SqliteOnlineStore"
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/provider.py
@@ -183,7 +183,7 @@ def retrieve_saved_dataset(
        Returns:
            RetrievalJob object, which is lazy wrapper for actual query performed under the hood.
        """
        ...

    def get_feature_server_endpoint(self) -> Optional[str]:
10 changes: 5 additions & 5 deletions sdk/python/feast/infra/utils/aws_utils.py
@@ -151,7 +151,7 @@ def execute_redshift_statement(


def get_redshift_statement_result(redshift_data_client, statement_id: str) -> dict:
-    """ Get the Redshift statement result """
+    """Get the Redshift statement result"""
    return redshift_data_client.get_statement_result(Id=statement_id)


@@ -306,7 +306,7 @@ def temporarily_upload_df_to_redshift(


def download_s3_directory(s3_resource, bucket: str, key: str, local_dir: str):
-    """ Download the S3 directory to a local disk """
+    """Download the S3 directory to a local disk"""
    bucket_obj = s3_resource.Bucket(bucket)
    if key != "" and not key.endswith("/"):
        key = key + "/"
@@ -318,7 +318,7 @@ def download_s3_directory(s3_resource, bucket: str, key: str, local_dir: str):


def delete_s3_directory(s3_resource, bucket: str, key: str):
-    """ Delete S3 directory recursively """
+    """Delete S3 directory recursively"""
    bucket_obj = s3_resource.Bucket(bucket)
    if key != "" and not key.endswith("/"):
        key = key + "/"
@@ -365,7 +365,7 @@ def unload_redshift_query_to_pa(
    iam_role: str,
    query: str,
) -> pa.Table:
-    """ Unload Redshift Query results to S3 and get the results in PyArrow Table format """
+    """Unload Redshift Query results to S3 and get the results in PyArrow Table format"""
    bucket, key = get_bucket_and_key(s3_path)

    execute_redshift_query_and_unload_to_s3(
@@ -388,7 +388,7 @@ def unload_redshift_query_to_df(
    iam_role: str,
    query: str,
) -> pd.DataFrame:
-    """ Unload Redshift Query results to S3 and get the results in Pandas DataFrame format """
+    """Unload Redshift Query results to S3 and get the results in Pandas DataFrame format"""
    table = unload_redshift_query_to_pa(
        redshift_data_client,
        cluster_id,
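The unload helpers above first split an s3:// path into a bucket and key. A rough sketch of that kind of split, illustrative rather than Feast's exact `get_bucket_and_key` implementation:

from typing import Tuple

def split_s3_path(s3_path: str) -> Tuple[str, str]:
    """Split 's3://bucket/some/key' into ('bucket', 'some/key')."""
    assert s3_path.startswith("s3://"), "expected an s3:// URI"
    bucket, _, key = s3_path[len("s3://"):].partition("/")
    return bucket, key

print(split_s3_path("s3://my-staging-bucket/feast/unload"))
# ('my-staging-bucket', 'feast/unload')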
8 changes: 4 additions & 4 deletions sdk/python/feast/repo_config.py
@@ -57,23 +57,23 @@


class FeastBaseModel(BaseModel):
-    """ Feast Pydantic Configuration Class """
+    """Feast Pydantic Configuration Class"""

    class Config:
        arbitrary_types_allowed = True
        extra = "allow"


class FeastConfigBaseModel(BaseModel):
-    """ Feast Pydantic Configuration Class """
+    """Feast Pydantic Configuration Class"""

    class Config:
        arbitrary_types_allowed = True
        extra = "forbid"


class RegistryConfig(FeastBaseModel):
-    """ Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry."""
+    """Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry."""

    registry_store_type: Optional[StrictStr]
    """ str: Provider name or a class name that implements RegistryStore. """
@@ -89,7 +89,7 @@ class RegistryConfig(FeastBaseModel):


class RepoConfig(FeastBaseModel):
-    """ Repo config. Typically loaded from `feature_store.yaml` """
+    """Repo config. Typically loaded from `feature_store.yaml`"""

    registry: Union[StrictStr, RegistryConfig] = "data/registry.db"
    """ str: Path to metadata store. Can be a local path, or remote object storage path, e.g. a GCS URI """
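`RepoConfig` is typically loaded from `feature_store.yaml`, but it can also be constructed directly. A hedged sketch with made-up values; required fields and accepted shapes may vary by Feast version:

from feast.repo_config import RepoConfig

config = RepoConfig(
    project="driver_ranking",      # illustrative project name
    provider="local",
    registry="data/registry.db",   # local path, or a remote object store URI
    online_store={"type": "sqlite"},
    offline_store={"type": "file"},
)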
4 changes: 2 additions & 2 deletions sdk/python/feast/repo_operations.py
@@ -94,7 +94,7 @@ def get_repo_files(repo_root: Path) -> List[Path]:


def parse_repo(repo_root: Path) -> RepoContents:
-    """ Collect feature table definitions from feature repo """
+    """Collect feature table definitions from feature repo"""
    res = RepoContents(
        data_sources=set(),
        entities=set(),
@@ -264,7 +264,7 @@ def teardown(repo_config: RepoConfig, repo_path: Path):

@log_exceptions_and_usage
def registry_dump(repo_config: RepoConfig, repo_path: Path):
-    """ For debugging only: output contents of the metadata registry """
+    """For debugging only: output contents of the metadata registry"""
    registry_config = repo_config.get_registry_config()
    project = repo_config.project
    registry = Registry(registry_config=registry_config, repo_path=repo_path)
8 changes: 4 additions & 4 deletions sdk/python/feast/type_map.py
@@ -580,10 +580,10 @@ def pa_to_redshift_value_type(pa_type: pyarrow.DataType) -> str:

def _non_empty_value(value: Any) -> bool:
    """
    Check that there's enough data we can use for type inference.
    If primitive type - just checking that it's not None
    If iterable - checking that there's some elements (len > 0)
    String is special case: "" - empty string is considered non empty
    """
    return value is not None and (
        not isinstance(value, Sized) or len(value) > 0 or isinstance(value, str)
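The check above amounts to "not None, and either unsized, non-empty, or a string". A self-contained restatement with a few illustrative cases:

from collections.abc import Sized
from typing import Any

def non_empty_value(value: Any) -> bool:
    """Mirror of the check above: is there enough data for type inference?"""
    return value is not None and (
        not isinstance(value, Sized) or len(value) > 0 or isinstance(value, str)
    )

assert non_empty_value(0) is True        # primitive: only None is rejected
assert non_empty_value(None) is False
assert non_empty_value([]) is False      # empty iterable carries no type information
assert non_empty_value([1, 2]) is True
assert non_empty_value("") is True       # empty string is still considered non-empty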