diff --git a/docs/getting-started/concepts/feature-view.md b/docs/getting-started/concepts/feature-view.md index 3f7f59547a..d94b1359cd 100644 --- a/docs/getting-started/concepts/feature-view.md +++ b/docs/getting-started/concepts/feature-view.md @@ -7,12 +7,14 @@ A feature view is an object that represents a logical group of time-series featu {% tabs %} {% tab title="driver_trips_feature_view.py" %} ```python +from feast import BigQuerySource, FeatureView, Field, Float32, Int64 + driver_stats_fv = FeatureView( name="driver_activity", entities=["driver"], - features=[ - Feature(name="trips_today", dtype=ValueType.INT64), - Feature(name="rating", dtype=ValueType.FLOAT), + schema=[ + Field(name="trips_today", dtype=Int64), + Field(name="rating", dtype=Float32), ], batch_source=BigQuerySource( table_ref="feast-oss.demo_data.driver_activity" @@ -39,11 +41,13 @@ If a feature view contains features that are not related to a specific entity, t {% tabs %} {% tab title="global_stats.py" %} ```python +from feast import BigQuerySource, FeatureView, Field, Int64 + global_stats_fv = FeatureView( name="global_stats", entities=[], - features=[ - Feature(name="total_trips_today_by_all_drivers", dtype=ValueType.INT64), + schema=[ + Field(name="total_trips_today_by_all_drivers", dtype=Int64), ], batch_source=BigQuerySource( table_ref="feast-oss.demo_data.global_stats" @@ -70,13 +74,15 @@ It is suggested that you dynamically specify the new FeatureView name using `.wi {% tabs %} {% tab title="location_stats_feature_view.py" %} ```python +from feast import BigQuerySource, Entity, FeatureView, Field, Int32, ValueType + location = Entity(name="location", join_key="location_id", value_type=ValueType.INT64) location_stats_fv= FeatureView( name="location_stats", entities=["location"], - features=[ - Feature(name="temperature", dtype=ValueType.INT32) + schema=[ + Field(name="temperature", dtype=Int32) ], batch_source=BigQuerySource( table_ref="feast-oss.demo_data.location_stats" @@ -115,9 
+121,11 @@ A feature is an individual measurable property. It is typically a property obser Features are defined as part of feature views. Since Feast does not transform data, a feature is essentially a schema that only contains a name and a type: ```python -trips_today = Feature( +from feast import Field, Float32 + +trips_today = Field( name="trips_today", - dtype=ValueType.FLOAT + dtype=Float32 ) ``` diff --git a/docs/getting-started/concepts/point-in-time-joins.md b/docs/getting-started/concepts/point-in-time-joins.md index 3e876ba01f..163128437d 100644 --- a/docs/getting-started/concepts/point-in-time-joins.md +++ b/docs/getting-started/concepts/point-in-time-joins.md @@ -7,12 +7,14 @@ Feature values in Feast are modeled as time-series records. Below is an example The above table can be registered with Feast through the following feature view: ```python +from feast import FeatureView, Field, FileSource, Float32, Int64 + driver_stats_fv = FeatureView( name="driver_hourly_stats", entities=["driver"], - features=[ - Feature(name="trips_today", dtype=ValueType.INT64), - Feature(name="earnings_today", dtype=ValueType.FLOAT), + schema=[ + Field(name="trips_today", dtype=Int64), + Field(name="earnings_today", dtype=Float32), ], ttl=timedelta(hours=2), batch_source=FileSource( diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index ed70f75712..f086d10a3d 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -80,16 +80,16 @@ online_store: ```python # This is an example feature definition file -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast import Entity, FeatureView, Field, FileSource, Float32, Int64, ValueType # Read data from parquet files. Parquet is convenient for local development mode. For # production, you can use your favorite DWH, such as BigQuery. 
See Feast documentation # for more info. driver_hourly_stats = FileSource( path="/content/feature_repo/data/driver_stats.parquet", - event_timestamp_column="event_timestamp", + timestamp_field="event_timestamp", created_timestamp_column="created", ) @@ -106,10 +106,10 @@ driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver"], # reference entity by name ttl=Duration(seconds=86400 * 1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, @@ -149,16 +149,16 @@ feast apply ```python # This is an example feature definition file -from google.protobuf.duration_pb2 import Duration +from datetime import timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast import Entity, FeatureView, Field, FileSource, Float32, Int64, ValueType # Read data from parquet files. Parquet is convenient for local development mode. For # production, you can use your favorite DWH, such as BigQuery. See Feast documentation # for more info. 
driver_hourly_stats = FileSource( path="/content/feature_repo/data/driver_stats.parquet", - event_timestamp_column="event_timestamp", + timestamp_field="event_timestamp", created_timestamp_column="created", ) @@ -175,10 +175,10 @@ driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", entities=["driver"], # reference entity by name ttl=Duration(seconds=86400 * 1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, diff --git a/docs/reference/data-sources/push.md b/docs/reference/data-sources/push.md index ec43518b84..df2858a3bc 100644 --- a/docs/reference/data-sources/push.md +++ b/docs/reference/data-sources/push.md @@ -14,7 +14,7 @@ When using a PushSource as a stream source in the definition of a feature view, ### Defining a push source ```python -from feast import PushSource, ValueType, BigQuerySource, FeatureView, Feature +from feast import PushSource, ValueType, BigQuerySource, FeatureView, Feature, Field, Int64 push_source = PushSource( name="push_source", @@ -25,7 +25,7 @@ push_source = PushSource( fv = FeatureView( name="feature view", entities=["user_id"], - features=[Feature(name="life_time_value", dtype=ValueType.INT64)], + schema=[Field(name="life_time_value", dtype=Int64)], stream_source=push_source, ) ``` diff --git a/docs/reference/feature-repository.md b/docs/reference/feature-repository.md index f6f880d56f..703e970c9f 100644 --- a/docs/reference/feature-repository.md +++ b/docs/reference/feature-repository.md @@ -89,7 +89,7 @@ A feature repository can also contain one or more Python files that contain feat ```python from datetime import timedelta -from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType +from 
feast import BigQuerySource, Entity, Feature, FeatureView, Field, Float32, String, ValueType driver_locations_source = BigQuerySource( table_ref="rh_prod.ride_hailing_co.drivers", @@ -107,9 +107,9 @@ driver_locations = FeatureView( name="driver_locations", entities=["driver"], ttl=timedelta(days=1), - features=[ - Feature(name="lat", dtype=ValueType.FLOAT), - Feature(name="lon", dtype=ValueType.STRING), + schema=[ + Field(name="lat", dtype=Float32), + Field(name="lon", dtype=String), ], batch_source=driver_locations_source, ) diff --git a/docs/reference/feature-repository/README.md b/docs/reference/feature-repository/README.md index aba6ffb408..c4d02d7b64 100644 --- a/docs/reference/feature-repository/README.md +++ b/docs/reference/feature-repository/README.md @@ -94,7 +94,7 @@ A feature repository can also contain one or more Python files that contain feat ```python from datetime import timedelta -from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType +from feast import BigQuerySource, Entity, Feature, FeatureView, Field, Float32, String, ValueType driver_locations_source = BigQuerySource( table_ref="rh_prod.ride_hailing_co.drivers", @@ -112,9 +112,9 @@ driver_locations = FeatureView( name="driver_locations", entities=["driver"], ttl=timedelta(days=1), - features=[ - Feature(name="lat", dtype=ValueType.FLOAT), - Feature(name="lon", dtype=ValueType.STRING), + schema=[ + Field(name="lat", dtype=Float32), + Field(name="lon", dtype=String), ], batch_source=driver_locations_source, ) diff --git a/docs/tutorials/validating-historical-features.md b/docs/tutorials/validating-historical-features.md index e9124b08ed..79d16a74b7 100644 --- a/docs/tutorials/validating-historical-features.md +++ b/docs/tutorials/validating-historical-features.md @@ -107,7 +107,7 @@ pyarrow.parquet.write_table(entities_2019_table, "entities.parquet") import pyarrow.parquet import pandas as pd -from feast import Feature, FeatureView, Entity, FeatureStore +from feast import 
Feature, FeatureView, Entity, FeatureStore, Field, Float64, Int64 from feast.value_type import ValueType from feast.data_format import ParquetFormat from feast.on_demand_feature_view import on_demand_feature_view @@ -137,10 +137,10 @@ trips_stats_fv = FeatureView( name='trip_stats', entities=['taxi'], features=[ - Feature("total_miles_travelled", ValueType.DOUBLE), - Feature("total_trip_seconds", ValueType.DOUBLE), - Feature("total_earned", ValueType.DOUBLE), - Feature("trip_count", ValueType.INT64), + Field(name="total_miles_travelled", dtype=Float64), + Field(name="total_trip_seconds", dtype=Float64), + Field(name="total_earned", dtype=Float64), + Field(name="trip_count", dtype=Int64), ], ttl=Duration(seconds=86400), diff --git a/protos/feast/core/FeatureView.proto b/protos/feast/core/FeatureView.proto index a4fca76df9..2662350540 100644 --- a/protos/feast/core/FeatureView.proto +++ b/protos/feast/core/FeatureView.proto @@ -48,7 +48,7 @@ message FeatureViewSpec { // Feature View. Not updatable. repeated string entities = 3; - // List of features specifications for each feature defined with this feature view. + // List of specifications for each field defined as part of this feature view. repeated FeatureSpecV2 features = 4; // Description of the feature view. 
diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 144b87b042..6e3a3955af 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -13,9 +13,22 @@ from .feature_service import FeatureService from .feature_store import FeatureStore from .feature_view import FeatureView +from .field import Field from .on_demand_feature_view import OnDemandFeatureView from .repo_config import RepoConfig from .request_feature_view import RequestFeatureView +from .types import ( + Array, + Bool, + Bytes, + Float32, + Float64, + Int32, + Int64, + Invalid, + String, + UnixTimestamp, +) from .value_type import ValueType logging.basicConfig( @@ -35,6 +48,7 @@ "KafkaSource", "KinesisSource", "Feature", + "Field", "FeatureService", "FeatureStore", "FeatureView", @@ -48,4 +62,15 @@ "RequestFeatureView", "SnowflakeSource", "PushSource", + # Types + "Array", + "Invalid", + "Bytes", + "String", + "Bool", + "Int32", + "Int64", + "Float32", + "Float64", + "UnixTimestamp", ] diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 5078d021fb..67435fa44c 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -18,8 +18,8 @@ from google.protobuf.json_format import MessageToJson from proto import Message -from feast.feature import Feature from feast.feature_view_projection import FeatureViewProjection +from feast.field import Field class BaseFeatureView(ABC): @@ -41,7 +41,7 @@ class BaseFeatureView(ABC): """ name: str - features: List[Feature] + features: List[Field] description: str tags: Dict[str, str] owner: str @@ -53,8 +53,8 @@ class BaseFeatureView(ABC): def __init__( self, *, - name: Optional[str] = None, - features: Optional[List[Feature]] = None, + name: str, + features: Optional[List[Field]] = None, description: str = "", tags: Optional[Dict[str, str]] = None, owner: str = "", @@ -64,7 +64,7 @@ def __init__( Args: name: The unique name of the base feature view. 
- features: The list of features defined as part of this base feature view. + features (optional): The list of features defined as part of this base feature view. description (optional): A human-readable description. tags (optional): A dictionary of key-value pairs to store arbitrary metadata. owner (optional): The owner of the base feature view, typically the email of the @@ -73,12 +73,9 @@ def __init__( Raises: ValueError: A field mapping conflicts with an Entity or a Feature. """ - if not name: - raise ValueError("Name needs to be provided") + assert name is not None self.name = name - self.features = features or [] - self.description = description self.tags = tags or {} self.owner = owner diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 33fe3fe49b..1a885443b9 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -24,6 +24,7 @@ from feast.entity import Entity from feast.feature import Feature from feast.feature_view_projection import FeatureViewProjection +from feast.field import Field from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto from feast.protos.feast.core.FeatureView_pb2 import ( FeatureViewMeta as FeatureViewMetaProto, @@ -58,12 +59,15 @@ class FeatureView(BaseFeatureView): ttl: The amount of time this group of features lives. A ttl of 0 indicates that this group of features lives forever. Note that large ttl's or a ttl of 0 can result in extremely computationally intensive queries. - batch_source (optional): The batch source of data where this group of features is stored. - This is optional ONLY a push source is specified as the stream_source, since push sources - contain their own batch sources. + batch_source (optional): The batch source of data where this group of features + is stored. This is optional ONLY if a push source is specified as the + stream_source, since push sources contain their own batch sources. 
stream_source (optional): The stream source of data where this group of features is stored. - features: The list of features defined as part of this feature view. + schema: The schema of the feature view, including feature, timestamp, and entity + columns. + features: The list of features defined as part of this feature view. Each + feature should also be included in the schema. online: A boolean indicating whether online retrieval is enabled for this feature view. description: A human-readable description. @@ -77,7 +81,8 @@ class FeatureView(BaseFeatureView): ttl: Optional[timedelta] batch_source: DataSource stream_source: Optional[DataSource] - features: List[Feature] + schema: List[Field] + features: List[Field] online: bool description: str tags: Dict[str, str] @@ -98,6 +103,7 @@ def __init__( online: bool = True, description: str = "", owner: str = "", + schema: Optional[List[Field]] = None, ): """ Creates a FeatureView object. @@ -111,13 +117,15 @@ def __init__( batch_source: The batch source of data where this group of features is stored. stream_source (optional): The stream source of data where this group of features is stored. - features (optional): The list of features defined as part of this feature view. + features (deprecated): The list of features defined as part of this feature view. tags (optional): A dictionary of key-value pairs to store arbitrary metadata. online (optional): A boolean indicating whether online retrieval is enabled for this feature view. description (optional): A human-readable description. owner (optional): The owner of the feature view, typically the email of the primary maintainer. + schema (optional): The schema of the feature view, including feature, timestamp, + and entity columns. Raises: ValueError: A field mapping conflicts with an Entity or a Feature. 
@@ -170,7 +178,26 @@ def __init__( else: raise ValueError(f"unknown value type specified for ttl {type(_ttl)}") - _features = features or [] + if features is not None: + warnings.warn( + ( + "The `features` parameter is being deprecated in favor of the `schema` parameter. " + "Please switch from using `features` to `schema`. This will also require switching " + "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not " + "support the `features` parameter." + ), + DeprecationWarning, + ) + + _schema = schema or [] + if len(_schema) == 0 and features is not None: + _schema = [Field.from_feature(feature) for feature in features] + self.schema = _schema + + # TODO(felixwang9817): Infer which fields in the schema are features, timestamps, + # and entities. For right now we assume that all fields are features, since the + # current `features` parameter only accepts feature columns. + _features = _schema if stream_source is not None and isinstance(stream_source, PushSource): if stream_source.batch_source is None or not isinstance( @@ -187,7 +214,9 @@ def __init__( ) self.batch_source = batch_source - cols = [entity for entity in self.entities] + [feat.name for feat in _features] + cols = [entity for entity in self.entities] + [ + field.name for field in _features + ] for col in cols: if ( self.batch_source.field_mapping is not None @@ -200,7 +229,7 @@ def __init__( ) super().__init__( - name=name, + name=_name, features=_features, description=description, tags=tags, @@ -221,7 +250,7 @@ def __copy__(self): ttl=self.ttl, batch_source=self.batch_source, stream_source=self.stream_source, - features=self.features, + schema=self.schema, tags=self.tags, online=self.online, ) @@ -338,7 +367,7 @@ def to_proto(self) -> FeatureViewProto: spec = FeatureViewSpecProto( name=self.name, entities=self.entities, - features=[feature.to_proto() for feature in self.features], + features=[field.to_proto() for field in self.schema], description=self.description,
tags=self.tags, owner=self.owner, @@ -370,13 +399,9 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): feature_view = cls( name=feature_view_proto.spec.name, entities=[entity for entity in feature_view_proto.spec.entities], - features=[ - Feature( - name=feature.name, - dtype=ValueType(feature.value_type), - labels=dict(feature.labels), - ) - for feature in feature_view_proto.spec.features + schema=[ + Field.from_proto(field_proto) + for field_proto in feature_view_proto.spec.features ], description=feature_view_proto.spec.description, tags=dict(feature_view_proto.spec.tags), diff --git a/sdk/python/feast/feature_view_projection.py b/sdk/python/feast/feature_view_projection.py index 04d923122c..a8e0e8cfe5 100644 --- a/sdk/python/feast/feature_view_projection.py +++ b/sdk/python/feast/feature_view_projection.py @@ -1,12 +1,15 @@ -from typing import Dict, List, Optional +from typing import TYPE_CHECKING, Dict, List, Optional from attr import dataclass -from feast.feature import Feature +from feast.field import Field from feast.protos.feast.core.FeatureViewProjection_pb2 import ( FeatureViewProjection as FeatureViewProjectionProto, ) +if TYPE_CHECKING: + from feast.base_feature_view import BaseFeatureView + @dataclass class FeatureViewProjection: @@ -24,7 +27,7 @@ class FeatureViewProjection: name: str name_alias: Optional[str] - features: List[Feature] + features: List[Field] join_key_map: Dict[str, str] = {} def name_to_use(self): @@ -50,14 +53,14 @@ def from_proto(proto: FeatureViewProjectionProto): join_key_map=dict(proto.join_key_map), ) for feature_column in proto.feature_columns: - feature_view_projection.features.append(Feature.from_proto(feature_column)) + feature_view_projection.features.append(Field.from_proto(feature_column)) return feature_view_projection @staticmethod - def from_definition(feature_grouping): + def from_definition(base_feature_view: "BaseFeatureView"): return FeatureViewProjection( - name=feature_grouping.name, + 
name=base_feature_view.name, name_alias=None, - features=feature_grouping.features, + features=base_feature_view.features, ) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index a2726f9a70..9d15a6a25f 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -1,17 +1,11 @@ import re from typing import List -from feast import ( - BigQuerySource, - Entity, - Feature, - FileSource, - RedshiftSource, - SnowflakeSource, -) +from feast import BigQuerySource, Entity, FileSource, RedshiftSource, SnowflakeSource from feast.data_source import DataSource, RequestSource from feast.errors import RegistryInferenceFailure from feast.feature_view import FeatureView +from feast.field import Field, from_value_type from feast.repo_config import RepoConfig from feast.value_type import ValueType @@ -193,14 +187,18 @@ def update_feature_views_with_inferred_features( if col_name in fv.batch_source.field_mapping else col_name ) - fv.features.append( - Feature( - feature_name, + field = Field( + name=feature_name, + dtype=from_value_type( fv.batch_source.source_datatype_to_feast_value_type()( col_datatype - ), - ) + ) + ), ) + # Note that schema and features are two different attributes of a + # FeatureView, and that features should be present in both. 
+ fv.schema.append(field) + fv.features.append(field) if not fv.features: raise RegistryInferenceFailure( diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index b1d4ea39f4..b379193ba3 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -352,9 +352,9 @@ def _convert_arrow_to_proto( if isinstance(table, pyarrow.Table): table = table.to_batches()[0] - columns = [(f.name, f.dtype) for f in feature_view.features] + list( - join_keys.items() - ) + columns = [ + (field.name, field.dtype.to_value_type()) for field in feature_view.schema + ] + list(join_keys.items()) proto_values_by_column = { column: python_values_to_proto_values( diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 9ea1c055de..33c4c25508 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -13,6 +13,7 @@ from feast.feature import Feature from feast.feature_view import FeatureView from feast.feature_view_projection import FeatureViewProjection +from feast.field import Field, from_value_type from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( OnDemandFeatureView as OnDemandFeatureViewProto, ) @@ -57,7 +58,7 @@ class OnDemandFeatureView(BaseFeatureView): # TODO(adchia): remove inputs from proto and declaration name: str - features: List[Feature] + features: List[Field] source_feature_view_projections: Dict[str, FeatureViewProjection] source_request_sources: Dict[str, RequestSource] udf: MethodType @@ -68,8 +69,9 @@ class OnDemandFeatureView(BaseFeatureView): @log_exceptions def __init__( self, - name: str, - features: List[Feature], + *args, + name: Optional[str] = None, + features: Optional[List[Feature]] = None, sources: Optional[ Dict[str, Union[FeatureView, FeatureViewProjection, RequestSource]] ] = None, @@ -77,6 +79,7 @@ def __init__( inputs: Optional[ Dict[str, Union[FeatureView, FeatureViewProjection, 
RequestSource]] ] = None, + schema: Optional[List[Field]] = None, description: str = "", tags: Optional[Dict[str, str]] = None, owner: str = "", @@ -86,8 +89,8 @@ Args: name: The unique name of the on demand feature view. - features: The list of features in the output of the on demand feature view, after - the transformation has been applied. + features (deprecated): The list of features in the output of the on demand + feature view, after the transformation has been applied. sources (optional): A map from input source names to the actual input sources, which may be feature views, feature view projections, or request data sources. These sources serve as inputs to the udf, which will refer to them by name. @@ -96,18 +99,32 @@ inputs (optional): A map from input source names to the actual input sources, which may be feature views, feature view projections, or request data sources. These sources serve as inputs to the udf, which will refer to them by name. + schema (optional): The list of features in the output of the on demand feature + view, after the transformation has been applied. description (optional): A human-readable description. tags (optional): A dictionary of key-value pairs to store arbitrary metadata. owner (optional): The owner of the on demand feature view, typically the email of the primary maintainer. """ - super().__init__( - name=name, - features=features, - description=description, - tags=tags, - owner=owner, - ) + positional_attributes = ["name", "features", "sources", "udf"] + + _name = name + + _schema = schema or [] + if len(_schema) == 0 and features is not None: + _schema = [Field.from_feature(feature) for feature in features] + if features is not None: + warnings.warn( + ( + "The `features` parameter is being deprecated in favor of the `schema` parameter. " + "Please switch from using `features` to `schema`. This will also require switching " + "feature definitions from using `Feature` to `Field`. 
Feast 0.21 and onwards will not " + "support the `features` parameter." + ), + DeprecationWarning, + ) + + _sources = sources or inputs if inputs and sources: raise ValueError("At most one of `sources` or `inputs` can be specified.") elif inputs: @@ -118,14 +135,71 @@ ), DeprecationWarning, ) - sources = inputs - elif not inputs and not sources: - raise ValueError("At least one of `inputs` or `sources` must be specified.") - assert sources is not None + _udf = udf + + if args: + warnings.warn( + ( + "On demand feature view parameters should be specified as keyword arguments " + "instead of positional arguments. Feast 0.23 and onwards will not support " + "positional arguments in on demand feature view definitions." + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args " + f"when defining feature views, for backwards compatibility." + ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _schema = args[1] + # Convert Features to Fields. + if len(_schema) > 0 and isinstance(_schema[0], Feature): + _schema = [Field.from_feature(feature) for feature in _schema] + warnings.warn( + ( + "The `features` parameter is being deprecated in favor of the `schema` parameter. " + "Please switch from using `features` to `schema`. This will also require switching " + "feature definitions from using `Feature` to `Field`. Feast 0.21 and onwards will not " + "support the `features` parameter." + ), + DeprecationWarning, + ) + if len(args) >= 3: + _sources = args[2] + warnings.warn( + ( + "The `inputs` parameter is being deprecated. Please use `sources` instead. " + "Feast 0.21 and onwards will not support the `inputs` parameter." + ), + DeprecationWarning, + ) + if len(args) >= 4: + _udf = args[3] + + if not _name: + raise ValueError( + "The name of the on demand feature view must be specified."
+ ) + + if not _sources: + raise ValueError("The `sources` parameter must be specified.") + + super().__init__( + name=_name, + features=_schema, + description=description, + tags=tags, + owner=owner, + ) + + assert _sources is not None self.source_feature_view_projections: Dict[str, FeatureViewProjection] = {} self.source_request_sources: Dict[str, RequestSource] = {} - for source_name, odfv_source in sources.items(): + for source_name, odfv_source in _sources.items(): if isinstance(odfv_source, RequestSource): self.source_request_sources[source_name] = odfv_source elif isinstance(odfv_source, FeatureViewProjection): @@ -135,10 +209,10 @@ def __init__( source_name ] = odfv_source.projection - if udf is None: + if _udf is None: raise ValueError("The `udf` parameter must be specified.") - assert udf - self.udf = udf + assert _udf + self.udf = _udf @property def proto_class(self) -> Type[OnDemandFeatureViewProto]: @@ -147,7 +221,7 @@ def proto_class(self) -> Type[OnDemandFeatureViewProto]: def __copy__(self): fv = OnDemandFeatureView( name=self.name, - features=self.features, + schema=self.features, sources=dict( **self.source_feature_view_projections, **self.source_request_sources, ), @@ -242,11 +316,10 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): ) on_demand_feature_view_obj = cls( name=on_demand_feature_view_proto.spec.name, - features=[ - Feature( + schema=[ + Field( name=feature.name, - dtype=ValueType(feature.value_type), - labels=dict(feature.labels), + dtype=from_value_type(ValueType(feature.value_type)), ) for feature in on_demand_feature_view_proto.spec.features ], @@ -330,7 +403,7 @@ def infer_features(self): df = pd.DataFrame() for feature_view_projection in self.source_feature_view_projections.values(): for feature in feature_view_projection.features: - dtype = feast_value_type_to_pandas_type(feature.dtype) + dtype = feast_value_type_to_pandas_type(feature.dtype.to_value_type()) 
df[f"{feature_view_projection.name}__{feature.name}"] = pd.Series( dtype=dtype ) @@ -343,8 +416,11 @@ def infer_features(self): inferred_features = [] for f, dt in zip(output_df.columns, output_df.dtypes): inferred_features.append( - Feature( - name=f, dtype=python_type_to_feast_value_type(f, type_name=str(dt)) + Field( + name=f, + dtype=from_value_type( + python_type_to_feast_value_type(f, type_name=str(dt)) + ), ) ) @@ -380,6 +456,8 @@ def get_requested_odfvs(feature_refs, project, registry): return requested_on_demand_feature_views +# TODO(felixwang9817): Force this decorator to accept kwargs and switch from +# `features` to `schema`. def on_demand_feature_view( features: List[Feature], sources: Dict[str, Union[FeatureView, RequestSource]] ): diff --git a/sdk/python/feast/request_feature_view.py b/sdk/python/feast/request_feature_view.py index 52b1fa56a9..863a4b4964 100644 --- a/sdk/python/feast/request_feature_view.py +++ b/sdk/python/feast/request_feature_view.py @@ -4,8 +4,8 @@ from feast.base_feature_view import BaseFeatureView from feast.data_source import RequestSource -from feast.feature import Feature from feast.feature_view_projection import FeatureViewProjection +from feast.field import Field, from_value_type from feast.protos.feast.core.RequestFeatureView_pb2 import ( RequestFeatureView as RequestFeatureViewProto, ) @@ -31,7 +31,7 @@ class RequestFeatureView(BaseFeatureView): name: str request_source: RequestSource - features: List[Feature] + features: List[Field] description: str tags: Dict[str, str] owner: str @@ -66,8 +66,8 @@ def __init__( super().__init__( name=name, features=[ - Feature(name=name, dtype=dtype) - for name, dtype in request_data_source.schema.items() + Field(name=name, dtype=from_value_type(value_type)) + for name, value_type in request_data_source.schema.items() ], description=description, tags=tags, diff --git a/sdk/python/feast/templates/aws/driver_repo.py b/sdk/python/feast/templates/aws/driver_repo.py index 
8c2f884490..f77afbd60f 100644 --- a/sdk/python/feast/templates/aws/driver_repo.py +++ b/sdk/python/feast/templates/aws/driver_repo.py @@ -1,6 +1,6 @@ from datetime import timedelta -from feast import Entity, Feature, FeatureView, RedshiftSource, ValueType +from feast import Entity, FeatureView, Field, Float32, Int64, RedshiftSource, ValueType # Define an entity for the driver. Entities can be thought of as primary keys used to # retrieve features. Entities are also used to join multiple tables/views during the @@ -51,10 +51,10 @@ # The list of features defined below act as a schema to both define features # for both materialization of features into a store, and are used as references # during retrieval for building a training dataset or serving features - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], # Batch sources are used to find feature values. In the case of this feature # view we will query a source table on Redshift for driver statistics diff --git a/sdk/python/feast/templates/gcp/driver_repo.py b/sdk/python/feast/templates/gcp/driver_repo.py index 7ad7586020..25ba6edccd 100644 --- a/sdk/python/feast/templates/gcp/driver_repo.py +++ b/sdk/python/feast/templates/gcp/driver_repo.py @@ -1,6 +1,6 @@ from datetime import timedelta -from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType +from feast import BigQuerySource, Entity, FeatureView, Field, Float32, Int64, ValueType # Define an entity for the driver. Entities can be thought of as primary keys used to # retrieve features. 
Entities are also used to join multiple tables/views during the @@ -49,10 +49,10 @@ # The list of features defined below act as a schema to both define features # for both materialization of features into a store, and are used as references # during retrieval for building a training dataset or serving features - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], # Batch sources are used to find feature values. In the case of this feature # view we will query a source table on BigQuery for driver statistics diff --git a/sdk/python/feast/templates/local/example.py b/sdk/python/feast/templates/local/example.py index 4ce2533325..859a0dd20e 100644 --- a/sdk/python/feast/templates/local/example.py +++ b/sdk/python/feast/templates/local/example.py @@ -2,7 +2,7 @@ from datetime import timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast import Entity, FeatureView, Field, FileSource, Float32, Int64, ValueType # Read data from parquet files. Parquet is convenient for local development mode. For # production, you can use your favorite DWH, such as BigQuery. 
See Feast documentation @@ -24,10 +24,10 @@ name="driver_hourly_stats", entities=["driver_id"], ttl=timedelta(days=1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py index b05080038c..e656f90226 100644 --- a/sdk/python/feast/templates/snowflake/driver_repo.py +++ b/sdk/python/feast/templates/snowflake/driver_repo.py @@ -2,7 +2,7 @@ import yaml -from feast import Entity, Feature, FeatureView, SnowflakeSource, ValueType +from feast import Entity, FeatureView, Field, Float32, Int64, SnowflakeSource # Define an entity for the driver. Entities can be thought of as primary keys used to # retrieve features. Entities are also used to join multiple tables/views during the @@ -53,10 +53,10 @@ # The list of features defined below act as a schema to both define features # for both materialization of features into a store, and are used as references # during retrieval for building a training dataset or serving features - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], # Batch sources are used to find feature values. 
In the case of this feature # view we will query a source table on Redshift for driver statistics diff --git a/sdk/python/feast/templates/spark/example.py b/sdk/python/feast/templates/spark/example.py index ce565923d8..24c0485289 100644 --- a/sdk/python/feast/templates/spark/example.py +++ b/sdk/python/feast/templates/spark/example.py @@ -5,7 +5,7 @@ from datetime import timedelta from pathlib import Path -from feast import Entity, Feature, FeatureView, ValueType +from feast import Entity, FeatureView, Field, Float32, Int64, ValueType from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( SparkSource, ) @@ -41,10 +41,10 @@ name="driver_hourly_stats", entities=["driver_id"], ttl=timedelta(days=7), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, @@ -54,10 +54,10 @@ name="customer_daily_profile", entities=["customer_id"], ttl=timedelta(days=7), - features=[ - Feature(name="current_balance", dtype=ValueType.FLOAT), - Feature(name="avg_passenger_count", dtype=ValueType.FLOAT), - Feature(name="lifetime_trip_count", dtype=ValueType.INT64), + schema=[ + Field(name="current_balance", dtype=Float32), + Field(name="avg_passenger_count", dtype=Float32), + Field(name="lifetime_trip_count", dtype=Int64), ], online=True, batch_source=customer_daily_profile, diff --git a/sdk/python/tests/doctest/test_all.py b/sdk/python/tests/doctest/test_all.py index 7d29c406e9..e7d855c4b3 100644 --- a/sdk/python/tests/doctest/test_all.py +++ b/sdk/python/tests/doctest/test_all.py @@ -11,7 +11,16 @@ def setup_feature_store(): """Prepares the local environment for a FeatureStore docstring test.""" from datetime import datetime, timedelta - from feast import 
Entity, Feature, FeatureStore, FeatureView, FileSource, ValueType + from feast import ( + Entity, + FeatureStore, + FeatureView, + Field, + FileSource, + Float32, + Int64, + ValueType, + ) from feast.repo_operations import init_repo init_repo("feature_repo", "local") @@ -28,10 +37,10 @@ def setup_feature_store(): name="driver_hourly_stats", entities=["driver_id"], ttl=timedelta(seconds=86400 * 1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], batch_source=driver_hourly_stats, ) diff --git a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index 0ceeb605bc..2237ad62cf 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -3,10 +3,13 @@ from feast import ( BigQuerySource, Entity, - Feature, FeatureService, FeatureView, + Field, + Float32, + Int64, PushSource, + String, ValueType, ) @@ -66,10 +69,7 @@ name="driver_locations", entities=["driver"], ttl=timedelta(days=1), - features=[ - Feature(name="lat", dtype=ValueType.FLOAT), - Feature(name="lon", dtype=ValueType.STRING), - ], + schema=[Field(name="lat", dtype=Float32), Field(name="lon", dtype=String)], online=True, batch_source=driver_locations_source, tags={}, @@ -79,9 +79,9 @@ name="pushed_driver_locations", entities=["driver"], ttl=timedelta(days=1), - features=[ - Feature(name="driver_lat", dtype=ValueType.FLOAT), - Feature(name="driver_long", dtype=ValueType.STRING), + schema=[ + Field(name="driver_lat", dtype=Float32), + Field(name="driver_long", dtype=String), ], online=True, stream_source=driver_locations_push_source, @@ -92,10 +92,10 @@ name="customer_profile", entities=["customer"], 
ttl=timedelta(days=1), - features=[ - Feature(name="avg_orders_day", dtype=ValueType.FLOAT), - Feature(name="name", dtype=ValueType.STRING), - Feature(name="age", dtype=ValueType.INT64), + schema=[ + Field(name="avg_orders_day", dtype=Float32), + Field(name="name", dtype=String), + Field(name="age", dtype=Int64), ], online=True, batch_source=customer_profile_source, @@ -106,7 +106,7 @@ name="customer_driver_combined", entities=["customer", "driver"], ttl=timedelta(days=1), - features=[Feature(name="trips", dtype=ValueType.INT64)], + schema=[Field(name="trips", dtype=Int64)], online=True, batch_source=customer_driver_combined_source, tags={}, diff --git a/sdk/python/tests/example_repos/example_feature_repo_2.py b/sdk/python/tests/example_repos/example_feature_repo_2.py index fe95291bda..0dc63c3f2c 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_2.py +++ b/sdk/python/tests/example_repos/example_feature_repo_2.py @@ -1,6 +1,15 @@ from datetime import timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast import ( + Entity, + FeatureView, + Field, + FileSource, + Float32, + Int32, + Int64, + ValueType, +) driver_hourly_stats = FileSource( path="%PARQUET_PATH%", # placeholder to be replaced by the test @@ -15,10 +24,10 @@ name="driver_hourly_stats", entities=["driver_id"], ttl=timedelta(days=1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, @@ -37,9 +46,9 @@ name="global_daily_stats", entities=[], ttl=timedelta(days=1), - features=[ - Feature(name="num_rides", dtype=ValueType.INT32), - Feature(name="avg_ride_length", dtype=ValueType.FLOAT), + schema=[ + Field(name="num_rides", dtype=Int32), + 
Field(name="avg_ride_length", dtype=Float32), ], online=True, batch_source=global_daily_stats, diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py index 110ea163de..032f61fad3 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py @@ -1,6 +1,6 @@ from datetime import timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType +from feast import Entity, FeatureView, Field, FileSource, Float32, Int64, ValueType driver_hourly_stats = FileSource( path="%PARQUET_PATH%", # placeholder to be replaced by the test @@ -22,10 +22,10 @@ name="driver_hourly_stats", entities=["driver_id"], ttl=timedelta(days=1), - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), ], online=True, batch_source=driver_hourly_stats, diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index fb5bd8e455..02d8baddad 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -4,20 +4,32 @@ import numpy as np import pandas as pd -from feast import Feature, FeatureView, OnDemandFeatureView, PushSource, ValueType +from feast import ( + Array, + Feature, + FeatureView, + Field, + Float32, + Float64, + Int32, + OnDemandFeatureView, + PushSource, + ValueType, +) from feast.data_source import DataSource, RequestSource +from feast.types import FeastType def driver_feature_view( data_source: DataSource, 
name="test_correctness", infer_features: bool = False, - value_type: ValueType = ValueType.FLOAT, + dtype: FeastType = Float32, ) -> FeatureView: return FeatureView( name=name, entities=["driver"], - features=None if infer_features else [Feature("value", value_type)], + schema=None if infer_features else [Field(name="value", dtype=dtype)], ttl=timedelta(days=5), batch_source=data_source, ) @@ -32,7 +44,10 @@ def global_feature_view( return FeatureView( name=name, entities=[], - features=None if infer_features else [Feature("entityless_value", value_type)], + # Test that Features still work for FeatureViews. + features=None + if infer_features + else [Feature(name="entityless_value", dtype=value_type)], ttl=timedelta(days=5), batch_source=data_source, ) @@ -55,16 +70,17 @@ def conv_rate_plus_100_feature_view( infer_features: bool = False, features: Optional[List[Feature]] = None, ) -> OnDemandFeatureView: + # Test that positional arguments and Features still work for ODFVs. _features = features or [ - Feature("conv_rate_plus_100", ValueType.DOUBLE), - Feature("conv_rate_plus_val_to_add", ValueType.DOUBLE), - Feature("conv_rate_plus_100_rounded", ValueType.INT32), + Feature(name="conv_rate_plus_100", dtype=ValueType.DOUBLE), + Feature(name="conv_rate_plus_val_to_add", dtype=ValueType.DOUBLE), + Feature(name="conv_rate_plus_100_rounded", dtype=ValueType.INT32), ] return OnDemandFeatureView( - name=conv_rate_plus_100.__name__, - sources=sources, - features=[] if infer_features else _features, - udf=conv_rate_plus_100, + conv_rate_plus_100.__name__, + [] if infer_features else _features, + sources, + conv_rate_plus_100, ) @@ -90,14 +106,17 @@ def similarity_feature_view( infer_features: bool = False, features: Optional[List[Feature]] = None, ) -> OnDemandFeatureView: - _features = features or [ - Feature("cos_double", ValueType.DOUBLE), - Feature("cos_float", ValueType.FLOAT), + _fields = [ + Field(name="cos_double", dtype=Float64), + Field(name="cos_float", 
dtype=Float32), ] + if features is not None: + _fields = [Field.from_feature(feature) for feature in features] + return OnDemandFeatureView( name=similarity.__name__, sources=sources, - features=[] if infer_features else _features, + schema=[] if infer_features else _fields, udf=similarity, ) @@ -120,11 +139,11 @@ def create_item_embeddings_feature_view(source, infer_features: bool = False): item_embeddings_feature_view = FeatureView( name="item_embeddings", entities=["item"], - features=None + schema=None if infer_features else [ - Feature(name="embedding_double", dtype=ValueType.DOUBLE_LIST), - Feature(name="embedding_float", dtype=ValueType.FLOAT_LIST), + Field(name="embedding_double", dtype=Array(Float64)), + Field(name="embedding_float", dtype=Array(Float32)), ], batch_source=source, ttl=timedelta(hours=2), @@ -136,12 +155,12 @@ def create_driver_hourly_stats_feature_view(source, infer_features: bool = False driver_stats_feature_view = FeatureView( name="driver_stats", entities=["driver"], - features=None + schema=None if infer_features else [ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT32), + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int32), ], batch_source=source, ttl=timedelta(hours=2), @@ -153,12 +172,12 @@ def create_customer_daily_profile_feature_view(source, infer_features: bool = Fa customer_profile_feature_view = FeatureView( name="customer_profile", entities=["customer_id"], - features=None + schema=None if infer_features else [ - Feature(name="current_balance", dtype=ValueType.FLOAT), - Feature(name="avg_passenger_count", dtype=ValueType.FLOAT), - Feature(name="lifetime_trip_count", dtype=ValueType.INT32), + Field(name="current_balance", dtype=Float32), + Field(name="avg_passenger_count", dtype=Float32), + Field(name="lifetime_trip_count", dtype=Int32), ], 
batch_source=source, ttl=timedelta(days=2), @@ -173,6 +192,7 @@ def create_global_stats_feature_view(source, infer_features: bool = False): features=None if infer_features else [ + # Test that Features still work for FeatureViews. Feature(name="num_rides", dtype=ValueType.INT32), Feature(name="avg_ride_length", dtype=ValueType.FLOAT), ], @@ -186,9 +206,9 @@ def create_order_feature_view(source, infer_features: bool = False): return FeatureView( name="order", entities=["driver", "customer_id"], - features=None + schema=None if infer_features - else [Feature(name="order_is_success", dtype=ValueType.INT32)], + else [Field(name="order_is_success", dtype=Int32)], batch_source=source, ttl=timedelta(days=2), ) @@ -198,9 +218,7 @@ def create_location_stats_feature_view(source, infer_features: bool = False): location_stats_feature_view = FeatureView( name="location_stats", entities=["location_id"], - features=None - if infer_features - else [Feature(name="temperature", dtype=ValueType.INT32)], + schema=None if infer_features else [Field(name="temperature", dtype=Int32)], batch_source=source, ttl=timedelta(days=2), ) @@ -211,6 +229,7 @@ def create_field_mapping_feature_view(source): return FeatureView( name="field_mapping", entities=[], + # Test that Features still work for FeatureViews. features=[Feature(name="feature_name", dtype=ValueType.INT32)], batch_source=source, ttl=timedelta(days=2), @@ -230,6 +249,7 @@ def create_pushable_feature_view(batch_source: DataSource): return FeatureView( name="pushable_location_stats", entities=["location_id"], + # Test that Features still work for FeatureViews. 
features=[Feature(name="temperature", dtype=ValueType.INT32)], ttl=timedelta(days=2), stream_source=push_source, diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index be354058d0..34d1bd5c52 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -15,12 +15,13 @@ FeatureNameCollisionError, RequestDataNotFoundInEntityDfException, ) -from feast.feature import Feature from feast.feature_service import FeatureService from feast.feature_view import FeatureView +from feast.field import Field from feast.infra.offline_stores.offline_utils import ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, ) +from feast.types import Int32 from feast.value_type import ValueType from tests.integration.feature_repos.repo_configuration import ( construct_universal_feature_views, @@ -688,7 +689,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm driver_fv = FeatureView( name="driver_stats", entities=["driver"], - features=[Feature(name="avg_daily_trips", dtype=ValueType.INT32)], + schema=[Field(name="avg_daily_trips", dtype=Int32)], batch_source=driver_stats_data_source, ttl=None, ) diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 113fae5b79..c3923d23e3 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -13,7 +13,7 @@ import requests from botocore.exceptions import BotoCoreError -from feast import Entity, Feature, FeatureService, FeatureView, ValueType +from feast import Entity, FeatureService, FeatureView, Field, String, ValueType from feast.errors import ( FeatureNameCollisionError, 
RequestDataNotFoundInEntityRowsException, @@ -121,7 +121,7 @@ def test_write_to_online_store_event_check(local_redis_environment): # Create Feature View fv1 = FeatureView( name="feature_view_123", - features=[Feature(name="string_col", dtype=ValueType.STRING)], + schema=[Field(name="string_col", dtype=String)], entities=["id"], batch_source=file_source, ttl=timedelta(minutes=5), diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index eed7b144ea..39de7fc688 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -21,14 +21,15 @@ from feast import FileSource from feast.data_format import ParquetFormat from feast.entity import Entity -from feast.feature import Feature from feast.feature_store import FeatureStore from feast.feature_view import FeatureView +from feast.field import Field from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.protos.feast.types import Value_pb2 as ValueProto from feast.repo_config import RepoConfig +from feast.types import Array, Bytes, Float64, Int64, String from feast.value_type import ValueType from tests.utils.data_source_utils import ( prep_file_source, @@ -174,11 +175,11 @@ def test_apply_feature_view_success(test_feature_store): fv1 = FeatureView( name="my_feature_view_1", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), 
+ Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -196,13 +197,13 @@ def test_apply_feature_view_success(test_feature_store): len(feature_views) == 1 and feature_views[0].name == "my_feature_view_1" and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == ValueType.INT64 + and feature_views[0].features[0].dtype == Int64 and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == ValueType.STRING + and feature_views[0].features[1].dtype == String and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == ValueType.STRING_LIST + and feature_views[0].features[2].dtype == Array(String) and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == ValueType.BYTES_LIST + and feature_views[0].features[3].dtype == Array(Bytes) and feature_views[0].entities[0] == "fs1_my_entity_1" ) @@ -262,9 +263,9 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source): (feature.name, feature.dtype) for feature in feature_view_3.features } expected = { - ("float_col", ValueType.DOUBLE), - ("int64_col", ValueType.INT64), - ("string_col", ValueType.STRING), + ("float_col", Float64), + ("int64_col", Int64), + ("string_col", String), } assert ( @@ -297,11 +298,11 @@ def test_apply_feature_view_integration(test_feature_store): fv1 = FeatureView( name="my_feature_view_1", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", 
dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -319,13 +320,13 @@ def test_apply_feature_view_integration(test_feature_store): len(feature_views) == 1 and feature_views[0].name == "my_feature_view_1" and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == ValueType.INT64 + and feature_views[0].features[0].dtype == Int64 and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == ValueType.STRING + and feature_views[0].features[1].dtype == String and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == ValueType.STRING_LIST + and feature_views[0].features[2].dtype == Array(String) and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == ValueType.BYTES_LIST + and feature_views[0].features[3].dtype == Array(Bytes) and feature_views[0].entities[0] == "fs1_my_entity_1" ) @@ -333,13 +334,13 @@ def test_apply_feature_view_integration(test_feature_store): assert ( feature_view.name == "my_feature_view_1" and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == ValueType.INT64 + and feature_view.features[0].dtype == Int64 and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == ValueType.STRING + and feature_view.features[1].dtype == String and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == ValueType.STRING_LIST + and feature_view.features[2].dtype == Array(String) and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == ValueType.BYTES_LIST + and feature_view.features[3].dtype == Array(Bytes) and feature_view.entities[0] == "fs1_my_entity_1" ) @@ -373,11 +374,11 @@ def test_apply_object_and_read(test_feature_store): fv1 = FeatureView( name="my_feature_view_1", - features=[ - 
Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -387,11 +388,11 @@ def test_apply_object_and_read(test_feature_store): fv2 = FeatureView( name="my_feature_view_2", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -440,7 +441,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): # Create Feature View fv1 = FeatureView( name="my_feature_view_1", - features=[Feature(name="string_col", dtype=ValueType.STRING)], + schema=[Field(name="string_col", dtype=String)], entities=["id"], batch_source=file_source, ttl=timedelta(minutes=5), @@ -470,7 +471,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): # Change and apply Feature View fv1 = FeatureView( name="my_feature_view_1", - features=[Feature(name="int64_col", dtype=ValueType.INT64)], + schema=[Field(name="int64_col", dtype=Int64)], entities=["id"], batch_source=file_source, ttl=timedelta(minutes=5), diff --git a/sdk/python/tests/integration/registration/test_inference.py 
b/sdk/python/tests/integration/registration/test_inference.py index aa359771b9..b69191d308 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -172,8 +172,8 @@ def test_on_demand_features_type_inference(): @on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("string_output", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="string_output", dtype=ValueType.STRING), ], ) def test_view(features_df: pd.DataFrame) -> pd.DataFrame: @@ -187,8 +187,8 @@ def test_view(features_df: pd.DataFrame) -> pd.DataFrame: @on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("object_output", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="object_output", dtype=ValueType.STRING), ], ) def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: @@ -203,8 +203,8 @@ def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: @on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("missing", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="missing", dtype=ValueType.STRING), ], ) def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: @@ -225,8 +225,8 @@ def test_datasource_inference(): @on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("string_output", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="string_output", dtype=ValueType.STRING), ], ) def test_view(features_df: pd.DataFrame) -> pd.DataFrame: @@ -240,8 +240,8 @@ def test_view(features_df: pd.DataFrame) -> pd.DataFrame: 
@on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("object_output", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="object_output", dtype=ValueType.STRING), ], ) def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: @@ -256,8 +256,8 @@ def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: @on_demand_feature_view( sources={"date_request": date_request}, features=[ - Feature("output", ValueType.UNIX_TIMESTAMP), - Feature("missing", ValueType.STRING), + Feature(name="output", dtype=ValueType.UNIX_TIMESTAMP), + Feature(name="missing", dtype=ValueType.STRING), ], ) def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index e504771ae7..25b0e7714e 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -24,10 +24,12 @@ from feast.entity import Entity from feast.feature import Feature from feast.feature_view import FeatureView +from feast.field import Field from feast.on_demand_feature_view import RequestSource, on_demand_feature_view from feast.protos.feast.types import Value_pb2 as ValueProto from feast.registry import Registry from feast.repo_config import RegistryConfig +from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType @@ -173,11 +175,11 @@ def test_apply_feature_view_success(test_registry): fv1 = FeatureView( name="my_feature_view_1", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + 
Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -197,13 +199,13 @@ def test_apply_feature_view_success(test_registry): len(feature_views) == 1 and feature_views[0].name == "my_feature_view_1" and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == ValueType.INT64 + and feature_views[0].features[0].dtype == Int64 and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == ValueType.STRING + and feature_views[0].features[1].dtype == String and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == ValueType.STRING_LIST + and feature_views[0].features[2].dtype == Array(String) and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == ValueType.BYTES_LIST + and feature_views[0].features[3].dtype == Array(Bytes) and feature_views[0].entities[0] == "fs1_my_entity_1" ) @@ -211,13 +213,13 @@ def test_apply_feature_view_success(test_registry): assert ( feature_view.name == "my_feature_view_1" and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == ValueType.INT64 + and feature_view.features[0].dtype == Int64 and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == ValueType.STRING + and feature_view.features[1].dtype == String and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == ValueType.STRING_LIST + and feature_view.features[2].dtype == Array(String) and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == ValueType.BYTES_LIST + and feature_view.features[3].dtype == Array(Bytes) and feature_view.entities[0] == 
"fs1_my_entity_1" ) @@ -250,7 +252,7 @@ def test_modify_feature_views_success(test_registry): fv1 = FeatureView( name="my_feature_view_1", - features=[Feature(name="fs1_my_feature_1", dtype=ValueType.INT64)], + schema=[Field(name="fs1_my_feature_1", dtype=Int64)], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, batch_source=batch_source, @@ -300,9 +302,9 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: len(on_demand_feature_views) == 1 and on_demand_feature_views[0].name == "odfv1" and on_demand_feature_views[0].features[0].name == "odfv1_my_feature_1" - and on_demand_feature_views[0].features[0].dtype == ValueType.FLOAT + and on_demand_feature_views[0].features[0].dtype == Float32 and on_demand_feature_views[0].features[1].name == "odfv1_my_feature_2" - and on_demand_feature_views[0].features[1].dtype == ValueType.INT32 + and on_demand_feature_views[0].features[1].dtype == Int32 ) request_schema = on_demand_feature_views[0].get_request_data_schema() assert ( @@ -314,9 +316,9 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: assert ( feature_view.name == "odfv1" and feature_view.features[0].name == "odfv1_my_feature_1" - and feature_view.features[0].dtype == ValueType.FLOAT + and feature_view.features[0].dtype == Float32 and feature_view.features[1].name == "odfv1_my_feature_2" - and feature_view.features[1].dtype == ValueType.INT32 + and feature_view.features[1].dtype == Int32 ) request_schema = feature_view.get_request_data_schema() assert ( @@ -332,7 +334,7 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: len(feature_views) == 1 and feature_views[0].name == "my_feature_view_1" and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == ValueType.INT64 + and feature_views[0].features[0].dtype == Int64 and feature_views[0].entities[0] == "fs1_my_entity_1" ) @@ -340,7 +342,7 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: assert ( feature_view.name == "my_feature_view_1" and 
feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == ValueType.INT64 + and feature_view.features[0].dtype == Int64 and feature_view.entities[0] == "fs1_my_entity_1" ) @@ -366,11 +368,11 @@ def test_apply_feature_view_integration(test_registry): fv1 = FeatureView( name="my_feature_view_1", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, @@ -390,13 +392,13 @@ def test_apply_feature_view_integration(test_registry): len(feature_views) == 1 and feature_views[0].name == "my_feature_view_1" and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == ValueType.INT64 + and feature_views[0].features[0].dtype == Int64 and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == ValueType.STRING + and feature_views[0].features[1].dtype == String and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == ValueType.STRING_LIST + and feature_views[0].features[2].dtype == Array(String) and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == ValueType.BYTES_LIST + and feature_views[0].features[3].dtype == Array(Bytes) and feature_views[0].entities[0] == "fs1_my_entity_1" ) @@ -404,13 +406,13 @@ def test_apply_feature_view_integration(test_registry): assert ( feature_view.name == "my_feature_view_1" and feature_view.features[0].name == "fs1_my_feature_1" - and 
feature_view.features[0].dtype == ValueType.INT64 + and feature_view.features[0].dtype == Int64 and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == ValueType.STRING + and feature_view.features[1].dtype == String and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == ValueType.STRING_LIST + and feature_view.features[2].dtype == Array(String) and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == ValueType.BYTES_LIST + and feature_view.features[3].dtype == Array(Bytes) and feature_view.entities[0] == "fs1_my_entity_1" ) @@ -441,11 +443,11 @@ def test_apply_data_source(test_registry: Registry): fv1 = FeatureView( name="my_feature_view_1", - features=[ - Feature(name="fs1_my_feature_1", dtype=ValueType.INT64), - Feature(name="fs1_my_feature_2", dtype=ValueType.STRING), - Feature(name="fs1_my_feature_3", dtype=ValueType.STRING_LIST), - Feature(name="fs1_my_feature_4", dtype=ValueType.BYTES_LIST), + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], entities=["fs1_my_entity_1"], tags={"team": "matchmaking"}, diff --git a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py index 27f21655ed..61e41e63f0 100644 --- a/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py +++ b/sdk/python/tests/integration/registration/test_universal_odfv_feature_inference.py @@ -3,7 +3,7 @@ import pandas as pd import pytest -from feast import Feature, ValueType +from feast import Field, Float64 from feast.errors import SpecifiedFeaturesNotPresentError from feast.infra.offline_stores.file_source import FileSource from 
tests.integration.feature_repos.universal.entities import customer, driver, item @@ -76,7 +76,7 @@ def test_infer_odfv_features_with_error(environment, universal_data_sources): (entities, datasets, data_sources) = universal_data_sources - features = [Feature("conv_rate_plus_200", ValueType.DOUBLE)] + features = [Field(name="conv_rate_plus_200", dtype=Float64)] driver_hourly_stats = create_driver_hourly_stats_feature_view(data_sources.driver) request_source = create_conv_rate_request_source() driver_odfv = conv_rate_plus_100_feature_view( diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index 59ca119f98..4be055cbe7 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -9,6 +9,7 @@ import pytest from feast.infra.offline_stores.offline_store import RetrievalJob +from feast.types import Array, Bool, Float32, Int32, Int64, UnixTimestamp from feast.value_type import ValueType from tests.data.data_creator import create_dataset from tests.integration.feature_repos.repo_configuration import ( @@ -265,28 +266,28 @@ def create_feature_view( ): if feature_is_list is True: if feature_dtype == "int32": - value_type = ValueType.INT32_LIST + dtype = Array(Int32) elif feature_dtype == "int64": - value_type = ValueType.INT64_LIST + dtype = Array(Int64) elif feature_dtype == "float": - value_type = ValueType.FLOAT_LIST + dtype = Array(Float32) elif feature_dtype == "bool": - value_type = ValueType.BOOL_LIST + dtype = Array(Bool) elif feature_dtype == "datetime": - value_type = ValueType.UNIX_TIMESTAMP_LIST + dtype = Array(UnixTimestamp) else: if feature_dtype == "int32": - value_type = ValueType.INT32 + dtype = Int32 elif feature_dtype == "int64": - value_type = ValueType.INT64 + dtype = Int64 elif feature_dtype == "float": - value_type = ValueType.FLOAT + dtype = Float32 elif 
feature_dtype == "bool": - value_type = ValueType.BOOL + dtype = Bool elif feature_dtype == "datetime": - value_type = ValueType.UNIX_TIMESTAMP + dtype = UnixTimestamp - return driver_feature_view(data_source, name=name, value_type=value_type,) + return driver_feature_view(data_source, name=name, dtype=dtype,) def assert_expected_historical_feature_types( diff --git a/sdk/python/tests/integration/scaffolding/test_partial_apply.py b/sdk/python/tests/integration/scaffolding/test_partial_apply.py index 758ff9e536..3c5b23d620 100644 --- a/sdk/python/tests/integration/scaffolding/test_partial_apply.py +++ b/sdk/python/tests/integration/scaffolding/test_partial_apply.py @@ -2,7 +2,7 @@ import pytest -from feast import BigQuerySource, Feature, FeatureView, ValueType +from feast import BigQuerySource, FeatureView, Field, Float32, String from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.online_read_write_test import basic_rw_test @@ -29,10 +29,10 @@ def test_partial() -> None: name="driver_locations_100", entities=["driver"], ttl=timedelta(days=1), - features=[ - Feature(name="lat", dtype=ValueType.FLOAT), - Feature(name="lon", dtype=ValueType.STRING), - Feature(name="name", dtype=ValueType.STRING), + schema=[ + Field(name="lat", dtype=Float32), + Field(name="lon", dtype=String), + Field(name="name", dtype=String), ], online=True, batch_source=driver_locations_source, diff --git a/sdk/python/tests/unit/infra/test_provider.py b/sdk/python/tests/unit/infra/test_provider.py index 08337ea74e..43c09760e9 100644 --- a/sdk/python/tests/unit/infra/test_provider.py +++ b/sdk/python/tests/unit/infra/test_provider.py @@ -16,9 +16,10 @@ from feast import BigQuerySource from feast.entity import Entity -from feast.feature import Feature from feast.feature_view import FeatureView +from feast.field import Field from feast.infra.provider import _get_column_names +from feast.types import String from feast.value_type import ValueType @@ -29,17 +30,17 @@ def 
test_get_column_names_preserves_feature_ordering(): entities=["my-entity"], ttl=timedelta(days=1), batch_source=BigQuerySource(table="non-existent-mock"), - features=[ - Feature(name="a", dtype=ValueType.STRING), - Feature(name="b", dtype=ValueType.STRING), - Feature(name="c", dtype=ValueType.STRING), - Feature(name="d", dtype=ValueType.STRING), - Feature(name="e", dtype=ValueType.STRING), - Feature(name="f", dtype=ValueType.STRING), - Feature(name="g", dtype=ValueType.STRING), - Feature(name="h", dtype=ValueType.STRING), - Feature(name="i", dtype=ValueType.STRING), - Feature(name="j", dtype=ValueType.STRING), + schema=[ + Field(name="a", dtype=String), + Field(name="b", dtype=String), + Field(name="c", dtype=String), + Field(name="d", dtype=String), + Field(name="e", dtype=String), + Field(name="f", dtype=String), + Field(name="g", dtype=String), + Field(name="h", dtype=String), + Field(name="i", dtype=String), + Field(name="j", dtype=String), ], ) diff --git a/sdk/python/tests/utils/online_write_benchmark.py b/sdk/python/tests/utils/online_write_benchmark.py index 77d3fe5838..6d6b73d5da 100644 --- a/sdk/python/tests/utils/online_write_benchmark.py +++ b/sdk/python/tests/utils/online_write_benchmark.py @@ -11,11 +11,12 @@ from feast import FileSource from feast.driver_test_data import create_driver_hourly_stats_df from feast.entity import Entity -from feast.feature import Feature from feast.feature_store import FeatureStore from feast.feature_view import FeatureView +from feast.field import Field from feast.infra.provider import _convert_arrow_to_proto from feast.repo_config import RepoConfig +from feast.types import Float32, Int32 from feast.value_type import ValueType @@ -23,10 +24,10 @@ def create_driver_hourly_stats_feature_view(source): driver_stats_feature_view = FeatureView( name="driver_stats", entities=["driver_id"], - features=[ - Feature(name="conv_rate", dtype=ValueType.FLOAT), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - 
Feature(name="avg_daily_trips", dtype=ValueType.INT32), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int32), ], batch_source=source, ttl=timedelta(hours=2), diff --git a/ui/feature_repo/features.py b/ui/feature_repo/features.py index 16c5fa049d..934e712447 100644 --- a/ui/feature_repo/features.py +++ b/ui/feature_repo/features.py @@ -1,11 +1,14 @@ from datetime import timedelta from feast import ( + Bool, Entity, - Feature, FeatureService, FeatureView, + Field, FileSource, + Int64, + String, ValueType, ) from feast.data_source import RequestSource @@ -31,13 +34,13 @@ name="zipcode_features", entities=["zipcode"], ttl=timedelta(days=3650), - features=[ - Feature(name="city", dtype=ValueType.STRING), - Feature(name="state", dtype=ValueType.STRING), - Feature(name="location_type", dtype=ValueType.STRING), - Feature(name="tax_returns_filed", dtype=ValueType.INT64), - Feature(name="population", dtype=ValueType.INT64), - Feature(name="total_wages", dtype=ValueType.INT64), + schema=[ + Field(name="city", dtype=String), + Field(name="state", dtype=String), + Field(name="location_type", dtype=String), + Field(name="tax_returns_filed", dtype=Int64), + Field(name="population", dtype=Int64), + Field(name="total_wages", dtype=Int64), ], batch_source=zipcode_source, tags={ @@ -52,13 +55,13 @@ name="zipcode_features", entities=["zipcode"], ttl=timedelta(days=3650), - features=[ - Feature(name="city", dtype=ValueType.STRING), - Feature(name="state", dtype=ValueType.STRING), - Feature(name="location_type", dtype=ValueType.STRING), - Feature(name="tax_returns_filed", dtype=ValueType.INT64), - Feature(name="population", dtype=ValueType.INT64), - Feature(name="total_wages", dtype=ValueType.INT64), + schema=[ + Field(name="city", dtype=String), + Field(name="state", dtype=String), + Field(name="location_type", dtype=String), + Field(name="tax_returns_filed", dtype=Int64), + Field(name="population", 
dtype=Int64), + Field(name="total_wages", dtype=Int64), ], batch_source=zipcode_source, tags={ @@ -73,9 +76,9 @@ name="zipcode_money_features", entities=["zipcode"], ttl=timedelta(days=3650), - features=[ - Feature(name="tax_returns_filed", dtype=ValueType.INT64), - Feature(name="total_wages", dtype=ValueType.INT64), + schema=[ + Field(name="tax_returns_filed", dtype=Int64), + Field(name="total_wages", dtype=Int64), ], batch_source=zipcode_source, tags={ @@ -104,16 +107,16 @@ name="credit_history", entities=["dob_ssn"], ttl=timedelta(days=9000), - features=[ - Feature(name="credit_card_due", dtype=ValueType.INT64), - Feature(name="mortgage_due", dtype=ValueType.INT64), - Feature(name="student_loan_due", dtype=ValueType.INT64), - Feature(name="vehicle_loan_due", dtype=ValueType.INT64), - Feature(name="hard_pulls", dtype=ValueType.INT64), - Feature(name="missed_payments_2y", dtype=ValueType.INT64), - Feature(name="missed_payments_1y", dtype=ValueType.INT64), - Feature(name="missed_payments_6m", dtype=ValueType.INT64), - Feature(name="bankruptcies", dtype=ValueType.INT64), + schema=[ + Field(name="credit_card_due", dtype=Int64), + Field(name="mortgage_due", dtype=Int64), + Field(name="student_loan_due", dtype=Int64), + Field(name="vehicle_loan_due", dtype=Int64), + Field(name="hard_pulls", dtype=Int64), + Field(name="missed_payments_2y", dtype=Int64), + Field(name="missed_payments_1y", dtype=Int64), + Field(name="missed_payments_6m", dtype=Int64), + Field(name="bankruptcies", dtype=Int64), ], batch_source=credit_history_source, tags={ @@ -134,8 +137,8 @@ # existing feature views and RequestSource features @on_demand_feature_view( inputs={"credit_history": credit_history, "transaction": input_request,}, - features=[ - Feature(name="transaction_gt_last_credit_card_due", dtype=ValueType.BOOL), + schema=[ + Field(name="transaction_gt_last_credit_card_due", dtype=Bool), ], ) def transaction_gt_last_credit_card_due(inputs: pd.DataFrame) -> pd.DataFrame: diff --git 
a/ui/public/registry.json b/ui/public/registry.json index 024fb1ac19..70709bf28d 100644 --- a/ui/public/registry.json +++ b/ui/public/registry.json @@ -627,7 +627,7 @@ }, "userDefinedFunction": { "name": "transaction_gt_last_credit_card_due", - "body": "@on_demand_feature_view(\n inputs={\"credit_history\": credit_history, \"transaction\": input_request,},\n features=[\n Feature(name=\"transaction_gt_last_credit_card_due\", dtype=ValueType.BOOL),\n ],\n)\ndef transaction_gt_last_credit_card_due(inputs: pd.DataFrame) -> pd.DataFrame:\n df = pd.DataFrame()\n df[\"transaction_gt_last_credit_card_due\"] = (\n inputs[\"transaction_amt\"] > inputs[\"credit_card_due\"]\n )\n return df\n" + "body": "@on_demand_feature_view(\n inputs={\"credit_history\": credit_history, \"transaction\": input_request,},\n schema=[\n Field(name=\"transaction_gt_last_credit_card_due\", dtype=Bool),\n ],\n)\ndef transaction_gt_last_credit_card_due(inputs: pd.DataFrame) -> pd.DataFrame:\n df = pd.DataFrame()\n df[\"transaction_gt_last_credit_card_due\"] = (\n inputs[\"transaction_amt\"] > inputs[\"credit_card_due\"]\n )\n return df\n" } }, "meta": {