Skip to content

Commit

Permalink
Add export_schema method to export schema from FeatureSet
Browse files Browse the repository at this point in the history
  • Loading branch information
davidheryanto committed Jan 31, 2020
1 parent a831a8c commit 9a1f24a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 6 deletions.
47 changes: 47 additions & 0 deletions sdk/python/feast/feature_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
from google.protobuf import json_format
from google.protobuf.duration_pb2 import Duration
from google.protobuf.json_format import MessageToJson
from google.protobuf.message import Message
from pandas.api.types import is_datetime64_ns_dtype
from pyarrow.lib import TimestampType
from tensorflow_metadata.proto.v0 import schema_pb2
from tensorflow_metadata.proto.v0.schema_pb2 import Schema

from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto
Expand Down Expand Up @@ -695,6 +697,51 @@ def update_schema(self, schema: Schema):
f"that does not exist in the FeatureSet '{self.name}' in Feast"
)

def export_schema(self) -> Schema:
    """
    Export the fields of this FeatureSet as a Tensorflow metadata Schema.

    One Feature is created in the Schema for every field held in
    ``self._fields``. For each attribute listed below, the value set on the
    field is copied to the Feature; attributes that are ``None`` on the
    field are left unset on the Feature. The value type of the field is not
    exported yet (see the TODO in the loop).

    A warning is emitted, and the attribute is skipped, when the matching
    attribute on the Feature is neither a Protobuf message nor a Python
    int, str or bool.

    Returns:
        Tensorflow metadata Schema with one Feature appended per field.
    """
    # Names of the attributes to read from each field. Built once, outside
    # the loops, because it does not depend on the field being exported.
    attributes_to_copy_from_field_to_feature = (
        "name",
        "presence",
        "group_presence",
        "shape",
        "value_count",
        "domain",
        "int_domain",
        "float_domain",
        "string_domain",
        "bool_domain",
        "struct_domain",
        "_natural_language_domain",
        "image_domain",
        "mid_domain",
        "url_domain",
        "time_domain",
        "time_of_day_domain",
    )

    schema = Schema()
    for field in self._fields.values():
        # TODO: export type as well
        feature = schema_pb2.Feature()
        for field_attr in attributes_to_copy_from_field_to_feature:
            value = getattr(field, field_attr)
            if value is None:
                continue

            # The field may be read through a private (underscore-prefixed)
            # name, but Protobuf messages only expose the public field name,
            # so the leading underscore is dropped on the Feature side.
            feature_attr = field_attr.lstrip("_")
            current = getattr(feature, feature_attr)

            if isinstance(current, Message):
                # Proto message field to copy is an embedded message, which
                # cannot be assigned directly, so MergeFrom() must be used.
                current.MergeFrom(value)
            elif isinstance(current, (int, str, bool)):
                # Proto message field is a simple Python type, so setattr()
                # can be used.
                setattr(feature, feature_attr, value)
            else:
                warnings.warn(
                    f"Attribute '{field_attr}' cannot be copied from Field "
                    f"'{field.name}' in FeatureSet '{self.name}' to a "
                    f"Feature in the Schema in Tensorflow metadata, because "
                    f"the type is neither a Protobuf message or Python "
                    f"int, str and bool"
                )
        schema.feature.append(feature)
    return schema

@classmethod
def from_yaml(cls, yml: str):
"""
Expand Down
12 changes: 6 additions & 6 deletions sdk/python/tests/test_feature_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_update_schema(self):
json_format.Parse(
open(test_data_folder / "schema_bikeshare.json").read(), schema_bikeshare
)
feature_set_bikeshare = FeatureSet(
feature_set = FeatureSet(
name="bikeshare",
entities=[Entity(name="station_id", dtype=ValueType.INT64),],
features=[
Expand All @@ -191,23 +191,23 @@ def test_update_schema(self):
],
)
# Before update
for entity in feature_set_bikeshare.entities:
for entity in feature_set.entities:
assert entity.presence is None
assert entity.shape is None
for feature in feature_set_bikeshare.features:
for feature in feature_set.features:
assert feature.presence is None
assert feature.shape is None
assert feature.string_domain is None
assert feature.float_domain is None
assert feature.int_domain is None

feature_set_bikeshare.update_schema(schema_bikeshare)
feature_set.update_schema(schema_bikeshare)

# After update
for entity in feature_set_bikeshare.entities:
for entity in feature_set.entities:
assert entity.presence is not None
assert entity.shape is not None
for feature in feature_set_bikeshare.features:
for feature in feature_set.features:
assert feature.presence is not None
assert feature.shape is not None
if feature.name in ["location", "name", "status"]:
Expand Down

0 comments on commit 9a1f24a

Please sign in to comment.