Pydantic model compatibility #1

Merged · 32 commits · Jul 6, 2023

Commits
98aa248
change package name, create branch
Jun 23, 2023
8b5b2dd
remove publish and release actions
Jun 23, 2023
b18eee1
Subclass Entity to Pydantic BaseModel so it can be sent and received …
expediamatt Jun 29, 2023
53a6fe7
Add Pydantic Model conversion to Entity instead of subclassing it
expediamatt Jun 30, 2023
9a8259d
Fix accidental python requirement change
expediamatt Jun 30, 2023
de11bcf
Add Pydantic Model conversion to two DataSources, RequestSource and S…
expediamatt Jun 30, 2023
051631f
Add Pydantic Model conversion to FeatureView
expediamatt Jun 30, 2023
caf9c59
Fix some optional strings for Pydantic models
expediamatt Jun 30, 2023
623fab7
Fix small errors in DataSource and FeatureView pydantic models, do pa…
expediamatt Jun 30, 2023
83fdbc9
Add FeatureView Pydantic Model test, fix FeatureView model conversion…
expediamatt Jul 2, 2023
eae1e94
Fix Entity json encoding in FeatureView pydantic model
expediamatt Jul 2, 2023
d62e2d7
Small bug fix to make Entity list optional in FeatureViews pydantic m…
expediamatt Jul 2, 2023
66fb923
Fix Pydantic model conversions with dependencies. FeatureView depends…
expediamatt Jul 2, 2023
f0c46b6
WIP. Try adding more types to json encoder to get type checking to work.
expediamatt Jul 2, 2023
db65d80
WIP. Fixing json encoding of types
expediamatt Jul 2, 2023
db3197e
Make FeastTypes Pydantic compatible
expediamatt Jul 2, 2023
ba7bb27
Fix DataSourceModel to SparkSource pydantic conversion and add tests
expediamatt Jul 3, 2023
4768532
Allow dtypes to be defined by strings, add more tests
expediamatt Jul 3, 2023
65f8698
Adjust schema conversion in RequestSource datasource_from_pydantic_model
expediamatt Jul 3, 2023
34109e5
Remove unnecessary properties from FeatureView pydantic model
expediamatt Jul 3, 2023
47e98b9
Remove parameters that weren't removed last time from FeatureView pyd…
expediamatt Jul 3, 2023
a6f0253
Finish linting
expediamatt Jul 3, 2023
82cdec7
Fix source choice in FeatureView pydantic model conversion to match F…
expediamatt Jul 3, 2023
89eb753
Refine FeastType json outputs
expediamatt Jul 3, 2023
f4a856e
First round of PR fixes, the small stuff
expediamatt Jul 5, 2023
493dbd4
Second round of PR fixes. Move Pydantic model conversions out of main…
expediamatt Jul 5, 2023
59e1f4e
Restore github workflow publish.yml
expediamatt Jul 6, 2023
e3fe369
Fix entity comparison in FeatureView
expediamatt Jul 6, 2023
6b9be17
Fix type checking in Pydantic models
expediamatt Jul 6, 2023
b709ce1
Update pydantic model type checking to avoid Subscripted generics error
expediamatt Jul 6, 2023
d0866d1
Small lint fix
expediamatt Jul 6, 2023
1d622e7
Restore github workflow release.yml
expediamatt Jul 6, 2023
183 changes: 183 additions & 0 deletions sdk/python/feast/expediagroup/pydantic_models/data_source_model.py
@@ -0,0 +1,183 @@
"""
Pydantic Model for Data Source

Copyright 2023 Expedia Group
Author: matcarlin@expediagroup.com
"""
from typing import Dict, List, Literal, Optional, Union

from pydantic import BaseModel
from pydantic import Field as PydanticField
from typing_extensions import Annotated

from feast.data_source import RequestSource
from feast.field import Field
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)


class DataSourceModel(BaseModel):
"""
Pydantic Model of a Feast DataSource.
"""

def to_data_source(self):
"""
Given a Pydantic DataSourceModel, create and return a DataSource.

Returns:
A DataSource.
"""
raise NotImplementedError

@classmethod
def from_data_source(cls, data_source):
"""
Converts a DataSource object to its pydantic model representation.

Returns:
A DataSourceModel.
"""
raise NotImplementedError


class RequestSourceModel(DataSourceModel):
"""
Pydantic Model of a Feast RequestSource.
"""

name: str
model_type: Literal["RequestSourceModel"] = "RequestSourceModel"
schema_: List[Field] = PydanticField(None, alias="schema")
description: Optional[str] = ""
tags: Optional[Dict[str, str]] = None
owner: Optional[str] = ""

class Config:
arbitrary_types_allowed = True
extra = "allow"

def to_data_source(self):
"""
Given a Pydantic RequestSourceModel, create and return a RequestSource.

Returns:
A RequestSource.
"""
params = {
"name": self.name,
"description": self.description,
"tags": self.tags if self.tags else None,
"owner": self.owner,
}
params["schema"] = [
Field(
name=sch.name,
dtype=sch.dtype,
description=sch.description,
tags=sch.tags,
)
for sch in self.schema_
]
return RequestSource(**params)

@classmethod
def from_data_source(cls, data_source):
"""
Converts a RequestSource object to its pydantic model representation.

Returns:
A RequestSourceModel.
"""
return cls(
name=data_source.name,
schema=data_source.schema,
description=data_source.description,
tags=data_source.tags if data_source.tags else None,
owner=data_source.owner,
)


class SparkSourceModel(DataSourceModel):
"""
Pydantic Model of a Feast SparkSource.
"""

name: str
model_type: Literal["SparkSourceModel"] = "SparkSourceModel"
table: Optional[str] = None
query: Optional[str] = None
path: Optional[str] = None
file_format: Optional[str] = None
created_timestamp_column: Optional[str] = None
field_mapping: Optional[Dict[str, str]] = None
description: Optional[str] = ""
tags: Optional[Dict[str, str]] = None
owner: Optional[str] = ""
timestamp_field: Optional[str] = None

class Config:
arbitrary_types_allowed = True
extra = "allow"

def to_data_source(self):
"""
Given a Pydantic SparkSourceModel, create and return a SparkSource.

Returns:
A SparkSource.
"""
return SparkSource(
name=self.name,
table=self.table if hasattr(self, "table") else "",
query=self.query if hasattr(self, "query") else "",
path=self.path if hasattr(self, "path") else "",
file_format=self.file_format if hasattr(self, "file_format") else "",
created_timestamp_column=self.created_timestamp_column
if hasattr(self, "created_timestamp_column")
else "",
field_mapping=self.field_mapping if self.field_mapping else None,
description=self.description or "",
tags=self.tags if self.tags else None,
owner=self.owner or "",
timestamp_field=self.timestamp_field
if hasattr(self, "timestamp_field")
else "",
)

@classmethod
def from_data_source(cls, data_source):
"""
Converts a SparkSource object to its pydantic model representation.

Returns:
A SparkSourceModel.
"""
return cls(
name=data_source.name,
table=data_source.table,
query=data_source.query,
path=data_source.path,
file_format=data_source.file_format,
created_timestamp_column=data_source.created_timestamp_column
if data_source.created_timestamp_column
else "",
field_mapping=data_source.field_mapping
if data_source.field_mapping
else None,
description=data_source.description if data_source.description else "",
tags=data_source.tags if data_source.tags else None,
owner=data_source.owner if data_source.owner else "",
timestamp_field=data_source.timestamp_field
if data_source.timestamp_field
else "",
)


# https://blog.devgenius.io/deserialize-child-classes-with-pydantic-that-gonna-work-784230e1cf83
# This lets us discriminate child classes of DataSourceModel with type hints.
AnyDataSource = Annotated[
Union[RequestSourceModel, SparkSourceModel],
PydanticField(discriminator="model_type"),
]
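For reviewers, here is a minimal round-trip sketch (not part of this diff) of the new data source models; the source name, table, and timestamp field are invented for the example, and the discriminated-union parse assumes pydantic v1.9+ as described in the blog post linked above.

# Hypothetical usage sketch, not part of this diff.
from pydantic import parse_obj_as

from feast.expediagroup.pydantic_models.data_source_model import (
    AnyDataSource,
    SparkSourceModel,
)
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
    SparkSource,
)

# Example source; name, table, and timestamp field are illustrative only.
source = SparkSource(
    name="driver_stats_source",
    table="driver_stats",
    timestamp_field="event_timestamp",
)

model = SparkSourceModel.from_data_source(source)  # DataSource -> pydantic
restored = model.to_data_source()                   # pydantic -> DataSource
assert restored.name == source.name

# The model_type field discriminates child models when deserializing.
any_source = parse_obj_as(AnyDataSource, model.dict())
assert isinstance(any_source, SparkSourceModel)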
75 changes: 75 additions & 0 deletions sdk/python/feast/expediagroup/pydantic_models/entity_model.py
@@ -0,0 +1,75 @@
"""
Pydantic Model for Entity

Copyright 2023 Expedia Group
Author: matcarlin@expediagroup.com
"""

from datetime import datetime
from json import dumps
from typing import Callable, Dict, Optional

from pydantic import BaseModel

from feast.entity import Entity
from feast.value_type import ValueType


class EntityModel(BaseModel):
"""
Pydantic Model of a Feast Entity.
"""

name: str
join_key: str
value_type: Optional[ValueType] = None
description: str = ""
tags: Optional[Dict[str, str]] = None
owner: str = ""
created_timestamp: Optional[datetime] = None
last_updated_timestamp: Optional[datetime] = None

class Config:
arbitrary_types_allowed = True
extra = "allow"
json_encoders: Dict[object, Callable] = {
ValueType: lambda v: int(dumps(v.value, default=str))
}

def to_entity(self):
"""
Given a Pydantic EntityModel, create and return an Entity.

Returns:
An Entity.
"""
entity = Entity(
name=self.name,
join_keys=[self.join_key],
value_type=self.value_type,
description=self.description,
tags=self.tags if self.tags else None,
owner=self.owner,
)
entity.created_timestamp = self.created_timestamp
entity.last_updated_timestamp = self.last_updated_timestamp
return entity

@classmethod
def from_entity(cls, entity):
"""
Converts an entity object to its pydantic model representation.

Returns:
An EntityModel.
"""
return cls(
name=entity.name,
join_key=entity.join_key,
value_type=entity.value_type,
description=entity.description,
tags=entity.tags if entity.tags else None,
owner=entity.owner,
created_timestamp=entity.created_timestamp,
last_updated_timestamp=entity.last_updated_timestamp,
)
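As a quick illustration of the intended use (not part of the diff), an Entity can be converted to an EntityModel and back; the entity name and join key below are invented for the example.

# Hypothetical usage sketch, not part of this diff.
from feast.entity import Entity
from feast.expediagroup.pydantic_models.entity_model import EntityModel
from feast.value_type import ValueType

driver = Entity(
    name="driver",
    join_keys=["driver_id"],
    value_type=ValueType.INT64,
    description="Driver entity",
)

model = EntityModel.from_entity(driver)  # Entity -> pydantic
restored = model.to_entity()             # pydantic -> Entity
assert restored.name == driver.name
assert restored.join_key == "driver_id"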
136 changes: 136 additions & 0 deletions sdk/python/feast/expediagroup/pydantic_models/feature_view_model.py
@@ -0,0 +1,136 @@
"""
Pydantic Model for Feature View

Copyright 2023 Expedia Group
Author: matcarlin@expediagroup.com
"""
import sys
from datetime import timedelta
from json import dumps
from typing import Callable, Dict, List, Optional

from pydantic import BaseModel

from feast.data_source import DataSource
from feast.entity import Entity
from feast.expediagroup.pydantic_models.data_source_model import (
AnyDataSource,
RequestSourceModel,
SparkSourceModel,
)
from feast.expediagroup.pydantic_models.entity_model import EntityModel
from feast.feature_view import FeatureView
from feast.field import Field
from feast.types import ComplexFeastType, PrimitiveFeastType

SUPPORTED_DATA_SOURCES = [RequestSourceModel, SparkSourceModel]


class FeatureViewModel(BaseModel):
"""
Pydantic Model of a Feast FeatureView.
"""

name: str
original_entities: List[EntityModel] = []
original_schema: Optional[List[Field]] = None
ttl: Optional[timedelta]
batch_source: AnyDataSource
stream_source: Optional[AnyDataSource]
online: bool = True
description: str = ""
tags: Optional[Dict[str, str]] = None
owner: str = ""

class Config:
arbitrary_types_allowed = True
extra = "allow"
json_encoders: Dict[object, Callable] = {
Field: lambda v: int(dumps(v.value, default=str)),
DataSource: lambda v: v.to_pydantic_model(),
Entity: lambda v: v.to_pydantic_model(),
ComplexFeastType: lambda v: str(v),
PrimitiveFeastType: lambda v: str(v),
}

def to_feature_view(self):
"""
Given a Pydantic FeatureViewModel, create and return a FeatureView.

Returns:
A FeatureView.
"""
# Convert each of the sources if they exist
batch_source = self.batch_source.to_data_source() if self.batch_source else None
stream_source = (
self.stream_source.to_data_source() if self.stream_source else None
)

# Mirror the stream/batch source conditions in the FeatureView
# constructor; one source is passed, either a stream source
# which contains a batch source inside it, or a batch source
# on its own.
source = stream_source if stream_source else batch_source
if stream_source:
source.batch_source = batch_source

# Create the FeatureView
feature_view = FeatureView(
name=self.name,
source=source,
schema=self.original_schema,
entities=[entity.to_entity() for entity in self.original_entities],
ttl=self.ttl,
online=self.online,
description=self.description,
tags=self.tags if self.tags else None,
owner=self.owner,
)

return feature_view

@classmethod
def from_feature_view(cls, feature_view):
"""
Converts a FeatureView object to its pydantic model representation.

Returns:
A FeatureViewModel.
"""
batch_source = None
if feature_view.batch_source:
class_ = getattr(
sys.modules[__name__],
type(feature_view.batch_source).__name__ + "Model",
)
if class_ not in SUPPORTED_DATA_SOURCES:
raise ValueError(
"Batch source type is not a supported data source type."
)
batch_source = class_.from_data_source(feature_view.batch_source)
stream_source = None
if feature_view.stream_source:
class_ = getattr(
sys.modules[__name__],
type(feature_view.stream_source).__name__ + "Model",
)
if class_ not in SUPPORTED_DATA_SOURCES:
raise ValueError(
"Stream source type is not a supported data source type."
)
stream_source = class_.from_data_source(feature_view.stream_source)
return cls(
name=feature_view.name,
original_entities=[
EntityModel.from_entity(entity)
for entity in feature_view.original_entities
],
ttl=feature_view.ttl,
original_schema=feature_view.original_schema,
batch_source=batch_source,
stream_source=stream_source,
online=feature_view.online,
description=feature_view.description,
tags=feature_view.tags if feature_view.tags else None,
owner=feature_view.owner,
)
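Finally, a sketch of the FeatureView round trip (not part of the diff); the feature view name, schema, and TTL are invented, and the example assumes this branch's FeatureView, which exposes the original_entities and original_schema attributes used above.

# Hypothetical usage sketch, not part of this diff.
from datetime import timedelta

from feast.entity import Entity
from feast.expediagroup.pydantic_models.feature_view_model import FeatureViewModel
from feast.feature_view import FeatureView
from feast.field import Field
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
    SparkSource,
)
from feast.types import Float32

driver = Entity(name="driver", join_keys=["driver_id"])
source = SparkSource(
    name="driver_stats_source",
    table="driver_stats",
    timestamp_field="event_timestamp",
)

view = FeatureView(
    name="driver_hourly_stats",
    entities=[driver],
    schema=[Field(name="conv_rate", dtype=Float32)],
    source=source,
    ttl=timedelta(days=1),
)

model = FeatureViewModel.from_feature_view(view)  # FeatureView -> pydantic
restored = model.to_feature_view()                # pydantic -> FeatureView
assert restored.name == view.name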