Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Use timestamp type when converting unixtimestamp feature type to arrow #2593

Merged
merged 2 commits into from
Apr 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions go/types/typeconversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ func ProtoTypeToArrowType(sample *types.Value) (arrow.DataType, error) {
case *types.Value_DoubleListVal:
return arrow.ListOf(arrow.PrimitiveTypes.Float64), nil
case *types.Value_UnixTimestampVal:
return arrow.FixedWidthTypes.Time32s, nil
return arrow.FixedWidthTypes.Timestamp_s, nil
case *types.Value_UnixTimestampListVal:
return arrow.ListOf(arrow.FixedWidthTypes.Time32s), nil
return arrow.ListOf(arrow.FixedWidthTypes.Timestamp_s), nil
default:
return nil,
fmt.Errorf("unsupported proto type in proto to arrow conversion: %s", sample.Val)
Expand Down Expand Up @@ -80,9 +80,9 @@ func ValueTypeEnumToArrowType(t types.ValueType_Enum) (arrow.DataType, error) {
case types.ValueType_DOUBLE_LIST:
return arrow.ListOf(arrow.PrimitiveTypes.Float64), nil
case types.ValueType_UNIX_TIMESTAMP:
return arrow.FixedWidthTypes.Time32s, nil
return arrow.FixedWidthTypes.Timestamp_s, nil
case types.ValueType_UNIX_TIMESTAMP_LIST:
return arrow.ListOf(arrow.FixedWidthTypes.Time32s), nil
return arrow.ListOf(arrow.FixedWidthTypes.Timestamp_s), nil
default:
return nil,
fmt.Errorf("unsupported value type enum in enum to arrow type conversion: %s", t)
Expand Down Expand Up @@ -119,9 +119,9 @@ func copyProtoValuesToArrowArray(builder array.Builder, values []*types.Value) e
for _, v := range values {
fieldBuilder.Append(v.GetDoubleVal())
}
case *array.Time32Builder:
case *array.TimestampBuilder:
for _, v := range values {
fieldBuilder.Append(arrow.Time32(v.GetUnixTimestampVal()))
fieldBuilder.Append(arrow.Timestamp(v.GetUnixTimestampVal()))
}
case *array.ListBuilder:
for _, list := range values {
Expand Down Expand Up @@ -157,9 +157,9 @@ func copyProtoValuesToArrowArray(builder array.Builder, values []*types.Value) e
for _, v := range list.GetDoubleListVal().GetVal() {
valueBuilder.Append(v)
}
case *array.Time32Builder:
case *array.TimestampBuilder:
for _, v := range list.GetUnixTimestampListVal().GetVal() {
valueBuilder.Append(arrow.Time32(v))
valueBuilder.Append(arrow.Timestamp(v))
}
}
}
Expand Down Expand Up @@ -227,10 +227,10 @@ func ArrowValuesToProtoValues(arr arrow.Array) ([]*types.Value, error) {
}
values = append(values,
&types.Value{Val: &types.Value_BoolListVal{BoolListVal: &types.BoolList{Val: vals}}})
case arrow.FixedWidthTypes.Time32s:
case arrow.FixedWidthTypes.Timestamp_s:
vals := make([]int64, int(offsets[idx])-pos)
for j := pos; j < int(offsets[idx]); j++ {
vals[j-pos] = int64(listValues.(*array.Time32).Value(j))
vals[j-pos] = int64(listValues.(*array.Timestamp).Value(j))
}

values = append(values,
Expand Down Expand Up @@ -278,11 +278,11 @@ func ArrowValuesToProtoValues(arr arrow.Array) ([]*types.Value, error) {
values = append(values,
&types.Value{Val: &types.Value_StringVal{StringVal: arr.(*array.String).Value(idx)}})
}
case arrow.FixedWidthTypes.Time32s:
case arrow.FixedWidthTypes.Timestamp_s:
for idx := 0; idx < arr.Len(); idx++ {
values = append(values,
&types.Value{Val: &types.Value_UnixTimestampVal{
UnixTimestampVal: int64(arr.(*array.Time32).Value(idx))}})
UnixTimestampVal: int64(arr.(*array.Timestamp).Value(idx))}})
}
default:
return nil, fmt.Errorf("unsupported arrow to proto conversion for type %s", arr.DataType())
Expand Down
80 changes: 9 additions & 71 deletions sdk/python/feast/embedded_go/online_features_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,59 +14,17 @@
from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesResponse
from feast.protos.feast.types import Value_pb2
from feast.repo_config import RepoConfig
from feast.types import from_value_type
from feast.value_type import ValueType

from .lib.embedded import DataTable, NewOnlineFeatureService, OnlineFeatureServiceConfig
from .lib.go import Slice_string
from .type_map import FEAST_TYPE_TO_ARROW_TYPE, arrow_array_to_array_of_proto

if TYPE_CHECKING:
from feast.feature_store import FeatureStore


ARROW_TYPE_TO_PROTO_FIELD = {
pa.int32(): "int32_val",
pa.int64(): "int64_val",
pa.float32(): "float_val",
pa.float64(): "double_val",
pa.bool_(): "bool_val",
pa.string(): "string_val",
pa.binary(): "bytes_val",
pa.time32("s"): "unix_timestamp_val",
}

ARROW_LIST_TYPE_TO_PROTO_FIELD = {
pa.int32(): "int32_list_val",
pa.int64(): "int64_list_val",
pa.float32(): "float_list_val",
pa.float64(): "double_list_val",
pa.bool_(): "bool_list_val",
pa.string(): "string_list_val",
pa.binary(): "bytes_list_val",
pa.time32("s"): "unix_timestamp_list_val",
}

ARROW_LIST_TYPE_TO_PROTO_LIST_CLASS = {
pa.int32(): Value_pb2.Int32List,
pa.int64(): Value_pb2.Int64List,
pa.float32(): Value_pb2.FloatList,
pa.float64(): Value_pb2.DoubleList,
pa.bool_(): Value_pb2.BoolList,
pa.string(): Value_pb2.StringList,
pa.binary(): Value_pb2.BytesList,
pa.time32("s"): Value_pb2.Int64List,
}

# used for entity types only
PROTO_TYPE_TO_ARROW_TYPE = {
ValueType.INT32: pa.int32(),
ValueType.INT64: pa.int64(),
ValueType.FLOAT: pa.float32(),
ValueType.DOUBLE: pa.float64(),
ValueType.STRING: pa.string(),
ValueType.BYTES: pa.binary(),
}


class EmbeddedOnlineFeatureServer:
def __init__(
self, repo_path: str, repo_config: RepoConfig, feature_store: "FeatureStore"
Expand Down Expand Up @@ -179,8 +137,10 @@ def _to_arrow(value, type_hint: Optional[ValueType]) -> pa.Array:
if isinstance(value, Value_pb2.RepeatedValue):
_proto_to_arrow(value)

if type_hint in PROTO_TYPE_TO_ARROW_TYPE:
return pa.array(value, PROTO_TYPE_TO_ARROW_TYPE[type_hint])
if type_hint:
feast_type = from_value_type(type_hint)
if feast_type in FEAST_TYPE_TO_ARROW_TYPE:
return pa.array(value, FEAST_TYPE_TO_ARROW_TYPE[feast_type])

return pa.array(value)

Expand Down Expand Up @@ -263,31 +223,9 @@ def record_batch_to_online_response(record_batch):
[Value_pb2.Value()] * len(record_batch.columns[idx])
)
else:
if isinstance(field.type, pa.ListType):
proto_list_class = ARROW_LIST_TYPE_TO_PROTO_LIST_CLASS[
field.type.value_type
]
proto_field_name = ARROW_LIST_TYPE_TO_PROTO_FIELD[field.type.value_type]

column = record_batch.columns[idx]
if field.type.value_type == pa.time32("s"):
column = column.cast(pa.list_(pa.int32()))

for v in column.tolist():
feature_vector.values.append(
Value_pb2.Value(**{proto_field_name: proto_list_class(val=v)})
)
else:
proto_field_name = ARROW_TYPE_TO_PROTO_FIELD[field.type]

column = record_batch.columns[idx]
if field.type == pa.time32("s"):
column = column.cast(pa.int32())

for v in column.tolist():
feature_vector.values.append(
Value_pb2.Value(**{proto_field_name: v})
)
feature_vector.values.extend(
arrow_array_to_array_of_proto(field.type, record_batch.columns[idx])
)

resp.results.append(feature_vector)
resp.metadata.feature_names.val.append(field.name)
Expand Down
88 changes: 88 additions & 0 deletions sdk/python/feast/embedded_go/type_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from typing import List

import pyarrow as pa
import pytz

from feast.protos.feast.types import Value_pb2
from feast.types import Array, PrimitiveFeastType

PA_TIMESTAMP_TYPE = pa.timestamp("s", tz=pytz.UTC)

ARROW_TYPE_TO_PROTO_FIELD = {
pa.int32(): "int32_val",
pa.int64(): "int64_val",
pa.float32(): "float_val",
pa.float64(): "double_val",
pa.bool_(): "bool_val",
pa.string(): "string_val",
pa.binary(): "bytes_val",
PA_TIMESTAMP_TYPE: "unix_timestamp_val",
}

ARROW_LIST_TYPE_TO_PROTO_FIELD = {
pa.int32(): "int32_list_val",
pa.int64(): "int64_list_val",
pa.float32(): "float_list_val",
pa.float64(): "double_list_val",
pa.bool_(): "bool_list_val",
pa.string(): "string_list_val",
pa.binary(): "bytes_list_val",
PA_TIMESTAMP_TYPE: "unix_timestamp_list_val",
}

ARROW_LIST_TYPE_TO_PROTO_LIST_CLASS = {
pa.int32(): Value_pb2.Int32List,
pa.int64(): Value_pb2.Int64List,
pa.float32(): Value_pb2.FloatList,
pa.float64(): Value_pb2.DoubleList,
pa.bool_(): Value_pb2.BoolList,
pa.string(): Value_pb2.StringList,
pa.binary(): Value_pb2.BytesList,
PA_TIMESTAMP_TYPE: Value_pb2.Int64List,
}

FEAST_TYPE_TO_ARROW_TYPE = {
PrimitiveFeastType.INT32: pa.int32(),
PrimitiveFeastType.INT64: pa.int64(),
PrimitiveFeastType.FLOAT32: pa.float32(),
PrimitiveFeastType.FLOAT64: pa.float64(),
PrimitiveFeastType.STRING: pa.string(),
PrimitiveFeastType.BYTES: pa.binary(),
PrimitiveFeastType.BOOL: pa.bool_(),
PrimitiveFeastType.UNIX_TIMESTAMP: pa.timestamp("s"),
Array(PrimitiveFeastType.INT32): pa.list_(pa.int32()),
Array(PrimitiveFeastType.INT64): pa.list_(pa.int64()),
Array(PrimitiveFeastType.FLOAT32): pa.list_(pa.float32()),
Array(PrimitiveFeastType.FLOAT64): pa.list_(pa.float64()),
Array(PrimitiveFeastType.STRING): pa.list_(pa.string()),
Array(PrimitiveFeastType.BYTES): pa.list_(pa.binary()),
Array(PrimitiveFeastType.BOOL): pa.list_(pa.bool_()),
Array(PrimitiveFeastType.UNIX_TIMESTAMP): pa.list_(pa.timestamp("s")),
}


def arrow_array_to_array_of_proto(
arrow_type: pa.DataType, arrow_array: pa.Array
) -> List[Value_pb2.Value]:
values = []
if isinstance(arrow_type, pa.ListType):
proto_list_class = ARROW_LIST_TYPE_TO_PROTO_LIST_CLASS[arrow_type.value_type]
proto_field_name = ARROW_LIST_TYPE_TO_PROTO_FIELD[arrow_type.value_type]

if arrow_type.value_type == PA_TIMESTAMP_TYPE:
arrow_array = arrow_array.cast(pa.list_(pa.int64()))

for v in arrow_array.tolist():
values.append(
Value_pb2.Value(**{proto_field_name: proto_list_class(val=v)})
)
else:
proto_field_name = ARROW_TYPE_TO_PROTO_FIELD[arrow_type]

if arrow_type == PA_TIMESTAMP_TYPE:
arrow_array = arrow_array.cast(pa.int64())

for v in arrow_array.tolist():
values.append(Value_pb2.Value(**{proto_field_name: v}))

return values