Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle case where_LIST type is empty #1703

Merged
merged 18 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sdk/python/feast/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def __init__(
self._name = name
if not isinstance(dtype, ValueType):
raise ValueError("dtype is not a valid ValueType")
if dtype is ValueType.UNKNOWN:
raise ValueError(f"dtype cannont be {dtype}")
adchia marked this conversation as resolved.
Show resolved Hide resolved
self._dtype = dtype
if labels is None:
self._labels = dict()
Expand Down
32 changes: 19 additions & 13 deletions sdk/python/feast/online_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict
from typing import Any, Dict, List, cast

import pandas as pd
Expand All @@ -25,7 +26,7 @@
from feast.type_map import (
_python_value_to_proto_value,
feast_value_type_to_python_type,
python_type_to_feast_value_type,
python_values_to_feast_value_type,
)


Expand Down Expand Up @@ -94,25 +95,30 @@ def _infer_online_entity_rows(
entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows)
entity_row_list = []
entity_type_map = dict()
entity_value_map = defaultdict(list)
judahrand marked this conversation as resolved.
Show resolved Hide resolved

# Flatten keys-value dicts into lists for type inference
for entity in entity_rows_dicts:
for key, value in entity.items():
if not isinstance(value, Value):
entity_value_map[key].append(value)

# Loop over all entities to infer dtype first in case of empty lists or nulls
for key, values in entity_value_map.items():
entity_type_map[key] = python_values_to_feast_value_type(key, values)

for entity in entity_rows_dicts:
fields = {}
for key, value in entity.items():
# Allow for feast.types.Value
if key not in entity_type_map:
adchia marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
f"field {key} cannot have all null values for type inference."
)

if isinstance(value, Value):
proto_value = value
else:
# Infer the specific type for this row
current_dtype = python_type_to_feast_value_type(name=key, value=value)

if key not in entity_type_map:
entity_type_map[key] = current_dtype
else:
if current_dtype != entity_type_map[key]:
raise TypeError(
f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
)
proto_value = _python_value_to_proto_value(current_dtype, value)
proto_value = _python_value_to_proto_value(entity_type_map[key], value)
fields[key] = proto_value
entity_row_list.append(GetOnlineFeaturesRequestV2.EntityRow(fields=fields))
return entity_row_list
68 changes: 49 additions & 19 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,28 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any:
field_value_dict = MessageToDict(field_value_proto)

for k, v in field_value_dict.items():
if "List" in k:
val = v.get("val", [])
else:
val = v

if k == "int64Val":
return int(v)
return int(val)
if k == "bytesVal":
return bytes(v)
return bytes(val)
if (k == "int64ListVal") or (k == "int32ListVal"):
return [int(item) for item in v["val"]]
return [int(item) for item in val]
if (k == "floatListVal") or (k == "doubleListVal"):
return [float(item) for item in v["val"]]
return [float(item) for item in val]
if k == "stringListVal":
return [str(item) for item in v["val"]]
return [str(item) for item in val]
if k == "bytesListVal":
return [bytes(item) for item in v["val"]]
return [bytes(item) for item in val]
if k == "boolListVal":
return [bool(item) for item in v["val"]]
return [bool(item) for item in val]

if k in ["int32Val", "floatVal", "doubleVal", "stringVal", "boolVal"]:
return v
return val
else:
raise TypeError(
f"Casting to Python native type for type {k} failed. "
Expand Down Expand Up @@ -144,9 +149,9 @@ def python_type_to_feast_value_type(
common_item_value_type = None
for item in list_items:
if isinstance(item, ProtoValue):
current_item_value_type = _proto_str_to_value_type(
str(item.WhichOneof("val"))
)
current_item_value_type: Optional[
ValueType
] = _proto_str_to_value_type(str(item.WhichOneof("val")))
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
Expand All @@ -164,9 +169,7 @@ def python_type_to_feast_value_type(
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
raise ValueError(
f"field {name} cannot have null values for type inference."
)
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]
else:
assert value
Expand All @@ -180,6 +183,28 @@ def python_type_to_feast_value_type(
return type_map[value.dtype.__str__()]


def python_values_to_feast_value_type(name: str, values: Any, recurse: bool = True):
inferred_dtype = ValueType.UNKNOWN
for row in values:
current_dtype = python_type_to_feast_value_type(
name, value=row, recurse=recurse
)

if inferred_dtype is ValueType.UNKNOWN:
inferred_dtype = current_dtype
else:
if current_dtype != inferred_dtype:
judahrand marked this conversation as resolved.
Show resolved Hide resolved
raise TypeError(
f"Input entity {name} has mixed types, {current_dtype} and {inferred_dtype}. That is not allowed. "
)
if inferred_dtype is ValueType.UNKNOWN:
raise ValueError(
f"field {name} cannot have all null values for type inference."
)

return inferred_dtype


def _type_err(item, dtype):
raise ValueError(f'Value "{item}" is of type {type(item)} not of type {dtype}')

Expand Down Expand Up @@ -277,11 +302,16 @@ def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue:
def python_value_to_proto_value(
value: Any, feature_type: ValueType = None
) -> ProtoValue:
value_type = (
python_type_to_feast_value_type("", value)
if value is not None
else feature_type
)
value_type = feature_type
if value is not None:
if isinstance(value, (list, np.ndarray)):
value_type = (
feature_type
if len(value) == 0
else python_type_to_feast_value_type("", value)
)
else:
value_type = python_type_to_feast_value_type("", value)
return _python_value_to_proto_value(value_type, value)


Expand Down
11 changes: 8 additions & 3 deletions sdk/python/tests/data/data_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ def create_dataset(
entity_type: ValueType = ValueType.INT32,
feature_dtype: str = None,
feature_is_list: bool = False,
list_is_empty: bool = False,
) -> pd.DataFrame:
now = datetime.now().replace(microsecond=0, second=0, minute=0)
ts = pd.Timestamp(now).round("ms")
data = {
"driver_id": get_entities_for_value_type(entity_type),
"value": get_feature_values_for_dtype(feature_dtype, feature_is_list),
"value": get_feature_values_for_dtype(
feature_dtype, feature_is_list, list_is_empty
),
"ts_1": [
ts - timedelta(hours=4),
ts,
Expand Down Expand Up @@ -44,7 +47,7 @@ def get_entities_for_value_type(value_type: ValueType) -> List:
return value_type_map[value_type]


def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
def get_feature_values_for_dtype(dtype: str, is_list: bool, is_empty: bool) -> List:
if dtype is None:
return [0.1, None, 0.3, 4, 5]
# TODO(adchia): for int columns, consider having a better error when dealing with None values (pandas int dfs can't
Expand All @@ -57,8 +60,10 @@ def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
"bool": [True, None, False, True, False],
}
non_list_val = dtype_map[dtype]
# Duplicate the value once if this is a list
if is_list:
if is_empty:
judahrand marked this conversation as resolved.
Show resolved Hide resolved
# Need at least one non-empty element for type inference
return [[] for n in non_list_val[:-1]] + [non_list_val[-1:]]
return [[n, n] if n is not None else None for n in non_list_val]
else:
return non_list_val
Loading