Skip to content

Commit

Permalink
Handle case where_LIST type is empty (#1703)
Browse files Browse the repository at this point in the history
* Add test for empty `list`

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Refactor how types are tested

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Test the type of all elements in list features

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Fix issue with empty list features

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Remove `python_type_to_feast_value_type` from use in tests

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Infer type from all data

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Add one non-empty element to empty list test datasets

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Accept empty lists in tests

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Use `ValueType.UNKNOWN` instead of `None`

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Handle mix of `null` and `non-null` values better

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Handle entity row type inference when Protobuf values used

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Fix typo

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Make test config generate more clear

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Add TODO

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Be more strict about online entity type consistency

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Rename variable to be more precise

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>

* Add `TODO: Add test where all lists are empty`

Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>
  • Loading branch information
judahrand authored Sep 21, 2021
1 parent 21f1ef7 commit 7d177b6
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 91 deletions.
2 changes: 2 additions & 0 deletions sdk/python/feast/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def __init__(
self._name = name
if not isinstance(dtype, ValueType):
raise ValueError("dtype is not a valid ValueType")
if dtype is ValueType.UNKNOWN:
raise ValueError(f"dtype cannot be {dtype}")
self._dtype = dtype
if labels is None:
self._labels = dict()
Expand Down
54 changes: 39 additions & 15 deletions sdk/python/feast/online_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, List, cast
from collections import defaultdict
from typing import Any, Dict, List, Optional, cast

import pandas as pd

Expand All @@ -23,10 +24,12 @@
)
from feast.protos.feast.types.Value_pb2 import Value as Value
from feast.type_map import (
_proto_str_to_value_type,
_python_value_to_proto_value,
feast_value_type_to_python_type,
python_type_to_feast_value_type,
python_values_to_feast_value_type,
)
from feast.value_type import ValueType


class OnlineResponse:
Expand Down Expand Up @@ -93,26 +96,47 @@ def _infer_online_entity_rows(

entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows)
entity_row_list = []
entity_type_map = dict()
entity_type_map: Dict[str, Optional[ValueType]] = dict()
entity_python_values_map = defaultdict(list)

# Flatten keys-value dicts into lists for type inference
for entity in entity_rows_dicts:
for key, value in entity.items():
if isinstance(value, Value):
inferred_type = _proto_str_to_value_type(str(value.WhichOneof("val")))
# If any ProtoValues were present their types must all be the same
if key in entity_type_map and entity_type_map.get(key) != inferred_type:
raise TypeError(
f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed."
)
entity_type_map[key] = inferred_type
else:
entity_python_values_map[key].append(value)

# Loop over all entities to infer dtype first in case of empty lists or nulls
for key, values in entity_python_values_map.items():
inferred_type = python_values_to_feast_value_type(key, values)

# If any ProtoValues were present their types must match the inferred type
if key in entity_type_map and entity_type_map.get(key) != inferred_type:
raise TypeError(
f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed."
)

entity_type_map[key] = inferred_type

for entity in entity_rows_dicts:
fields = {}
for key, value in entity.items():
# Allow for feast.types.Value
if key not in entity_type_map:
raise ValueError(
f"field {key} cannot have all null values for type inference."
)

if isinstance(value, Value):
proto_value = value
else:
# Infer the specific type for this row
current_dtype = python_type_to_feast_value_type(name=key, value=value)

if key not in entity_type_map:
entity_type_map[key] = current_dtype
else:
if current_dtype != entity_type_map[key]:
raise TypeError(
f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
)
proto_value = _python_value_to_proto_value(current_dtype, value)
proto_value = _python_value_to_proto_value(entity_type_map[key], value)
fields[key] = proto_value
entity_row_list.append(GetOnlineFeaturesRequestV2.EntityRow(fields=fields))
return entity_row_list
53 changes: 39 additions & 14 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,28 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any:
field_value_dict = MessageToDict(field_value_proto)

for k, v in field_value_dict.items():
if "List" in k:
val = v.get("val", [])
else:
val = v

if k == "int64Val":
return int(v)
return int(val)
if k == "bytesVal":
return bytes(v)
return bytes(val)
if (k == "int64ListVal") or (k == "int32ListVal"):
return [int(item) for item in v["val"]]
return [int(item) for item in val]
if (k == "floatListVal") or (k == "doubleListVal"):
return [float(item) for item in v["val"]]
return [float(item) for item in val]
if k == "stringListVal":
return [str(item) for item in v["val"]]
return [str(item) for item in val]
if k == "bytesListVal":
return [bytes(item) for item in v["val"]]
return [bytes(item) for item in val]
if k == "boolListVal":
return [bool(item) for item in v["val"]]
return [bool(item) for item in val]

if k in ["int32Val", "floatVal", "doubleVal", "stringVal", "boolVal"]:
return v
return val
else:
raise TypeError(
f"Casting to Python native type for type {k} failed. "
Expand Down Expand Up @@ -144,9 +149,9 @@ def python_type_to_feast_value_type(
common_item_value_type = None
for item in list_items:
if isinstance(item, ProtoValue):
current_item_value_type = _proto_str_to_value_type(
str(item.WhichOneof("val"))
)
current_item_value_type: Optional[
ValueType
] = _proto_str_to_value_type(str(item.WhichOneof("val")))
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
Expand All @@ -164,9 +169,7 @@ def python_type_to_feast_value_type(
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
raise ValueError(
f"field {name} cannot have null values for type inference."
)
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]
else:
assert value
Expand All @@ -180,6 +183,28 @@ def python_type_to_feast_value_type(
return type_map[value.dtype.__str__()]


def python_values_to_feast_value_type(name: str, values: Any, recurse: bool = True):
inferred_dtype = ValueType.UNKNOWN
for row in values:
current_dtype = python_type_to_feast_value_type(
name, value=row, recurse=recurse
)

if inferred_dtype is ValueType.UNKNOWN:
inferred_dtype = current_dtype
else:
if current_dtype != inferred_dtype and current_dtype != ValueType.UNKNOWN:
raise TypeError(
f"Input entity {name} has mixed types, {current_dtype} and {inferred_dtype}. That is not allowed. "
)
if inferred_dtype is ValueType.UNKNOWN:
raise ValueError(
f"field {name} cannot have all null values for type inference."
)

return inferred_dtype


def _type_err(item, dtype):
raise ValueError(f'Value "{item}" is of type {type(item)} not of type {dtype}')

Expand Down
14 changes: 11 additions & 3 deletions sdk/python/tests/data/data_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ def create_dataset(
entity_type: ValueType = ValueType.INT32,
feature_dtype: str = None,
feature_is_list: bool = False,
list_has_empty_list: bool = False,
) -> pd.DataFrame:
now = datetime.now().replace(microsecond=0, second=0, minute=0)
ts = pd.Timestamp(now).round("ms")
data = {
"driver_id": get_entities_for_value_type(entity_type),
"value": get_feature_values_for_dtype(feature_dtype, feature_is_list),
"value": get_feature_values_for_dtype(
feature_dtype, feature_is_list, list_has_empty_list
),
"ts_1": [
ts - timedelta(hours=4),
ts,
Expand Down Expand Up @@ -44,7 +47,9 @@ def get_entities_for_value_type(value_type: ValueType) -> List:
return value_type_map[value_type]


def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
def get_feature_values_for_dtype(
dtype: str, is_list: bool, has_empty_list: bool
) -> List:
if dtype is None:
return [0.1, None, 0.3, 4, 5]
# TODO(adchia): for int columns, consider having a better error when dealing with None values (pandas int dfs can't
Expand All @@ -57,8 +62,11 @@ def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
"bool": [True, None, False, True, False],
}
non_list_val = dtype_map[dtype]
# Duplicate the value once if this is a list
if is_list:
# TODO: Add test where all lists are empty and type inference is expected to fail.
if has_empty_list:
# Need at least one non-empty element for type inference
return [[] for n in non_list_val[:-1]] + [non_list_val[-1:]]
return [[n, n] if n is not None else None for n in non_list_val]
else:
return non_list_val
Loading

0 comments on commit 7d177b6

Please sign in to comment.