diff --git a/docs/reference/feature-servers/go-feature-retrieval.md b/docs/reference/feature-servers/go-feature-retrieval.md index 999a142c07..415817dd85 100644 --- a/docs/reference/feature-servers/go-feature-retrieval.md +++ b/docs/reference/feature-servers/go-feature-retrieval.md @@ -10,7 +10,7 @@ The Go Feature Retrieval component currently only supports Redis and Sqlite as o ## Installation -As long as you are running macOS or linux x86 with python version 3.7-3.10, the go component comes pre-compiled when you run install feast. +As long as you are running macOS or linux, on x86, with python version 3.7-3.10, the go component comes pre-compiled when you install feast. For developers, if you want to build from source, run `make compile-go-lib` to build and compile the go server. diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index cb6e874f8a..a7d8b25abf 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -299,11 +299,25 @@ def evaluate_offline_job(): if created_timestamp_column else [event_timestamp_column] ) + # try-catch block is added to deal with this issue https://github.com/dask/dask/issues/8939. + # TODO(kevjumba): remove try catch when fix is merged upstream in Dask. + try: + if created_timestamp_column: + source_df = source_df.sort_values(by=created_timestamp_column,) + + source_df = source_df.sort_values(by=event_timestamp_column) + + except ZeroDivisionError: + # Use 1 partition to get around case where everything in timestamp column is the same so the partition algorithm doesn't + # try to divide by zero. + if created_timestamp_column: + source_df = source_df.sort_values( + by=created_timestamp_column, npartitions=1 + ) - if created_timestamp_column: - source_df = source_df.sort_values(by=created_timestamp_column) - - source_df = source_df.sort_values(by=event_timestamp_column) + source_df = source_df.sort_values( + by=event_timestamp_column, npartitions=1 + ) source_df = source_df[ (source_df[event_timestamp_column] >= start_date)