From d91d121e4405d094a4160b8dd51ac12c1a8158f2 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sat, 18 Apr 2020 15:15:19 +0200 Subject: [PATCH 1/4] add default dtype for empty series --- eland/operations.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/eland/operations.py b/eland/operations.py index d9734aee..1d463bf2 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -40,6 +40,10 @@ SizeTask, ) +with warnings.catch_warnings() as w: + warnings.simplefilter("ignore") + EMPTY_SERIES_DTYPE = pd.Series().dtype + class Operations: """ @@ -340,7 +344,8 @@ def _metric_aggs( # Return single value if this is a series # if len(numeric_source_fields) == 1: # return np.float64(results[numeric_source_fields[0]]) - s = pd.Series(data=results, index=results.keys()) + out_dtype = EMPTY_SERIES_DTYPE if not results else None + s = pd.Series(data=results, index=results.keys(), dtype=out_dtype) return s @@ -390,7 +395,8 @@ def _terms_aggs(self, query_compiler, func, es_size=None): except IndexError: name = None - s = pd.Series(data=results, index=results.keys(), name=name) + out_dtype = EMPTY_SERIES_DTYPE if not results else None + s = pd.Series(data=results, index=results.keys(), name=name, dtype=out_dtype) return s From 479755076fadfa55c27023da1f5e8c20b760c0c6 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sat, 18 Apr 2020 17:44:49 +0200 Subject: [PATCH 2/4] add function for building series, add test --- eland/operations.py | 12 ++++++++---- eland/tests/dataframe/test_dtypes_pytest.py | 5 +++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/eland/operations.py b/eland/operations.py index 1d463bf2..340fbbad 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -45,6 +45,12 @@ EMPTY_SERIES_DTYPE = pd.Series().dtype +def build_series(data, dtype=None, **kwargs): + out_dtype = EMPTY_SERIES_DTYPE if not data else dtype + s = pd.Series(data=data, index=data.keys(), dtype=out_dtype, **kwargs) + return s + + class Operations: """ A collector of the queries and selectors we apply to queries to return the appropriate results. @@ -344,8 +350,7 @@ def _metric_aggs( # Return single value if this is a series # if len(numeric_source_fields) == 1: # return np.float64(results[numeric_source_fields[0]]) - out_dtype = EMPTY_SERIES_DTYPE if not results else None - s = pd.Series(data=results, index=results.keys(), dtype=out_dtype) + s = build_series(results) return s @@ -395,8 +400,7 @@ def _terms_aggs(self, query_compiler, func, es_size=None): except IndexError: name = None - out_dtype = EMPTY_SERIES_DTYPE if not results else None - s = pd.Series(data=results, index=results.keys(), name=name, dtype=out_dtype) + s = build_series(results, name=name) return s diff --git a/eland/tests/dataframe/test_dtypes_pytest.py b/eland/tests/dataframe/test_dtypes_pytest.py index 1cbb14df..819109f9 100644 --- a/eland/tests/dataframe/test_dtypes_pytest.py +++ b/eland/tests/dataframe/test_dtypes_pytest.py @@ -17,6 +17,7 @@ import numpy as np from pandas.testing import assert_series_equal +from eland.operations import build_series, EMPTY_SERIES_DTYPE from eland.tests.common import TestData from eland.tests.common import assert_pandas_eland_frame_equal @@ -42,3 +43,7 @@ def test_flights_select_dtypes(self): pd_flights.select_dtypes(include=np.number), ed_flights.select_dtypes(include=np.number), ) + + def test_emtpy_series_dtypes(self): + s = build_series({}) + assert s.dtype == EMPTY_SERIES_DTYPE From c860be402c371f120986fa95c7acc6621916a263 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sun, 19 Apr 2020 09:40:46 +0200 Subject: [PATCH 3/4] remove unused variable, capture warnings with record = True --- eland/operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eland/operations.py b/eland/operations.py index 340fbbad..19706959 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -40,7 +40,7 @@ SizeTask, ) -with warnings.catch_warnings() as w: +with warnings.catch_warnings(record=True): warnings.simplefilter("ignore") EMPTY_SERIES_DTYPE = pd.Series().dtype From d695c82aa748085d034418115b7dd75f6cacf821 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Sun, 19 Apr 2020 14:26:26 +0200 Subject: [PATCH 4/4] add empty warning assert --- eland/operations.py | 2 +- eland/tests/dataframe/test_dtypes_pytest.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/eland/operations.py b/eland/operations.py index 19706959..aa344ff4 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -40,7 +40,7 @@ SizeTask, ) -with warnings.catch_warnings(record=True): +with warnings.catch_warnings(): warnings.simplefilter("ignore") EMPTY_SERIES_DTYPE = pd.Series().dtype diff --git a/eland/tests/dataframe/test_dtypes_pytest.py b/eland/tests/dataframe/test_dtypes_pytest.py index 819109f9..2f23d479 100644 --- a/eland/tests/dataframe/test_dtypes_pytest.py +++ b/eland/tests/dataframe/test_dtypes_pytest.py @@ -14,6 +14,7 @@ # File called _pytest for PyCharm compatability +import warnings import numpy as np from pandas.testing import assert_series_equal @@ -45,5 +46,7 @@ def test_flights_select_dtypes(self): ) def test_emtpy_series_dtypes(self): - s = build_series({}) + with warnings.catch_warnings(record=True) as w: + s = build_series({}) assert s.dtype == EMPTY_SERIES_DTYPE + assert w == []