diff --git a/docs/source/conf.py b/docs/source/conf.py index a6c6ee3f..f03dbcd6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -66,11 +66,11 @@ extlinks = { "pandas_api_docs": ( - "https://pandas.pydata.org/pandas-docs/version/0.25.3/reference/api/%s.html", + "https://pandas.pydata.org/pandas-docs/stable/reference/api/%s.html", "", ), "pandas_user_guide": ( - "https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html", + "https://pandas.pydata.org/pandas-docs/stable/user_guide/%s.html", "Pandas User Guide/", ), "es_api_docs": ( diff --git a/docs/source/reference/api/eland.Series.isna.rst b/docs/source/reference/api/eland.Series.isna.rst new file mode 100644 index 00000000..3dcf7b93 --- /dev/null +++ b/docs/source/reference/api/eland.Series.isna.rst @@ -0,0 +1,6 @@ +eland.Series.isna +================== + +.. currentmodule:: eland + +.. automethod:: Series.isna diff --git a/docs/source/reference/api/eland.Series.notna.rst b/docs/source/reference/api/eland.Series.notna.rst new file mode 100644 index 00000000..0167e08e --- /dev/null +++ b/docs/source/reference/api/eland.Series.notna.rst @@ -0,0 +1,6 @@ +eland.Series.notna +================== + +.. currentmodule:: eland + +.. 
automethod:: Series.notna diff --git a/docs/source/reference/series.rst b/docs/source/reference/series.rst index 3ab6889a..0c834ec6 100644 --- a/docs/source/reference/series.rst +++ b/docs/source/reference/series.rst @@ -74,6 +74,8 @@ Reindexing / selection / label manipulation :toctree: api/ Series.rename + Series.isna + Series.notna Plotting ~~~~~~~~ diff --git a/eland/filter.py b/eland/filter.py index 2f5800c0..5197cfdf 100644 --- a/eland/filter.py +++ b/eland/filter.py @@ -141,7 +141,7 @@ def __init__(self, field: str, value: str) -> None: class IsNull(BooleanFilter): def __init__(self, field: str) -> None: super().__init__() - self._filter = {"missing": {"field": field}} + self._filter = {"bool": {"must_not": {"exists": {"field": field}}}} class NotNull(BooleanFilter): diff --git a/eland/series.py b/eland/series.py index 4343d840..c247f988 100644 --- a/eland/series.py +++ b/eland/series.py @@ -39,6 +39,8 @@ LessEqual, ScriptFilter, IsIn, + IsNull, + NotNull, ) @@ -468,6 +470,41 @@ def isin(self, other): else: raise NotImplementedError(other, type(other)) + def isna(self): + """ + Detect missing values. + + Returns + ------- + eland.Series + Mask of bool values for each element in Series that indicates whether an element is an NA value. + + See Also + -------- + :pandas_api_docs:`pandas.Series.isna` + """ + return IsNull(field=self.name) + + isnull = isna + + def notna(self): + """ + Detect existing (non-missing) values. 
+ + Returns + ------- + eland.Series + Mask of bool values for each element in Series that indicates whether an element is not an NA value + + See Also + -------- + :pandas_api_docs:`pandas.Series.notna` + + """ + return NotNull(field=self.name) + + notnull = notna + @property def ndim(self): """ diff --git a/eland/tests/operators/test_operators_pytest.py b/eland/tests/operators/test_operators_pytest.py index 6539de47..ead670e1 100644 --- a/eland/tests/operators/test_operators_pytest.py +++ b/eland/tests/operators/test_operators_pytest.py @@ -31,7 +31,7 @@ def test_leaf_boolean_filter(self): assert Like("a", "a*b").build() == {"wildcard": {"a": "a*b"}} assert Rlike("a", "a*b").build() == {"regexp": {"a": "a*b"}} assert Startswith("a", "jj").build() == {"prefix": {"a": "jj"}} - assert IsNull("a").build() == {"missing": {"field": "a"}} + assert IsNull("a").build() == {"bool": {"must_not": {"exists": {"field": "a"}}}} assert NotNull("a").build() == {"exists": {"field": "a"}} assert ScriptFilter( 'doc["num1"].value > params.param1', lang="painless", params={"param1": 5} diff --git a/eland/tests/series/test_na_pytest.py b/eland/tests/series/test_na_pytest.py new file mode 100644 index 00000000..7ec9e4e7 --- /dev/null +++ b/eland/tests/series/test_na_pytest.py @@ -0,0 +1,44 @@ +# Licensed to Elasticsearch B.V under one or more agreements. +# Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+# See the LICENSE file in the project root for more information + +from eland import eland_to_pandas +from eland.tests.common import TestData +from eland.tests.common import assert_pandas_eland_frame_equal + + +class TestSeriesNA(TestData): + columns = [ + "currency", + "customer_full_name", + "geoip.country_iso_code", + "geoip.region_name", + ] + + def test_not_isna(self): + ed_ecommerce = self.ed_ecommerce() + pd_ecommerce = eland_to_pandas(ed_ecommerce) + + for column in self.columns: + not_isna_ed_ecommerce = ed_ecommerce[~ed_ecommerce[column].isna()] + not_isna_pd_ecommerce = pd_ecommerce[~pd_ecommerce[column].isna()] + assert_pandas_eland_frame_equal( + not_isna_pd_ecommerce, not_isna_ed_ecommerce + ) + + def test_isna(self): + ed_ecommerce = self.ed_ecommerce() + pd_ecommerce = eland_to_pandas(ed_ecommerce) + + isna_ed_ecommerce = ed_ecommerce[ed_ecommerce["geoip.region_name"].isna()] + isna_pd_ecommerce = pd_ecommerce[pd_ecommerce["geoip.region_name"].isna()] + assert_pandas_eland_frame_equal(isna_pd_ecommerce, isna_ed_ecommerce) + + def test_notna(self): + ed_ecommerce = self.ed_ecommerce() + pd_ecommerce = eland_to_pandas(ed_ecommerce) + + for column in self.columns: + notna_ed_ecommerce = ed_ecommerce[ed_ecommerce[column].notna()] + notna_pd_ecommerce = pd_ecommerce[pd_ecommerce[column].notna()] + assert_pandas_eland_frame_equal(notna_pd_ecommerce, notna_ed_ecommerce)