Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade Pandas dependency to 2.1 #31185

Merged
merged 22 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ea0fc6c
Upgrade to Pandas 2.1
tvalentyn Apr 2, 2024
11cc50d
Pandas 2.1: Disable interchange protocol tests.
tvalentyn Apr 26, 2024
f9143af
Exclude attrs tests as it is not supported.
tvalentyn Apr 29, 2024
c036ef0
Exclude new doctests that exercise unsupported order-sensitive ops.
tvalentyn Apr 29, 2024
97d6528
Iteration over deferred DFs is not supported
tvalentyn Apr 30, 2024
a829683
Skip 'mul' op when index is used as an axis
tvalentyn Apr 30, 2024
ee1dd1e
Exclude new tests that use index.
tvalentyn Apr 30, 2024
bae7aea
Exclude shift test as order-sensitive.
tvalentyn Apr 30, 2024
1707fcb
Exclude known failure modes.
tvalentyn May 1, 2024
294391c
Exclude failures that existed on Pandas 1.
tvalentyn May 1, 2024
e5e7ba9
Allow bulk-exclusion of an example in all tests.
tvalentyn May 1, 2024
facf186
Exclude examples that use to_timedelta.
tvalentyn May 1, 2024
a922486
Exclude the test that evaluates an inferred .tz value.
tvalentyn May 1, 2024
7c6ce08
Exclude more tz and timedelta tests.
tvalentyn May 2, 2024
d364520
Exclude a test exercising PeriodProperties.end_time
tvalentyn May 2, 2024
8186053
Exclude tests exercising unsupported GroupBy operations.
tvalentyn May 3, 2024
ba50de0
Expand the list of elementwise string methods.
tvalentyn May 3, 2024
bec644a
Exclude known WontImpl ops
tvalentyn May 3, 2024
ee96722
Fix test output normalization.
tvalentyn May 3, 2024
f7e3cce
Exclude remaining new tests that didn't work
tvalentyn May 3, 2024
5b41fb1
Remove test that uses values, an unsupported non-deferred op.
tvalentyn May 4, 2024
59bd2b5
lint
tvalentyn May 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions sdks/python/apache_beam/dataframe/doctests.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ def concat(values):

def fix(self, want, got):
if 'DeferredBase' in got:
# When we have a tuple of DataFrames, pandas prints each on a new line.
got = re.sub(r'DeferredBase\[(\d+)\],', '\\g<0>\n', got)
try:
to_compute = {
m.group(0): self._env._all_frames[int(m.group(1))]
Expand Down Expand Up @@ -381,20 +383,23 @@ def to_callable(cond):
self._skipped_set = set()

def _is_wont_implement_ok(self, example, test):
always_wont_implement = self._wont_implement_ok.get('*', [])
return any(
wont_implement(example)
for wont_implement in self._wont_implement_ok.get(test.name, []))
wont_implement(example) for wont_implement in (
self._wont_implement_ok.get(test.name, []) + always_wont_implement))

def _is_not_implemented_ok(self, example, test):
always_not_impl = self._not_implemented_ok.get('*', [])
return any(
not_implemented(example)
for not_implemented in self._not_implemented_ok.get(test.name, []))
not_implemented(example) for not_implemented in (
self._not_implemented_ok.get(test.name, []) + always_not_impl))

def run(self, test, **kwargs):
self._checker.reset()
always_skip = self._skip.get('*', [])
for example in test.examples:
if any(should_skip(example)
for should_skip in self._skip.get(test.name, [])):
for should_skip in self._skip.get(test.name, []) + always_skip):
self._skipped_set.add(example)
example.source = 'pass'
example.want = ''
Expand Down Expand Up @@ -726,6 +731,7 @@ def wrapper(fn):
verify the examples, else use PartitioningSession to simulate
distributed execution.
skip (Dict[str,str]): A set of examples to skip entirely.
If a key is '*', an example will be skipped in all test scenarios.
wont_implement_ok (Dict[str,str]): A set of examples that are allowed to
raise WontImplementError.
not_implemented_ok (Dict[str,str]): A set of examples that are allowed to
Expand Down
19 changes: 17 additions & 2 deletions sdks/python/apache_beam/dataframe/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,8 +1181,11 @@ def _set_index(self, value):
pd.DataFrame, 'hist', reason="plotting-tools")

attrs = property(
frame_base.wont_implement_method(
pd.DataFrame, 'attrs', reason='experimental'))
fget=frame_base.wont_implement_method(
pd.DataFrame, 'attrs', reason='experimental'),
fset=frame_base.wont_implement_method(
pd.DataFrame, 'attrs', reason='experimental'),
)

reorder_levels = frame_base._proxy_method(
'reorder_levels',
Expand Down Expand Up @@ -5124,13 +5127,18 @@ def rsplit(self, **kwargs):
ELEMENTWISE_STRING_METHODS = [
'capitalize',
'casefold',
'center',
'contains',
'count',
'decode',
'encode',
'endswith',
'extract',
'find',
'findall',
'fullmatch',
'get',
'index',
'isalnum',
'isalpha',
'isdecimal',
Expand All @@ -5142,22 +5150,29 @@ def rsplit(self, **kwargs):
'isupper',
'join',
'len',
'lfind',
'ljust',
'lower',
'lstrip',
'match',
'normalize',
'pad',
'partition',
'removeprefix',
'removesuffix',
'replace',
'rpartition',
'rfind',
'rindex',
'rjust',
'rstrip',
'slice',
'slice_replace',
'startswith',
'strip',
'swapcase',
'title',
'translate',
'upper',
'wrap',
'zfill',
Expand Down
14 changes: 14 additions & 0 deletions sdks/python/apache_beam/dataframe/frames_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from apache_beam.dataframe import frame_base
from apache_beam.dataframe import frames
from apache_beam.dataframe.convert import to_dataframe
from apache_beam.dataframe.doctests import teststring
from apache_beam.runners.interactive import interactive_beam as ib
from apache_beam.runners.interactive import interactive_environment as ie
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
Expand Down Expand Up @@ -363,6 +364,19 @@ def new_column(df):
})
self._run_inplace_test(new_column, df)

def test_tz_with_utc_zone_set_explicitly(self):
test = """
>>> s = pd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+03:00"])
>>> s = pd.to_datetime(s, utc=True)
>>> s
0 2020-01-01 10:00:00+00:00
1 2020-02-01 08:00:00+00:00
dtype: datetime64[ns, UTC]
>>> s.dt.tz
datetime.timezone.utc
"""
teststring(test)

def test_tz_localize_ambiguous_series(self):
# This replicates a tz_localize doctest:
# s.tz_localize('CET', ambiguous=np.array([True, True, False]))
Expand Down
Loading
Loading