Deprecate ignoring empty objects in concat #14672

Merged
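
Summary note: this change mirrors pandas' deprecation of silently dropping empty objects when determining the result dtype of a concatenation. The public cudf.concat path (and Index._concat) now emits a FutureWarning whenever any input is empty, while cudf-internal call sites wrap their own concat calls in warnings.catch_warnings() so the warning only surfaces for user-initiated concatenations. Below is a minimal sketch of the two halves of that pattern; the names and bodies are illustrative stand-ins, not the exact cudf internals shown in the diff.

# Sketch only -- illustrative, not the exact cudf implementation.
import warnings

def concat(objs):
    # 1) Public path: warn when any input is empty.
    if any(len(obj) == 0 for obj in objs):
        warnings.warn(
            "The behavior of array concatenation with empty entries is "
            "deprecated. ...",
            FutureWarning,
        )
    # Real implementation dispatches on Series/DataFrame/Index; plain
    # Python lists stand in here so the sketch runs as-is.
    non_empty = [obj for obj in objs if len(obj)]
    return [x for obj in non_empty for x in obj]

# 2) Library-internal call sites: suppress the warning so it is not
#    raised for concatenations cudf performs on the user's behalf.
def internal_helper(parts):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        return concat(parts)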
36 changes: 23 additions & 13 deletions python/cudf/cudf/core/dataframe.py
@@ -910,7 +910,9 @@ def _init_from_series_list(self, data, columns, index):

transpose = self.T
else:
concat_df = cudf.concat(data, axis=1)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
concat_df = cudf.concat(data, axis=1)

cols = concat_df._data.to_pandas_index()
if cols.dtype == "object":
@@ -1920,9 +1922,11 @@ def _get_renderable_dataframe(self):
lower_left = self.tail(lower_rows).iloc[:, :left_cols]
lower_right = self.tail(lower_rows).iloc[:, right_cols:]

upper = cudf.concat([upper_left, upper_right], axis=1)
lower = cudf.concat([lower_left, lower_right], axis=1)
output = cudf.concat([upper, lower])
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
upper = cudf.concat([upper_left, upper_right], axis=1)
lower = cudf.concat([lower_left, lower_right], axis=1)
output = cudf.concat([upper, lower])

output = self._clean_nulls_from_dataframe(output)
output._index = output._index._clean_nulls_from_index()
@@ -5154,14 +5158,17 @@ def describe(
None,
)

return cudf.concat(
[
series.reindex(names, copy=False)
for series in describe_series_list
],
axis=1,
sort=False,
)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
res = cudf.concat(
[
series.reindex(names, copy=False)
for series in describe_series_list
],
axis=1,
sort=False,
)
return res

@_cudf_nvtx_annotate
def to_pandas(self, *, nullable: bool = False) -> pd.DataFrame:
@@ -6258,7 +6265,10 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
if len(mode_results) == 0:
return DataFrame()

df = cudf.concat(mode_results, axis=1)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
df = cudf.concat(mode_results, axis=1)

if isinstance(df, Series):
df = df.to_frame()

12 changes: 9 additions & 3 deletions python/cudf/cudf/core/groupby/groupby.py
@@ -1319,13 +1319,17 @@ def _post_process_chunk_results(
# group is a row-like "Series" where the index labels
# are the same as the original calling DataFrame
if _is_row_of(chunk_results[0], self.obj):
result = cudf.concat(chunk_results, axis=1).T
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
result = cudf.concat(chunk_results, axis=1).T
result.index = group_names
result.index.names = self.grouping.names
# When the UDF is like df.x + df.y, the result for each
# group is the same length as the original group
elif len(self.obj) == sum(len(chk) for chk in chunk_results):
result = cudf.concat(chunk_results)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
result = cudf.concat(chunk_results)
index_data = group_keys._data.copy(deep=True)
index_data[None] = grouped_values.index._column
result.index = cudf.MultiIndex._from_data(index_data)
@@ -1336,7 +1340,9 @@
f"type {type(chunk_results[0])}"
)
else:
result = cudf.concat(chunk_results)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
result = cudf.concat(chunk_results)
if self._group_keys:
index_data = group_keys._data.copy(deep=True)
index_data[None] = grouped_values.index._column
14 changes: 13 additions & 1 deletion python/cudf/cudf/core/index.py
@@ -1103,6 +1103,16 @@ def _values(self):
@_cudf_nvtx_annotate
def _concat(cls, objs):
non_empties = [index for index in objs if len(index)]
if len(objs) != len(non_empties):
# Do not remove until pandas-3.0 support is added.
warnings.warn(
"The behavior of array concatenation with empty entries is "
"deprecated. In a future version, this will no longer exclude "
"empty items when determining the result dtype. "
"To retain the old behavior, exclude the empty entries before "
"the concat operation.",
FutureWarning,
)
if all(isinstance(obj, RangeIndex) for obj in non_empties):
result = _concat_range_index(non_empties)
else:
@@ -1300,7 +1310,9 @@ def __repr__(self):
top = self[0:mr]
bottom = self[-1 * mr :]

preprocess = cudf.concat([top, bottom])
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
preprocess = cudf.concat([top, bottom])
else:
preprocess = self

9 changes: 6 additions & 3 deletions python/cudf/cudf/core/join/_join_helpers.py
@@ -2,6 +2,7 @@

from __future__ import annotations

import warnings
from collections import abc
from typing import TYPE_CHECKING, Any, Tuple, cast

@@ -170,9 +171,11 @@ def _match_categorical_dtypes_both(
return lcol, rcol.astype(ltype)
else:
# merge categories
merged_categories = cudf.concat(
[ltype.categories, rtype.categories]
).unique()
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
merged_categories = cudf.concat(
[ltype.categories, rtype.categories]
).unique()
common_type = cudf.CategoricalDtype(
categories=merged_categories, ordered=False
)
21 changes: 12 additions & 9 deletions python/cudf/cudf/core/multiindex.py
@@ -6,6 +6,7 @@
import numbers
import operator
import pickle
import warnings
from collections import abc
from functools import cached_property
from numbers import Integral
@@ -717,15 +718,17 @@ def _compute_validity_mask(self, index, row_tuple, max_length):
continue
lookup[i] = cudf.Series(row)
frame = cudf.DataFrame(dict(enumerate(index._data.columns)))
data_table = cudf.concat(
[
frame,
cudf.DataFrame(
{"idx": cudf.Series(column.arange(len(frame)))}
),
],
axis=1,
)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
data_table = cudf.concat(
[
frame,
cudf.DataFrame(
{"idx": cudf.Series(column.arange(len(frame)))}
),
],
axis=1,
)
# Sort indices in pandas compatible mode
# because we want the indices to be fetched
# in a deterministic order.
14 changes: 13 additions & 1 deletion python/cudf/cudf/core/reshape.py
@@ -5,6 +5,7 @@
from typing import Dict, Optional

import cupy
import warnings
import numpy as np
import pandas as pd

@@ -320,9 +321,20 @@ def concat(objs, axis=0, join="outer", ignore_index=False, sort=None):
df = cudf.DataFrame()
_normalize_series_and_dataframe(objs, axis=axis)

any_empty = any(obj.empty for obj in objs)
if any_empty:
# Do not remove until pandas-3.0 support is added.
warnings.warn(
"The behavior of array concatenation with empty entries is "
"deprecated. In a future version, this will no longer exclude "
"empty items when determining the result dtype. "
"To retain the old behavior, exclude the empty entries before "
"the concat operation.",
FutureWarning,
)
# Inner joins involving empty data frames always return empty dfs, but
# We must delay returning until we have set the column names.
empty_inner = any(obj.empty for obj in objs) and join == "inner"
empty_inner = any_empty and join == "inner"

objs = [obj for obj in objs if obj.shape != (0, 0)]

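
For reference, this is roughly how the deprecation surfaces to users once the reshape.py change above is in place. The session below is hypothetical but uses only existing API (cudf.DataFrame, DataFrame.empty, cudf.concat); excluding the empty inputs before concatenating retains the old dtype behavior and avoids the warning.

import cudf

df = cudf.DataFrame({"a": [1, 2]})
empty = cudf.DataFrame()

# Emits FutureWarning after this change: the empty frame is still
# ignored when determining the result dtype, but that is deprecated.
result = cudf.concat([df, empty])

# To retain the old behavior without the warning, drop empties first.
non_empty = [obj for obj in (df, empty) if not obj.empty]
result = cudf.concat(non_empty)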
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/series.py
@@ -1429,7 +1429,9 @@ def __repr__(self):
if max_rows not in (0, None) and len(self) > max_rows:
top = self.head(int(max_rows / 2 + 1))
bottom = self.tail(int(max_rows / 2 + 1))
preprocess = cudf.concat([top, bottom])
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
preprocess = cudf.concat([top, bottom])
else:
preprocess = self.copy()
preprocess.index = preprocess.index._clean_nulls_from_index()
16 changes: 9 additions & 7 deletions python/cudf/cudf/io/parquet.py
@@ -794,13 +794,15 @@ def _parquet_to_frame(
dtype=_dtype,
)

# Concatenate dfs and return.
# Assume we can ignore the index if it has no name.
return (
cudf.concat(dfs, ignore_index=dfs[-1].index.name is None)
if len(dfs) > 1
else dfs[0]
)
if len(dfs) > 1:
# Concatenate dfs and return.
# Assume we can ignore the index if it has no name.
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
res = cudf.concat(dfs, ignore_index=dfs[-1].index.name is None)
return res
else:
return dfs[0]


@_cudf_nvtx_annotate
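
A downstream test could assert the new deprecation explicitly. Below is a minimal pytest sketch, not part of this diff; the match string is just a fragment of the warning message added above.

import cudf
import pytest

def test_concat_with_empty_objects_warns():
    df = cudf.DataFrame({"a": [1, 2]})
    # The empty DataFrame should trigger the new FutureWarning.
    with pytest.warns(FutureWarning, match="array concatenation with empty entries"):
        cudf.concat([df, cudf.DataFrame()])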