Skip to content

Commit

Permalink
Fix convert_dtypes with convert_integer=False/convert_floating=True (#…
Browse files Browse the repository at this point in the history
…15964)

If `convert_integer=False`, `convert_dtypes` should make no attempt to convert floating-point columns to integer, even when `convert_floating=True`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15964
  • Loading branch information
mroeschke authored Jul 15, 2024
1 parent ceb73d9 commit 04330f2
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 15 deletions.
34 changes: 19 additions & 15 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6235,13 +6235,13 @@ def rank(

def convert_dtypes(
self,
infer_objects=True,
convert_string=True,
convert_integer=True,
convert_boolean=True,
convert_floating=True,
infer_objects: bool = True,
convert_string: bool = True,
convert_integer: bool = True,
convert_boolean: bool = True,
convert_floating: bool = True,
dtype_backend=None,
):
) -> Self:
"""
Convert columns to the best possible nullable dtypes.
Expand All @@ -6252,17 +6252,21 @@ def convert_dtypes(
All other dtypes are always returned as-is as all dtypes in
cudf are nullable.
"""
result = self.copy()

if convert_floating:
# cast any floating columns to int64 if
# they are all integer data:
for name, col in result._data.items():
if not (convert_floating and convert_integer):
return self.copy()
else:
cols = []
for col in self._columns:
if col.dtype.kind == "f":
col = col.fillna(0)
if cp.allclose(col, col.astype("int64")):
result._data[name] = col.astype("int64")
return result
as_int = col.astype("int64")
if cp.allclose(col, as_int):
cols.append(as_int)
continue
cols.append(col)
return self._from_data_like_self(
self._data._from_columns_like_self(cols, verify=False)
)

@_warn_no_dask_cudf
def __dask_tokenize__(self):
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/tests/series/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,18 @@ def test_convert_dtypes(data, dtype):
assert_eq(expect, got)


def test_convert_integer_false_convert_floating_true():
    """cudf must match pandas when integer conversion is disabled.

    With ``convert_integer=False`` and ``convert_floating=True`` the
    cudf result, converted to a nullable pandas Series, is compared
    against what pandas itself produces for the same input.
    """
    # The first literal rounds to exactly 1.0 in float64, so every value
    # is a whole number.
    values = [1.000000000000000000000000001, 1]
    flags = {"convert_integer": False, "convert_floating": True}

    expected = pd.Series(values).convert_dtypes(**flags)

    converted = cudf.Series(values).convert_dtypes(**flags)
    result = converted.to_pandas(nullable=True)

    assert_eq(result, expected)


# Now write the same test, but construct a DataFrame
# as input instead of parametrizing:

0 comments on commit 04330f2

Please sign in to comment.