Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve sub-second data for time scalars in column construction #15655

Merged
merged 5 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ def as_device_scalar(val, dtype=None):
def _is_null_host_scalar(slr):
    """Return ``True`` when the host scalar ``slr`` represents a null value.

    A value is treated as null when any of the following holds:

    * ``cudf.utils.utils.is_na_like(slr)`` is truthy,
    * ``slr`` is the ``pd.NaT`` singleton,
    * ``slr`` is a ``np.datetime64`` / ``np.timedelta64`` for which
      ``np.isnat`` reports not-a-time.

    Parameters
    ----------
    slr
        The host-side scalar to inspect.

    Returns
    -------
    bool
        ``True`` if ``slr`` is considered null, ``False`` otherwise.
    """
    if cudf.utils.utils.is_na_like(slr):
        return True
    # ``pd.NaT`` is matched by identity, independently of the NumPy
    # not-a-time test below.
    if slr is pd.NaT:
        return True
    # ``np.isnat`` is only called once the isinstance guard has passed.
    return isinstance(slr, (np.datetime64, np.timedelta64)) and bool(
        np.isnat(slr)
    )
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2163,6 +2163,19 @@ def as_column(
nan_as_null=nan_as_null,
length=length,
)
elif (
isinstance(element, (pd.Timestamp, pd.Timedelta))
or element is pd.NaT
):
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
# TODO: Remove this after
# https://github.com/apache/arrow/issues/26492
# is fixed.
return as_column(
pd.Series(arbitrary),
dtype=dtype,
nan_as_null=nan_as_null,
length=length,
)
elif not any(element is na for na in (None, pd.NA, np.nan)):
# Might have NA + element like above, but short-circuit if
# an element pyarrow/pandas might be able to parse
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ def dtypes(self):
>>> df.dtypes
float float64
int int64
datetime datetime64[us]
datetime datetime64[ns]
string object
dtype: object
"""
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def _preprocess_host_value(self, value, dtype):

if dtype is None:
if not valid:
if value is NaT:
value = value.to_numpy()

if isinstance(value, (np.datetime64, np.timedelta64)):
unit, _ = np.datetime_data(value)
if unit == "generic":
Expand Down
28 changes: 28 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2786,3 +2786,31 @@ def test_squeeze(axis, data):
def test_squeeze_invalid_axis(axis):
with pytest.raises(ValueError):
cudf.Series([1]).squeeze(axis=axis)


@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0])
def test_timestamp_series_init(data):
    """Check that a ``pd.Timestamp`` scalar builds the same Series in cudf
    as in pandas, both wrapped in a list and passed bare."""
    ts = pd.Timestamp(data)

    # Scalar wrapped in a one-element list.
    assert_eq(pd.Series([ts]), cudf.Series([ts]))

    # Scalar handed directly to the constructor.
    assert_eq(pd.Series(ts), cudf.Series(ts))


@pytest.mark.parametrize("data", [None, 123, 33243243232423, 0])
def test_timedelta_series_init(data):
    """Check that a ``pd.Timedelta`` scalar builds the same Series in cudf
    as in pandas, both wrapped in a list and passed bare."""
    delta = pd.Timedelta(data)

    # Scalar wrapped in a one-element list.
    assert_eq(pd.Series([delta]), cudf.Series([delta]))

    # Scalar handed directly to the constructor.
    assert_eq(pd.Series(delta), cudf.Series(delta))
Loading