From 4ad583261751afb89d947ce79fd6d3c3252f92a7 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 8 May 2023 10:13:28 -0400 Subject: [PATCH 01/14] json --- python/cudf/cudf/tests/test_json.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 8dcab37d20a..ccf051b581f 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -32,13 +32,11 @@ def make_numeric_dataframe(nrows, dtype): def pdf(request): types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"] typer = {"col_" + val: val for val in types} - ncols = len(types) nrows = request.param # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)], - columns=pd.Index([f"col_{typ}" for typ in types], name="foo"), + {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types} ) # Delete the name of the column index, and rename the row index test_pdf.columns.name = None From 809cc6e158c8b0c0d0aff7305a19152f12c631f9 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 9 May 2023 10:26:43 -0400 Subject: [PATCH 02/14] Start making changes to support np 1.24 --- python/cudf/cudf/__init__.py | 2 ++ python/cudf/cudf/core/column/numerical.py | 4 ++-- python/cudf/cudf/tests/test_column.py | 4 ++-- python/cudf/cudf/tests/test_csv.py | 6 +++--- python/cudf/cudf/tests/test_numerical.py | 6 +++++- python/cudf/cudf/tests/test_replace.py | 13 ++++++++++--- python/cudf/cudf/utils/queryutils.py | 2 +- 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 06310e278a2..48e622acc07 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -85,6 +85,7 @@ try: from cubinlinker.patch import patch_numba_linker_if_needed + from ptxcompiler.patch import patch_numba_codegen_if_needed except ImportError: pass else: @@ -96,6 +97,7 @@ _setup_numba_linker(_PTX_FILE) + patch_numba_codegen_if_needed() del patch_numba_linker_if_needed cuda.set_memory_manager(RMMNumbaManager) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 87e73d212ef..a35efbb6ac2 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -767,8 +767,8 @@ def _normalize_find_and_replace_input( if cudf._lib.scalar._is_null_host_scalar(col_to_normalize[0]): return normalized_column.astype(input_column_dtype) else: - col_to_normalize_casted = input_column_dtype.type( - col_to_normalize[0] + col_to_normalize_casted = np.array(col_to_normalize[0]).astype( + input_column_dtype ) if not np.isnan(col_to_normalize_casted) and ( col_to_normalize_casted != col_to_normalize[0] diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index a15afa727c0..db0446d506c 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -398,8 +398,8 @@ def test_column_view_string_slice(slc): cudf.core.column.as_column([], dtype="uint8"), ), ( - cp.array([453], dtype="uint8"), - cudf.core.column.as_column([453], dtype="uint8"), + cp.array([255], dtype="uint8"), + cudf.core.column.as_column([255], dtype="uint8"), ), ], ) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 4a7804da62c..25f173aeac7 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -150,8 +150,8 @@ def make_all_numeric_extremes_dataframe(): np_type = pdf_dtypes[gdf_dtype] if np.issubdtype(np_type, np.integer): itype = np.iinfo(np_type) - extremes = [0, +1, -1, itype.min, itype.max] - df[gdf_dtype] = np.array(extremes * 4, dtype=np_type)[:20] + extremes = [itype.min, itype.max] + df[gdf_dtype] = np.array(extremes * 10, dtype=np_type)[:20] else: ftype = np.finfo(np_type) extremes = [ @@ -1433,7 +1433,7 @@ def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype): gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) - expected = np.array(values, dtype=np_dtype) + expected = np.array(values).astype(np_dtype) actual = gdf["hex_int"].to_numpy() np.testing.assert_array_equal(expected, actual) diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index e2fbd55c051..5bb55c164fe 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -194,6 +194,7 @@ def test_to_numeric_downcast_int(data, downcast): assert_eq(expected, got) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") @pytest.mark.parametrize( "data", [ @@ -223,6 +224,7 @@ def test_to_numeric_downcast_float(data, downcast): assert_eq(expected, got) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") @pytest.mark.parametrize( "data", [ @@ -245,6 +247,7 @@ def test_to_numeric_downcast_large_float(data, downcast): assert_eq(expected, got) +@pytest.mark.filterwarnings("ignore:overflow encountered in cast") @pytest.mark.parametrize( "data", [ @@ -325,6 +328,7 @@ def test_to_numeric_downcast_string_float(data, downcast): assert_eq(expected, got) +@pytest.mark.filterwarnings("ignore:overflow encountered in cast") @pytest.mark.parametrize( "data", [ diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 9e93dd6d227..13e44e7cf59 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -944,8 +944,15 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=series_dtype) sr = cudf.from_pandas(psr) + if sr.dtype.kind in "ui": + can_replace = np.array([replacement])[0].is_integer() and np.can_cast( + int(replacement), sr.dtype + ) + else: + can_replace = np.can_cast(replacement, sr.dtype) + # Both Scalar - if sr.dtype.type(replacement) != replacement: + if not can_replace: with pytest.raises(TypeError): sr.replace(1, replacement) else: @@ -954,7 +961,7 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): assert_eq(expect, got) # to_replace is a list, replacement is a scalar - if sr.dtype.type(replacement) != replacement: + if not can_replace: with pytest.raises(TypeError): sr.replace([2, 3], replacement) @@ -974,7 +981,7 @@ def test_numeric_series_replace_dtype(series_dtype, replacement): # Both lists of equal length if ( np.dtype(type(replacement)).kind == "f" and sr.dtype.kind in {"i", "u"} - ) or (sr.dtype.type(replacement) != replacement): + ) or (not can_replace): with pytest.raises(TypeError): sr.replace([2, 3], [replacement, replacement]) else: diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 4ce89b526d6..aecbe4fd659 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -137,7 +137,7 @@ def query_compile(expr): key "args" is a sequence of name of the arguments. """ - funcid = f"queryexpr_{np.uintp(hash(expr)):x}" + funcid = f"queryexpr_{np.uintp(abs(hash(expr))):x}" # Load cache compiled = _cache.get(funcid) # Cache not found From 4e4ad8285e501e4a459dadf8dd56a96149e9e0e7 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 10 May 2023 08:35:40 -0400 Subject: [PATCH 03/14] More --- python/cudf/cudf/tests/test_feather.py | 9 +-------- python/cudf/cudf/tests/test_parquet.py | 17 ++--------------- python/cudf/cudf/tests/test_rank.py | 13 ++++++------- python/cudf/cudf/tests/test_sparse_df.py | 4 ++-- python/cudf/cudf/tests/test_unaops.py | 8 ++++++-- 5 files changed, 17 insertions(+), 34 deletions(-) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index 6cdf47ed948..83aaf0a4407 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -15,23 +15,16 @@ @pytest.fixture(params=[0, 1, 10, 100]) def pdf(request): types = NUMERIC_TYPES + ["bool"] - typer = {"col_" + val: val for val in types} - ncols = len(types) nrows = request.param # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)], - columns=pd.Index([f"col_{typ}" for typ in types], name="foo"), + {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types} ) # Delete the name of the column index, and rename the row index test_pdf.columns.name = None test_pdf.index.name = "index" - # Cast all the column dtypes to objects, rename them, and then cast to - # appropriate types - test_pdf = test_pdf.astype("object").astype(typer) - # Create non-numeric categorical data otherwise may get typecasted data = [ascii_letters[np.random.randint(0, 52)] for i in range(nrows)] test_pdf["col_category"] = pd.Series(data, dtype="category") diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index c24ff080033..3d3e731e7e6 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -69,14 +69,11 @@ def simple_pdf(request): "float32", "float64", ] - typer = {"col_" + val: val for val in types} - ncols = len(types) nrows = request.param # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)], - columns=pd.Index([f"col_{typ}" for typ in types], name="foo"), + {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types}, # Need to ensure that this index is not a RangeIndex to get the # expected round-tripping behavior from Parquet reader/writer. index=pd.Index(list(range(nrows))), @@ -85,10 +82,6 @@ def simple_pdf(request): test_pdf.columns.name = None test_pdf.index.name = "test_index" - # Cast all the column dtypes to objects, rename them, and then cast to - # appropriate types - test_pdf = test_pdf.astype("object").astype(typer) - return test_pdf @@ -115,13 +108,11 @@ def build_pdf(num_columns, day_resolution_timestamps): "datetime64[us]", "str", ] - typer = {"col_" + val: val for val in types} - ncols = len(types) nrows = num_columns.param # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - [list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)], + {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types}, columns=pd.Index([f"col_{typ}" for typ in types], name="foo"), # Need to ensure that this index is not a RangeIndex to get the # expected round-tripping behavior from Parquet reader/writer. @@ -131,10 +122,6 @@ def build_pdf(num_columns, day_resolution_timestamps): test_pdf.columns.name = None test_pdf.index.name = "test_index" - # Cast all the column dtypes to objects, rename them, and then cast to - # appropriate types - test_pdf = test_pdf.astype(typer) - # make datetime64's a little more interesting by increasing the range of # dates note that pandas will convert these to ns timestamps, so care is # taken to avoid overflowing a ns timestamp. There is also the ability to diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 9bd67309ece..0bccf02ce3c 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from itertools import chain, combinations_with_replacement, product @@ -125,7 +125,7 @@ def test_rank_error_arguments(pdf): np.full((3,), np.inf), np.full((3,), -np.inf), ] -sort_dtype_args = [np.int32, np.int64, np.float32, np.float64] +sort_dtype_args = [np.float32, np.float64] @pytest.mark.parametrize( @@ -139,13 +139,12 @@ def test_rank_error_arguments(pdf): ) def test_series_rank_combinations(elem, dtype): np.random.seed(0) + aa = np.fromiter(chain.from_iterable(elem), dtype=dtype) gdf = DataFrame() - gdf["a"] = aa = np.fromiter(chain.from_iterable(elem), np.float64).astype( - dtype - ) - ranked_gs = gdf["a"].rank(method="first") df = pd.DataFrame() + gdf["a"] = aa df["a"] = aa + ranked_gs = gdf["a"].rank(method="first") ranked_ps = df["a"].rank(method="first") # Check - assert_eq(ranked_ps, ranked_gs.to_pandas()) + assert_eq(ranked_ps, ranked_gs) diff --git a/python/cudf/cudf/tests/test_sparse_df.py b/python/cudf/cudf/tests/test_sparse_df.py index 0dd47c219c0..3248e7f72c0 100644 --- a/python/cudf/cudf/tests/test_sparse_df.py +++ b/python/cudf/cudf/tests/test_sparse_df.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. import numpy as np @@ -7,7 +7,7 @@ def test_to_dense_array(): data = np.random.random(8) - mask = np.asarray([0b11010110], dtype=np.byte) + mask = np.asarray([0b11010110]).astype(np.byte) sr = Series.from_masked_array(data=data, mask=mask, null_count=3) assert sr.has_nulls diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index 3f2f2072758..f821b06e07b 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import itertools import operator @@ -79,9 +79,13 @@ def generate_valid_scalar_unaop_combos(): @pytest.mark.parametrize("slr,dtype,op", generate_valid_scalar_unaop_combos()) def test_scalar_unary_operations(slr, dtype, op): - slr_host = cudf.dtype(dtype).type(slr) + slr_host = np.array([slr])[0].astype(cudf.dtype(dtype)) slr_device = cudf.Scalar(slr, dtype=dtype) + if op.__name__ == "neg" and np.dtype(dtype).kind == "u": + # TODO: what do we want to do here? + return + expect = op(slr_host) got = op(slr_device) From b94427fa081f5d31ad9379bfa684c64c1ea5148d Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 11 May 2023 14:51:28 -0400 Subject: [PATCH 04/14] Special-case inf --- python/cudf/cudf/core/column/numerical.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a35efbb6ac2..406c4836c5b 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -766,10 +766,12 @@ def _normalize_find_and_replace_input( if len(col_to_normalize) == 1: if cudf._lib.scalar._is_null_host_scalar(col_to_normalize[0]): return normalized_column.astype(input_column_dtype) - else: - col_to_normalize_casted = np.array(col_to_normalize[0]).astype( - input_column_dtype - ) + if np.isinf(col_to_normalize[0]): + return normalized_column + col_to_normalize_casted = np.array(col_to_normalize[0]).astype( + input_column_dtype + ) + if not np.isnan(col_to_normalize_casted) and ( col_to_normalize_casted != col_to_normalize[0] ): From ba3ede72352f20b2042ea3354d9560ce1469513a Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 11 May 2023 17:54:25 -0400 Subject: [PATCH 05/14] Remove numpy upper bounds --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/recipes/cudf/meta.yaml | 2 +- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/cudf_kafka/pyproject.toml | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4031f1aa1c3..56463ad87cc 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -49,6 +49,7 @@ dependencies: - ninja - notebook - numba>=0.56.4,<0.57 +- numpy>=1.21 - numpy>=1.21,<1.24 - numpydoc - nvcc_linux-64=11.8 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index f8074711b88..327f7f90a82 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -66,7 +66,7 @@ requirements: - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - numba >=0.56.4,<0.57 - - numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations + - numpy >=1.21 - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf {{ version }} - fastavro >=0.22.0 diff --git a/dependencies.yaml b/dependencies.yaml index 70d7f8c1ec8..b78766f6743 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -234,7 +234,7 @@ dependencies: # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - pyarrow==11.0.0.* - - numpy>=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations + - numpy>=1.21 build_python: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index d13324a7404..3685e4965a1 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cmake>=3.23.1,!=3.25.0", "cython>=0.29,<0.30", "ninja", - "numpy>=1.21,<1.24", + "numpy>=1.21", "protoc-wheel", "pyarrow==11.0.0.*", "rmm==23.6.*", diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index df0825c846a..d8b97f52864 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "cython>=0.29,<0.30", - "numpy>=1.21,<1.24", + "numpy>=1.21", "pyarrow==11.0.0.*", "setuptools", "wheel", From f884eb768d65f818403c8af4b4e59a05bf538e36 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 11 May 2023 15:32:19 -0700 Subject: [PATCH 06/14] One more place to unpin numpy --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 - dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 56463ad87cc..e0980d7263c 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -50,7 +50,6 @@ dependencies: - notebook - numba>=0.56.4,<0.57 - numpy>=1.21 -- numpy>=1.21,<1.24 - numpydoc - nvcc_linux-64=11.8 - nvtx>=0.2.1 diff --git a/dependencies.yaml b/dependencies.yaml index b78766f6743..d7c847ed1f7 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -342,7 +342,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - - numpy>=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations + - numpy>=1.21 - pandas>=1.3,<1.6.0dev0 run_cudf: common: diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 3685e4965a1..2a686fe3016 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "numba>=0.56.4,<0.57", - "numpy>=1.21,<1.24", + "numpy>=1.21", "nvtx>=0.2.1", "packaging", "pandas>=1.3,<1.6.0dev0", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index ff2a3f2d095..c6c43661e9e 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "dask==2023.3.2", "distributed==2023.3.2.1", "fsspec>=0.6.0", - "numpy>=1.21,<1.24", + "numpy>=1.21", "pandas>=1.3,<1.6.0dev0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ From 42d6a74736c947ceb78a6f3d82e0abce92a32907 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 07:46:46 -0400 Subject: [PATCH 07/14] filter warning --- python/cudf/cudf/tests/test_csv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 25f173aeac7..4a7502f729a 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -150,8 +150,8 @@ def make_all_numeric_extremes_dataframe(): np_type = pdf_dtypes[gdf_dtype] if np.issubdtype(np_type, np.integer): itype = np.iinfo(np_type) - extremes = [itype.min, itype.max] - df[gdf_dtype] = np.array(extremes * 10, dtype=np_type)[:20] + extremes = [0, +1, -1, itype.min, itype.max] + df[gdf_dtype] = np.array(extremes * 4).astype(np_type)[:20] else: ftype = np.finfo(np_type) extremes = [ @@ -324,6 +324,7 @@ def test_csv_reader_dtype_dict(use_names): assert_eq(gdf, pdf) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") @pytest.mark.parametrize("use_names", [True, False]) def test_csv_reader_dtype_extremes(use_names): # Save with the column header if not explicitly specifying a list of names From d75986b63ca7f001d101c6a3ac0b5a8d0a619ecc Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 07:50:39 -0400 Subject: [PATCH 08/14] Actually cast columns --- python/cudf/cudf/tests/test_feather.py | 5 ++++- python/cudf/cudf/tests/test_json.py | 10 ++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index 83aaf0a4407..12a325fa4e8 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -19,7 +19,10 @@ def pdf(request): # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types} + { + f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ) + for typ in types + } ) # Delete the name of the column index, and rename the row index test_pdf.columns.name = None diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index ccf051b581f..43b0ca0119a 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -31,21 +31,19 @@ def make_numeric_dataframe(nrows, dtype): @pytest.fixture(params=[0, 1, 10, 100]) def pdf(request): types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"] - typer = {"col_" + val: val for val in types} nrows = request.param # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types} + { + f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ) + for typ in types + } ) # Delete the name of the column index, and rename the row index test_pdf.columns.name = None test_pdf.index.name = "test_index" - # Cast all the column dtypes to objects, rename them, and then cast to - # appropriate types - test_pdf = test_pdf.astype("object").astype(typer) - return test_pdf From 2dfb93969701c9f08f7663c1c7d7b97e42941a01 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 08:06:42 -0400 Subject: [PATCH 09/14] Fix parquet casting --- python/cudf/cudf/tests/test_parquet.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index c41e5d16b2b..ebd226402c0 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -73,7 +73,10 @@ def simple_pdf(request): # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types}, + { + f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ) + for typ in types + }, # Need to ensure that this index is not a RangeIndex to get the # expected round-tripping behavior from Parquet reader/writer. index=pd.Index(list(range(nrows))), @@ -112,8 +115,10 @@ def build_pdf(num_columns, day_resolution_timestamps): # Create a pandas dataframe with random data of mixed types test_pdf = pd.DataFrame( - {f"col_{typ}": np.random.randint(0, nrows, nrows) for typ in types}, - columns=pd.Index([f"col_{typ}" for typ in types], name="foo"), + { + f"col_{typ}": np.random.randint(0, nrows, nrows).astype(typ) + for typ in types + }, # Need to ensure that this index is not a RangeIndex to get the # expected round-tripping behavior from Parquet reader/writer. index=pd.Index(list(range(nrows))), From 860f4396d8911353d2783f56749ffee3d86fe77c Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 12:38:49 -0400 Subject: [PATCH 10/14] Use a filterwarning instead --- python/cudf/cudf/tests/test_rank.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 0bccf02ce3c..f8a8903b518 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -125,9 +125,10 @@ def test_rank_error_arguments(pdf): np.full((3,), np.inf), np.full((3,), -np.inf), ] -sort_dtype_args = [np.float32, np.float64] +sort_dtype_args = [np.int32, np.int64, np.float32, np.float64] +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") @pytest.mark.parametrize( "elem,dtype", list( @@ -139,7 +140,7 @@ def test_rank_error_arguments(pdf): ) def test_series_rank_combinations(elem, dtype): np.random.seed(0) - aa = np.fromiter(chain.from_iterable(elem), dtype=dtype) + aa = np.fromiter(chain.from_iterable(elem), np.float64).astype(dtype) gdf = DataFrame() df = pd.DataFrame() gdf["a"] = aa From 8221628529eabf211d6d5695e2b65bf552b161b2 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 12:43:02 -0400 Subject: [PATCH 11/14] More filterwarnings --- python/cudf/cudf/tests/test_unaops.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index f821b06e07b..a7502051a78 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -77,15 +77,12 @@ def generate_valid_scalar_unaop_combos(): return results +@pytest.mark.filterwarnings("ignore:overflow encountered in scalar negative") @pytest.mark.parametrize("slr,dtype,op", generate_valid_scalar_unaop_combos()) def test_scalar_unary_operations(slr, dtype, op): slr_host = np.array([slr])[0].astype(cudf.dtype(dtype)) slr_device = cudf.Scalar(slr, dtype=dtype) - if op.__name__ == "neg" and np.dtype(dtype).kind == "u": - # TODO: what do we want to do here? - return - expect = op(slr_host) got = op(slr_device) From abdc0268dee772802ae2b9bf08a4925b13381af0 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 12:52:10 -0400 Subject: [PATCH 12/14] Shift, not abs --- python/cudf/cudf/utils/queryutils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index aecbe4fd659..bc0ac9fb390 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -137,7 +137,8 @@ def query_compile(expr): key "args" is a sequence of name of the arguments. """ - funcid = f"queryexpr_{np.uintp(abs(hash(expr))):x}" + # hash returns in the semi-open interval [-2**63, 2**63) + funcid = f"queryexpr_{(hash(expr) + 2**63):x}" # Load cache compiled = _cache.get(funcid) # Cache not found From 37c9514a58102479e32de969fb332395037b12d4 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Mon, 15 May 2023 15:37:45 -0400 Subject: [PATCH 13/14] More filterwarnings --- python/cudf/cudf/tests/test_csv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 4a7502f729a..b34340295f2 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -2150,6 +2150,7 @@ def test_default_integer_bitwidth_partial( ) +@pytest.mark.filterwarnings("ignore:invalid value encountered in cast") def test_default_integer_bitwidth_extremes( cudf_extreme_numeric_dataframe, default_integer_bitwidth ): From e4b43ac6acca9213742260c0c7055c4a83e2454c Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 24 May 2023 15:27:30 -0400 Subject: [PATCH 14/14] Style --- python/cudf/cudf/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 137419b30a6..de0f2d67add 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -88,7 +88,6 @@ from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator - cuda.set_memory_manager(RMMNumbaManager) cupy.cuda.set_allocator(rmm_cupy_allocator)