diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh index f8ddbaba0f3..30e3ffc9a43 100755 --- a/ci/cudf_pandas_scripts/third-party-integration/test.sh +++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh @@ -26,6 +26,8 @@ main() { LIBS=${LIBS#[} LIBS=${LIBS%]} + ANY_FAILURES=0 + for lib in ${LIBS//,/ }; do lib=$(echo "$lib" | tr -d '""') echo "Running tests for library $lib" @@ -56,10 +58,6 @@ main() { rapids-logger "Check GPU usage" nvidia-smi - EXITCODE=0 - trap "EXITCODE=1" ERR - set +e - rapids-logger "pytest ${lib}" NUM_PROCESSES=8 @@ -72,12 +70,20 @@ main() { fi done + EXITCODE=0 + trap "EXITCODE=1" ERR + set +e + TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib} + set -e rapids-logger "Test script exiting with value: ${EXITCODE}" + if [[ ${EXITCODE} != 0 ]]; then + ANY_FAILURES=1 + fi done - exit ${EXITCODE} + exit ${ANY_FAILURES} } main "$@" diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index e726b7fdca1..3891110e9d3 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -76,13 +76,6 @@ files: - py_version - test_base - test_xgboost - test_catboost: - output: none - includes: - - cuda_version - - py_version - - test_base - - test_catboost test_cuml: output: none includes: @@ -251,14 +244,6 @@ dependencies: - pip - pip: - xgboost>=2.0.1 - test_catboost: - common: - - output_types: conda - packages: - - numpy - - scipy - - scikit-learn - - catboost test_cuml: common: - output_types: conda diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py deleted file mode 100644 index 04cc69231fe..00000000000 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -import numpy as np -import pandas as pd -import pytest -from catboost import CatBoostClassifier, CatBoostRegressor, Pool -from sklearn.datasets import make_classification, make_regression - -rng = np.random.default_rng(seed=42) - - -def assert_catboost_equal(expect, got, rtol=1e-7, atol=0.0): - if isinstance(expect, (tuple, list)): - assert len(expect) == len(got) - for e, g in zip(expect, got): - assert_catboost_equal(e, g, rtol, atol) - elif isinstance(expect, np.ndarray): - np.testing.assert_allclose(expect, got, rtol=rtol, atol=atol) - elif isinstance(expect, pd.DataFrame): - pd.testing.assert_frame_equal(expect, got) - elif isinstance(expect, pd.Series): - pd.testing.assert_series_equal(expect, got) - else: - assert expect == got - - -pytestmark = pytest.mark.assert_eq(fn=assert_catboost_equal) - - -@pytest.fixture -def regression_data(): - X, y = make_regression(n_samples=100, n_features=10, random_state=42) - return pd.DataFrame(X), pd.Series(y) - - -@pytest.fixture -def classification_data(): - X, y = make_classification( - n_samples=100, n_features=10, n_classes=2, random_state=42 - ) - return pd.DataFrame(X), pd.Series(y) - - -def test_catboost_regressor_with_dataframe(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_regressor_with_numpy(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_classifier_with_dataframe(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_classifier_with_numpy(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_pool_and_dataframe(regression_data): - X, y = regression_data - train_pool = Pool(X, y) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X) - return predictions - - -def test_catboost_with_pool_and_numpy(regression_data): - X, y = regression_data - train_pool = Pool(X.values, y.values) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_categorical_features(): - data = { - "numerical_feature": rng.standard_normal(100), - "categorical_feature": rng.choice(["A", "B", "C"], size=100), - "target": rng.integers(0, 2, size=100), - } - df = pd.DataFrame(data) - X = df[["numerical_feature", "categorical_feature"]] - y = df["target"] - cat_features = ["categorical_feature"] - model = CatBoostClassifier( - iterations=10, verbose=0, cat_features=cat_features - ) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -@pytest.mark.parametrize( - "X, y", - [ - ( - pd.DataFrame(rng.standard_normal((100, 5))), - pd.Series(rng.standard_normal(100)), - ), - (rng.standard_normal((100, 5)), rng.standard_normal(100)), - ], -) -def test_catboost_train_test_split(X, y): - from sklearn.model_selection import train_test_split - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X_train, y_train) - predictions = model.predict(X_test) - return len(X_train), len(X_test), len(y_train), len(y_test), predictions diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index bef02c86355..8be48953974 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -71,6 +71,9 @@ def test_holoviews_heatmap(df): ) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 1909392b9f7..c91808021e8 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -33,6 +33,9 @@ def assert_plots_equal(expect, got): pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_line(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") @@ -40,6 +43,9 @@ def test_line(): return plt.gca() +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_bar(): data = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) ax = data.plot(kind="bar") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py index 472f1889354..4d35d9e8946 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -37,6 +37,9 @@ def test_numpy_dot(df): return np.dot(df, df.T) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_numpy_fft(sr): fft = np.fft.fft(sr) return fft diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py index ad287471aa0..7cea635afc4 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -116,6 +116,9 @@ def test_torch_train(data): return model(test_x1, test_x2) +@pytest.mark.skip( + reason="AssertionError: The values for attribute 'device' do not match: cpu != cuda:0." +) def test_torch_tensor_ctor(): s = pd.Series(range(5)) return torch.tensor(s.values) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index 021c5bac9b7..f6a8a96ae3c 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -54,6 +54,9 @@ def test_scatter(df): return ax +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_lineplot_with_sns_data(): df = sns.load_dataset("flights") ax = sns.lineplot(data=df, x="month", y="passengers") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py index 0777d982ac2..f275659288e 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py @@ -41,7 +41,7 @@ def test_multidimensional_distributed_timeseries(dask_client): rng = np.random.default_rng(seed=42) # Each row represents data from a different dimension while each column represents # data from the same dimension - your_time_series = rng.random(3, 1000) + your_time_series = rng.random((3, 1000)) # Approximately, how many data points might be found in a pattern window_size = 50 diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py index ba1f518cbfd..b4fad3024e7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -271,6 +271,7 @@ def call(self, values): return tf.concat(values, axis=-1) +@pytest.mark.xfail(reason="ValueError: Invalid dtype: object") def test_full_example_train_with_df(df, target): # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example # Inputs are directly passed as dictionary of series diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 70f1e6a4250..0fd632507a6 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,6 +113,9 @@ def test_with_external_memory( return predt +@pytest.mark.skip( + reason="TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly." +) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device)