From 32fe2691f7eea7d2d2ed3bf3460965450f2ba256 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 23 Oct 2024 13:02:21 +0200 Subject: [PATCH 001/131] add finiteness_checker pybind11 bindings --- onedal/dal.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/onedal/dal.cpp b/onedal/dal.cpp index 814b22aa8b..14e0aed35d 100644 --- a/onedal/dal.cpp +++ b/onedal/dal.cpp @@ -75,6 +75,9 @@ namespace oneapi::dal::python { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 ONEDAL_PY_INIT_MODULE(logistic_regression); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 + ONEDAL_PY_INIT_MODULE(finiteness_checker); + #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 #endif // ONEDAL_DATA_PARALLEL_SPMD #ifdef ONEDAL_DATA_PARALLEL_SPMD @@ -133,6 +136,9 @@ namespace oneapi::dal::python { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 init_logistic_regression(m); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 + init_finiteness_checker(m); + #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 } #endif // ONEDAL_DATA_PARALLEL_SPMD From cdbf1b5e5bfdc8036beee80545ea11e553ceac99 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 23 Oct 2024 13:04:00 +0200 Subject: [PATCH 002/131] added finiteness checker --- onedal/primitives/finiteness_checker.cpp | 96 ++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 onedal/primitives/finiteness_checker.cpp diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp new file mode 100644 index 0000000000..6aaf7c52d6 --- /dev/null +++ b/onedal/primitives/finiteness_checker.cpp @@ -0,0 +1,96 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker.hpp" + +#include "onedal/common.hpp" +#include "onedal/version.hpp" + +namespace py = pybind11; + +namespace oneapi::dal::python { + +template +struct method2t { + method2t(const Task& task, const Ops& ops) : ops(ops) {} + + template + auto operator()(const py::dict& params) { + using namespace finiteness_checker; + + const auto method = params["method"].cast(); + + ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense); + ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default); + ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method); + } + + Ops ops; +}; + +struct params2desc { + template + auto operator()(const pybind11::dict& params) { + using namespace dal::finiteness_checker; + + auto desc = descriptor(); + desc.set_allow_NaN(params["allow_nan"].cast()); + return desc; + } +}; + +template +void init_compute_ops(py::module_& m) { + m.def("compute", + [](const Policy& policy, + const py::dict& params, + const table& data) { + using namespace finiteness_checker; + using input_t = compute_input; + + compute_ops ops(policy, input_t{ data}, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); +} + +template +void init_compute_result(py::module_& m) { + using namespace finiteness_checker; + using result_t = compute_result; + + py::class_(m, "compute_result") + .def(py::init()) + .DEF_ONEDAL_PY_PROPERTY(finite, result_t) +} + +ONEDAL_PY_TYPE2STR(finiteness_checker::task::compute, "compute"); + +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_ops); +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); + +ONEDAL_PY_INIT_MODULE(finiteness_checker) { + using namespace dal::detail; + using namespace finiteness_checker; + using namespace dal::finiteness; + + using task_list = types; + auto sub = m.def_submodule("finiteness_checker"); + + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); +} + +} // namespace oneapi::dal::python From 62674a24547cf4f7771efbd48657666ed41a97fe Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:37:53 +0200 Subject: [PATCH 003/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 6aaf7c52d6..51a3ef161a 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -14,7 +14,7 @@ * limitations under the License. *******************************************************************************/ -#include "oneapi/dal/algo/finiteness_checker.hpp" +#include "oneapi/dal/algo/finiteness_checker/compute.hpp" #include "onedal/common.hpp" #include "onedal/version.hpp" From c75c23b34e714ac22eace32d4a44ae5699286262 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:46:49 +0200 Subject: [PATCH 004/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 51a3ef161a..761ee28de9 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -14,7 +14,12 @@ * limitations under the License. 
*******************************************************************************/ +// fix error with missing headers +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 +#include "oneapi/dal/algo/finiteness_checker.hpp +#else #include "oneapi/dal/algo/finiteness_checker/compute.hpp" +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 #include "onedal/common.hpp" #include "onedal/version.hpp" From 6a20938aba804e69b09bf5d15c12f3128982df7d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:47:36 +0200 Subject: [PATCH 005/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 761ee28de9..531554f857 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -16,9 +16,9 @@ // fix error with missing headers #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 -#include "oneapi/dal/algo/finiteness_checker.hpp + #include "oneapi/dal/algo/finiteness_checker.hpp #else -#include "oneapi/dal/algo/finiteness_checker/compute.hpp" + #include "oneapi/dal/algo/finiteness_checker/compute.hpp" #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 #include "onedal/common.hpp" From 382d7a1268a4612f6eec162a30c02b18bcc0e041 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:47:47 +0200 Subject: [PATCH 006/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 531554f857..ebc7bfd798 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -16,7 +16,7 @@ // fix error with missing headers #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 - #include "oneapi/dal/algo/finiteness_checker.hpp + #include "oneapi/dal/algo/finiteness_checker.hpp" #else #include "oneapi/dal/algo/finiteness_checker/compute.hpp" #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 From c8ffd9c0c2c9a132449020fa2ffc492b7c9bd1fb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:54:20 +0200 Subject: [PATCH 007/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index ebc7bfd798..92a17a875d 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -52,7 +52,7 @@ struct params2desc { using namespace dal::finiteness_checker; auto desc = descriptor(); - desc.set_allow_NaN(params["allow_nan"].cast()); + desc.set_allow_NaN(params["allow_nan"].cast()); return desc; } }; From 9aa13d5e72340509c33986befce7ff5f3169a325 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:58:13 +0200 Subject: [PATCH 008/131] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 92a17a875d..7189aec5d9 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -78,7 +78,7 @@ void init_compute_result(py::module_& m) { py::class_(m, "compute_result") .def(py::init()) - 
.DEF_ONEDAL_PY_PROPERTY(finite, result_t) + .DEF_ONEDAL_PY_PROPERTY(finite, result_t); } ONEDAL_PY_TYPE2STR(finiteness_checker::task::compute, "compute"); @@ -89,7 +89,7 @@ ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); ONEDAL_PY_INIT_MODULE(finiteness_checker) { using namespace dal::detail; using namespace finiteness_checker; - using namespace dal::finiteness; + using namespace dal::finiteness_checker; using task_list = types; auto sub = m.def_submodule("finiteness_checker"); From 84e15d598392ebf5da945468cd1cf110a25d3764 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 14:21:02 +0200 Subject: [PATCH 009/131] Rename finiteness_checker.cpp to finiteness_checker.cpp --- onedal/{primitives => utils}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{primitives => utils}/finiteness_checker.cpp (100%) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp similarity index 100% rename from onedal/primitives/finiteness_checker.cpp rename to onedal/utils/finiteness_checker.cpp From 63073c60d17c192781e30db5425eeee4832761d9 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 24 Oct 2024 10:58:08 +0200 Subject: [PATCH 010/131] Update finiteness_checker.cpp --- onedal/utils/finiteness_checker.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp index 7189aec5d9..6bc6a2e66b 100644 --- a/onedal/utils/finiteness_checker.cpp +++ b/onedal/utils/finiteness_checker.cpp @@ -94,8 +94,10 @@ ONEDAL_PY_INIT_MODULE(finiteness_checker) { using task_list = types; auto sub = m.def_submodule("finiteness_checker"); - ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); + #ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); + #endif } } // namespace oneapi::dal::python From 3dddf2dc3469f197c7e539c73f407670173c9864 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:30:15 +0100 Subject: [PATCH 011/131] add next step --- onedal/utils/validation.py | 41 +++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index bde2390e80..eb313cd980 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -20,6 +20,10 @@ import numpy as np from scipy import sparse as sp +from onedal import _backend +from ..common._policy import _get_policy +from ..datatypes import _convert_to_supported, to_table + if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 @@ -31,7 +35,9 @@ from sklearn.preprocessing import LabelEncoder from sklearn.utils.validation import check_array -from daal4py.sklearn.utils.validation import _assert_all_finite +from daal4py.sklearn.utils.validation import ( + _assert_all_finite as _daal4py_assert_all_finite, +) class DataConversionWarning(UserWarning): @@ -135,10 +141,10 @@ def _check_array( if force_all_finite: if sp.issparse(array): if hasattr(array, "data"): - _assert_all_finite(array.data) + _daal4py_assert_all_finite(array.data) force_all_finite = False else: - _assert_all_finite(array) + _daal4py_assert_all_finite(array) force_all_finite = False array = check_array( array=array, @@ -200,7 +206,7 @@ def _check_X_y( if y_numeric and y.dtype.kind == "O": y = 
y.astype(np.float64) if force_all_finite: - _assert_all_finite(y) + _daal4py_assert_all_finite(y) lengths = [X.shape[0], y.shape[0]] uniques = np.unique(lengths) @@ -285,7 +291,7 @@ def _type_of_target(y): # check float and contains non-integer float values if y.dtype.kind == "f" and np.any(y != y.astype(int)): # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.] - _assert_all_finite(y) + _daal4py_assert_all_finite(y) return "continuous" + suffix if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1): @@ -430,3 +436,28 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + # requires extracting the queue to generate a policy for converting the data to fp32 + X = to_table(_convert_to_supported(_get_policy(None, X), X)) + if not _backend.finiteness_checker(allow_nan=allow_nan).compute(X).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From 1e1213e60e2d52310b26625a1c749379affcd007 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:37:07 +0100 Subject: [PATCH 012/131] follow conventions --- onedal/utils/validation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index eb313cd980..3a9d849486 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -442,8 +442,11 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance # requires extracting the queue to generate a policy for converting the data to fp32 - X = to_table(_convert_to_supported(_get_policy(None, X), X)) - if not _backend.finiteness_checker(allow_nan=allow_nan).compute(X).finite: + policy = _get_policy(None, X) + X = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X + ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" msg_err = f"Input {padded_input_name}contains {type_err}." 
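
For orientation between the patches: the two patches above introduce a Python-level finiteness check that dispatches to the oneDAL backend instead of daal4py. Below is a minimal usage sketch, assuming a build of the package that already contains assert_all_finite in onedal.utils.validation with the signature shown in the diff; the array contents, dtype, and the allow_nan flag are illustrative only and are not part of the patch series.

    # Usage sketch only -- assumes onedal is built with the patches above applied.
    import numpy as np

    from onedal.utils.validation import assert_all_finite

    X = np.random.default_rng(0).standard_normal((1000, 20)).astype(np.float32)

    # Clean data passes silently; the check is dispatched to the oneDAL backend.
    assert_all_finite(X)

    # NaN (or inf) raises ValueError unless allow_nan=True, which tolerates NaN
    # but still rejects infinities.
    X[0, 0] = np.nan
    try:
        assert_all_finite(X, input_name="X")
    except ValueError as exc:
        print(exc)  # "Input X contains NaN, infinity."
    assert_all_finite(X, allow_nan=True)

The later patches shown in this series mostly adjust internals (module placement, backend parameter dispatch, floating-point type selection), so the public call pattern above stays the same.
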
From 053171340099a68ced8fec11f79371f6bac253ef Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:38:57 +0100 Subject: [PATCH 013/131] make xtable explicit --- onedal/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 3a9d849486..67c7a2dee0 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -443,9 +443,9 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): # is likely to cause a significant reduction in performance # requires extracting the queue to generate a policy for converting the data to fp32 policy = _get_policy(None, X) - X = to_table(_convert_to_supported(policy, X)) + X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X + policy, {"allow_nan": allow_nan}, X_table ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From e831167b32b85135b9e685c7dd83227db89603e2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:42:29 +0100 Subject: [PATCH 014/131] remove comment --- onedal/utils/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 67c7a2dee0..10bb920291 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -441,7 +441,6 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance - # requires extracting the queue to generate a policy for converting the data to fp32 policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( From d6eb1d05e9de1c6bc0a1f9683659ddef4540480d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 00:57:56 +0100 Subject: [PATCH 015/131] Update validation.py --- onedal/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 10bb920291..f4597cd01c 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -21,8 +21,8 @@ import numpy as np from scipy import sparse as sp from onedal import _backend -from ..common._policy import _get_policy -from ..datatypes import _convert_to_supported, to_table +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): From fb30d6e69a2c6244112079a9c6a0dd75cd9a3a85 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:34:52 +0100 Subject: [PATCH 016/131] Update __init__.py --- onedal/utils/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index 0a1b05fbc2..0bc9ed35a3 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +import scipy.sparse as sp from .validation import ( _check_array, @@ -22,7 +23,6 @@ _column_or_1d, _is_arraylike, _is_arraylike_not_scalar, - _is_csr, _is_integral_float, _is_multilabel, _num_features, @@ -31,6 +31,12 @@ _validate_targets, ) +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) + __all__ = [ "_column_or_1d", "_validate_targets", From 63a18c2f66ad93720408c33aa3a3b05f74d58f48 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:35:12 +0100 Subject: [PATCH 017/131] Update validation.py --- onedal/utils/validation.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f4597cd01c..1421bfaefc 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -431,13 +431,6 @@ def _num_samples(x): raise TypeError(message) from type_error -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) - - def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance From 76c0856a12c04d4d3eb13d3c21382b1b84a23dc7 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:40:03 +0100 Subject: [PATCH 018/131] Update __init__.py --- onedal/utils/__init__.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index 0bc9ed35a3..a7e1495cf9 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -13,8 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -import scipy.sparse as sp +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) from .validation import ( _check_array, _check_classification_targets, @@ -23,6 +27,7 @@ _column_or_1d, _is_arraylike, _is_arraylike_not_scalar, + _is_csr, _is_integral_float, _is_multilabel, _num_features, @@ -31,12 +36,6 @@ _validate_targets, ) -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) - __all__ = [ "_column_or_1d", "_validate_targets", From 7deb2bbce9c0435b2484ae0fcfc754f5521bb01d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:40:24 +0100 Subject: [PATCH 019/131] Update __init__.py --- onedal/utils/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index a7e1495cf9..0a1b05fbc2 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -14,11 +14,6 @@ # limitations under the License. 
# ============================================================================== -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) from .validation import ( _check_array, _check_classification_targets, From ed46b2907bb0a00678dab9c2516543941471b64a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:41:17 +0100 Subject: [PATCH 020/131] Update validation.py --- onedal/utils/validation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 1421bfaefc..f4597cd01c 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -431,6 +431,13 @@ def _num_samples(x): raise TypeError(message) from type_error +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) + + def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance From 67d6273f3520232daad4f7f16b49291240600e16 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:42:45 +0100 Subject: [PATCH 021/131] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 0caac10884..011a2eb89d 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -17,11 +17,11 @@ import warnings import numpy as np +import scipy.sparse as sp from daal4py.sklearn._utils import make2d from onedal import _backend, _is_dpc_backend -from ..utils import _is_csr from ..utils._dpep_helpers import is_dpctl_available dpctl_available = is_dpctl_available("0.14") @@ -46,7 +46,7 @@ def convert_one_to_table(arg): if isinstance(arg, dpt.usm_ndarray): return _backend.dpctl_to_table(arg) - if not _is_csr(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) From 8abead922bd8c2fceff7e8e6dffe4b76389fe1d4 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:58:03 +0100 Subject: [PATCH 022/131] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 386101eb14..12dc24eca3 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -103,7 +103,7 @@ def convert_one_to_table(arg, sua_iface=None): if sua_iface: return _backend.sua_iface_to_table(arg) - if not sp.sparse(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) @@ -130,7 +130,7 @@ def convert_one_to_table(arg, sua_iface=None): "SYCL usm array conversion to table requires the DPC backend" ) - if not sp.sparse(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) From 47d0f8bf7f0544089bcc2626dc06863be663757b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:39:18 +0100 Subject: [PATCH 023/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 
bfb3c02cbd..3d8443378d 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -31,6 +31,10 @@ constexpr const char py_capsule_name[] = "PyCapsule"; constexpr const char get_capsule_name[] = "_get_capsule"; constexpr const char queue_capsule_name[] = "SyclQueueRef"; constexpr const char context_capsule_name[] = "SyclContextRef"; +constexpr const char device_name[] = "sycl_device"; +constexpr const char filter_name[] = "filter_selector"; + + sycl::queue extract_queue(py::capsule capsule) { constexpr const char* gtr_name = queue_capsule_name; @@ -79,7 +83,12 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } - else { + else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), filter_name)) { + auto attr = syclobj.attr(device_name).attr(filter_name); + return get_queue_by_filter_string(attr.cast()); + } + else + { throw std::runtime_error("Unable to interpret \"syclobj\""); } } From e48c2bdca15b554e9b325508b8827465ae6d34bf Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:45:56 +0100 Subject: [PATCH 024/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 3d8443378d..364f248992 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -32,7 +32,7 @@ constexpr const char get_capsule_name[] = "_get_capsule"; constexpr const char queue_capsule_name[] = "SyclQueueRef"; constexpr const char context_capsule_name[] = "SyclContextRef"; constexpr const char device_name[] = "sycl_device"; -constexpr const char filter_name[] = "filter_selector"; +constexpr const char get_filter_name[] = "get_filter_string"; @@ -83,9 +83,9 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } - else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), filter_name)) { - auto attr = syclobj.attr(device_name).attr(filter_name); - return get_queue_by_filter_string(attr.cast()); + else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), get_filter_name)) { + auto attr = syclobj.attr(device_name).attr(get_filter_name); + return get_queue_by_filter_string(attr().cast()); } else { From c6751c4bc2dea6fd8e38c470d9f398bb0b8f8161 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:47:04 +0100 Subject: [PATCH 025/131] Update _policy.py --- onedal/common/_policy.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index 90705854f6..abd267f4a6 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -48,12 +48,7 @@ def __init__(self): if _is_dpc_backend: - from onedal._device_offload import DummySyclQueue - class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue - if isinstance(queue, DummySyclQueue): - super().__init__(self._queue.sycl_device.get_filter_string()) - return super().__init__(self._queue) From f3e4a3a678298b7a7b135bae67ef29e293a45ee5 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:01:33 +0100 Subject: [PATCH 026/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff 
--git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 364f248992..3bd18c3689 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -34,8 +34,6 @@ constexpr const char context_capsule_name[] = "SyclContextRef"; constexpr const char device_name[] = "sycl_device"; constexpr const char get_filter_name[] = "get_filter_string"; - - sycl::queue extract_queue(py::capsule capsule) { constexpr const char* gtr_name = queue_capsule_name; constexpr std::size_t gtr_size = sizeof(queue_capsule_name); @@ -74,6 +72,20 @@ sycl::queue get_queue_by_get_capsule(const py::object& syclobj) { return extract_from_capsule(std::move(capsule)); } +sycl::queue get_queue_by_filter_string(const std::string& filter) { + filter_selector_wrapper selector{ filter }; + return sycl::queue{ selector }; +} + +sycl::queue get_queue_by_device_id(std::uint32_t id) { + if (auto device = get_device_by_id(id)) { + return sycl::queue{ device.value() }; + } + else { + throw std::runtime_error(unknown_device); + } +} + sycl::queue get_queue_from_python(const py::object& syclobj) { static auto pycapsule = py::cast(py_capsule_name); if (py::hasattr(syclobj, get_capsule_name)) { @@ -93,20 +105,6 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { } } -sycl::queue get_queue_by_filter_string(const std::string& filter) { - filter_selector_wrapper selector{ filter }; - return sycl::queue{ selector }; -} - -sycl::queue get_queue_by_device_id(std::uint32_t id) { - if (auto device = get_device_by_id(id)) { - return sycl::queue{ device.value() }; - } - else { - throw std::runtime_error(unknown_device); - } -} - std::string get_device_name(const sycl::queue& queue) { const auto& device = queue.get_device(); if (device.is_gpu()) { From 39cdb5f3c48810a178b12608fa18eb2a8edecfd0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:28:12 +0100 Subject: [PATCH 027/131] Rename finiteness_checker.cpp to finiteness_checker.cpp --- onedal/{utils => primitives}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{utils => primitives}/finiteness_checker.cpp (100%) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp similarity index 100% rename from onedal/utils/finiteness_checker.cpp rename to onedal/primitives/finiteness_checker.cpp From 0f39613063f153d054826cbcac9f931232c14177 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:33:21 +0100 Subject: [PATCH 028/131] Create finiteness_checker.py --- onedal/primitives/finiteness_checker.py | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 onedal/primitives/finiteness_checker.py diff --git a/onedal/primitives/finiteness_checker.py b/onedal/primitives/finiteness_checker.py new file mode 100644 index 0000000000..c1a2b5c364 --- /dev/null +++ b/onedal/primitives/finiteness_checker.py @@ -0,0 +1,48 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import scipy.sparse as sp + +from onedal import _backend +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + policy = _get_policy(None, X) + X_table = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X_table + ).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From b42cfe365d6dba0735dee79e732b6f1bddd9b1dc Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:33:45 +0100 Subject: [PATCH 029/131] Update validation.py --- onedal/utils/validation.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f4597cd01c..bb501617fa 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -20,10 +20,6 @@ import numpy as np from scipy import sparse as sp -from onedal import _backend -from onedal.common._policy import _get_policy -from onedal.datatypes import _convert_to_supported, to_table - if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 @@ -436,30 +432,3 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) - - -def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance - policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." 
- raise ValueError(msg_err) - - -def assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - _assert_all_finite( - X.data if sp.issparse(X) else X, - allow_nan=allow_nan, - input_name=input_name, - ) From 0ed615e9b44825e483aaad292187296416a08960 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:34:51 +0100 Subject: [PATCH 030/131] Update __init__.py --- onedal/primitives/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index 39213819b5..c501a78d67 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -15,13 +15,16 @@ # ============================================================================== from .get_tree import get_tree_state_cls, get_tree_state_reg +from .finiteness_checker import assert_all_finite, _assert_all_finite from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ + "assert_all_finite", "get_tree_state_cls", "get_tree_state_reg", "linear_kernel", "rbf_kernel", "poly_kernel", "sigmoid_kernel", + "_assert_all_finite", ] From f101affd5068f017edd6f399666528920a4e309f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:32:43 +0100 Subject: [PATCH 031/131] attempt at fixing circular imports again --- onedal/common/_policy.py | 1 + onedal/datatypes/_data_conversion.py | 31 ++++++++-------- onedal/primitives/finiteness_checker.py | 48 ------------------------- onedal/utils/validation.py | 31 ++++++++++++++++ 4 files changed, 49 insertions(+), 62 deletions(-) delete mode 100644 onedal/primitives/finiteness_checker.py diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index abd267f4a6..0d7d8ca6a3 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -48,6 +48,7 @@ def __init__(self): if _is_dpc_backend: + class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 12dc24eca3..af5b41eb6b 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -31,13 +31,23 @@ def _apply_and_pass(func, *args, **kwargs): if _is_dpc_backend: - from ..utils._dpep_helpers import dpctl_available, dpnp_available + try: + import dpnp - if dpctl_available: - import dpctl.tensor as dpt + def _onedal_gpu_table_to_array(table, xp=None): + # By default DPNP ndarray created with a copy. + # TODO: + # investigate why dpnp.array(table, copy=False) doesn't work. + # Work around with using dpctl.tensor.asarray. + if xp == dpnp: + return dpnp.array(dpnp.dpctl.tensor.asarray(table), copy=False) + else: + return xp.asarray(table) - if dpnp_available: - import dpnp + except ImportError: + + def _onedal_gpu_table_to_array(table, xp=None): + return xp.asarray(table) from ..common._policy import _HostInteropPolicy @@ -86,15 +96,8 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): _backend.from_table(table), usm_type="device", sycl_queue=sycl_queue ) else: - xp_name = xp.__name__ - if dpnp_available and xp_name == "dpnp": - # By default DPNP ndarray created with a copy. - # TODO: - # investigate why dpnp.array(table, copy=False) doesn't work. - # Work around with using dpctl.tensor.asarray. 
- return dpnp.array(dpt.asarray(table), copy=False) - else: - return xp.asarray(table) + return _onedal_gpu_table_to_array(table, xp=xp) + return _backend.from_table(table) def convert_one_to_table(arg, sua_iface=None): diff --git a/onedal/primitives/finiteness_checker.py b/onedal/primitives/finiteness_checker.py deleted file mode 100644 index c1a2b5c364..0000000000 --- a/onedal/primitives/finiteness_checker.py +++ /dev/null @@ -1,48 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import scipy.sparse as sp - -from onedal import _backend -from onedal.common._policy import _get_policy -from onedal.datatypes import _convert_to_supported, to_table - - -def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance - policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - raise ValueError(msg_err) - - -def assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - _assert_all_finite( - X.data if sp.issparse(X) else X, - allow_nan=allow_nan, - input_name=input_name, - ) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index bb501617fa..c620b7b2e4 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -35,6 +35,10 @@ _assert_all_finite as _daal4py_assert_all_finite, ) +from onedal import _backend +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table + class DataConversionWarning(UserWarning): """Warning used to notify implicit data conversions happening in the code.""" @@ -432,3 +436,30 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + policy = _get_policy(None, X) + X_table = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X_table + ).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." 
+ raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From 24c0e9472a85b2023ddb21a27fe6a783adb5cc1c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:33:06 +0100 Subject: [PATCH 032/131] fix isort --- onedal/primitives/__init__.py | 2 +- onedal/utils/validation.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index c501a78d67..79d72e2f16 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -14,8 +14,8 @@ # limitations under the License. # ============================================================================== +from .finiteness_checker import _assert_all_finite, assert_all_finite from .get_tree import get_tree_state_cls, get_tree_state_reg -from .finiteness_checker import assert_all_finite, _assert_all_finite from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index c620b7b2e4..4c5cc9746f 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -34,7 +34,6 @@ from daal4py.sklearn.utils.validation import ( _assert_all_finite as _daal4py_assert_all_finite, ) - from onedal import _backend from onedal.common._policy import _get_policy from onedal.datatypes import _convert_to_supported, to_table From 3f96166299d3ac5f07931ba64e5b0e96af345496 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:35:06 +0100 Subject: [PATCH 033/131] remove __init__ changes --- onedal/primitives/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index 79d72e2f16..39213819b5 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -14,17 +14,14 @@ # limitations under the License. 
# ============================================================================== -from .finiteness_checker import _assert_all_finite, assert_all_finite from .get_tree import get_tree_state_cls, get_tree_state_reg from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ - "assert_all_finite", "get_tree_state_cls", "get_tree_state_reg", "linear_kernel", "rbf_kernel", "poly_kernel", "sigmoid_kernel", - "_assert_all_finite", ] From d98505388701b670e037148e14490163e5675590 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:35:50 +0100 Subject: [PATCH 034/131] last move --- onedal/{primitives => utils}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{primitives => utils}/finiteness_checker.cpp (100%) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp similarity index 100% rename from onedal/primitives/finiteness_checker.cpp rename to onedal/utils/finiteness_checker.cpp From 90ec48b46bc0c06a1da5b07e7b5d93efc12c12b7 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 14:39:03 +0100 Subject: [PATCH 035/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 3bd18c3689..828be51547 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -87,11 +87,10 @@ sycl::queue get_queue_by_device_id(std::uint32_t id) { } sycl::queue get_queue_from_python(const py::object& syclobj) { - static auto pycapsule = py::cast(py_capsule_name); if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj, pycapsule)) { + else if (py::isinstance(syclobj, py::capsule)) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 8c2c854c06b0e4486aae563418ea047d24f528df Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 14:59:19 +0100 Subject: [PATCH 036/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 828be51547..224e7a04e1 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -19,7 +19,6 @@ #endif // ONEDAL_DATA_PARALLEL #include - #include "onedal/common/policy_common.hpp" namespace oneapi::dal::python { @@ -90,7 +89,7 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj, py::capsule)) { + else if (py::isinstance(syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 6fa38d7f49d95a831d663101e076530297980865 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 15:07:44 +0100 Subject: [PATCH 037/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 224e7a04e1..b10c60880d 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -89,7 +89,7 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj) { + else if 
(py::isinstance(syclobj)) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 9c1ca9c3f29d3f00f5b10444e3e78101fb39adc0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 17:22:59 +0100 Subject: [PATCH 038/131] Update policy_common.cpp --- onedal/common/policy_common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index b10c60880d..284762b035 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -19,6 +19,7 @@ #endif // ONEDAL_DATA_PARALLEL #include + #include "onedal/common/policy_common.hpp" namespace oneapi::dal::python { From 4b67dbde880bfa8c3d5373473a589bd2f6577c56 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 19:27:45 +0100 Subject: [PATCH 039/131] Update validation.py --- onedal/utils/validation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 4c5cc9746f..2ea8de8f51 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,8 +438,6 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( From fa59a3c0103e9bd9d31ac1c0bf94cc9d1f86ae26 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:23:58 +0100 Subject: [PATCH 040/131] add testing --- onedal/utils/tests/test_validation.py | 115 ++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 onedal/utils/tests/test_validation.py diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py new file mode 100644 index 0000000000..406a2fd7bc --- /dev/null +++ b/onedal/utils/tests/test_validation.py @@ -0,0 +1,115 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import time + +import numpy as np +import numpy.random as rand +import pytest +from numpy.testing import assert_raises + +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) +from onedal.utils.validation import assert_all_finite, _assert_all_finite + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [ + 2**16 + 3, + ], + [1000, 1000], + [ + 3, + ], + ], +) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue): + X = np.array(shape, dtype=dtype) + X.fill(np.finfo(dtype).max) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + _assert_all_finite(X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [ + 2**16 + 3, + ], + [1000, 1000], + [ + 3, + ], + ], +) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_assert_finite_random_location( + dtype, shape, allow_nan, check, seed, dataframe, queue +): + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + if check: + loc = rand.randint(0, X.size - 1) + X.reshape((-1,))[loc] = float(check) + + if check is None or (allow_nan and check == "NaN"): + _assert_all_finite(X, allow_nan=allow_nan) + else: + assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_assert_finite_random_shape_and_location( + dtype, allow_nan, check, seed, dataframe, queue +): + lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + if check: + loc = rand.randint(0, X.size - 1) + X[loc] = float(check) + + if check is None or (allow_nan and check == "NaN"): + _assert_all_finite(X, allow_nan=allow_nan) + else: + assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) From 3330b3312f07a751859d8e9c7639512e5d035ed3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:24:38 +0100 Subject: [PATCH 041/131] isort --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 406a2fd7bc..5788a9ccc3 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -25,7 +25,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from onedal.utils.validation import assert_all_finite, _assert_all_finite +from onedal.utils.validation import 
_assert_all_finite, assert_all_finite @pytest.mark.parametrize("dtype", [np.float32, np.float64]) From 48959403bde34845dd7bcc9bb357cc6e79eb846e Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:53:23 +0100 Subject: [PATCH 042/131] attempt to fix module error --- onedal/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 2ea8de8f51..9b33d49fe0 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -440,7 +440,7 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( + if not _backend.finiteness_checker.compute.compute( policy, {"allow_nan": allow_nan}, X_table ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" From 0c6dd5d284155478773d1d4cf88c4fab3c9b6558 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 23:20:51 +0100 Subject: [PATCH 043/131] add fptype --- onedal/utils/validation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 9b33d49fe0..f6e62bef14 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -439,10 +439,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: + X_t = to_table(_convert_to_supported(policy, X)) + params = { + "fptype": "float" if X_t.dtype.name == "float32" else "double", + "allow_nan": allow_nan, + } + if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" msg_err = f"Input {padded_input_name}contains {type_err}." 
From e2182fa81ffc0b35b485a01f43b1d0dca5bb79e1 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 23:40:24 +0100 Subject: [PATCH 044/131] fix typo --- onedal/utils/validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f6e62bef14..1ce7e5378d 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,12 +438,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): - policy = _get_policy(None, X) - X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X_t.dtype.name == "float32" else "double", + "fptype": "float" if X.dtype.name == "float32" else "double", "allow_nan": allow_nan, } + policy = _get_policy(None, X) + X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From 982ef2c8e57e56d4d018b72fa7cd3e7ba58e0ebb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 00:02:35 +0100 Subject: [PATCH 045/131] Update validation.py --- onedal/utils/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 1ce7e5378d..6298f3ee5a 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,11 +438,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): + policy = _get_policy(None, X) params = { "fptype": "float" if X.dtype.name == "float32" else "double", + "method": "dense", "allow_nan": allow_nan, } - policy = _get_policy(None, X) X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" From 2fb52a82bc27226d53ddfa27a462840e2011c9cb Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 02:38:41 +0100 Subject: [PATCH 046/131] remove sua_ifcae from to_table --- onedal/datatypes/_data_conversion.py | 39 +++++++++++----------------- onedal/datatypes/table.cpp | 11 ++++---- onedal/datatypes/tests/test_data.py | 12 ++++----- sklearnex/tests/test_memory_usage.py | 6 ++--- 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index af5b41eb6b..2ef6903041 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -19,15 +19,29 @@ import numpy as np import scipy.sparse as sp -from daal4py.sklearn._utils import make2d from onedal import _backend, _is_dpc_backend +def make2d(X): + # generalized for array-like inputs + if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: + return X.reshape((-1, 1)) + if np.isscalar(X): + return np.atleast_2d(X) + return X + + def _apply_and_pass(func, *args, **kwargs): if len(args) == 1: return func(args[0], **kwargs) return tuple(map(lambda arg: func(arg, **kwargs), args)) +def convert_one_to_table(arg): + return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + +def to_table(*args): + return _apply_and_pass(convert_one_to_table, *args) + if _is_dpc_backend: @@ -100,16 +114,6 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): return _backend.from_table(table) - def convert_one_to_table(arg, sua_iface=None): - # Note: currently only oneDAL homogen tables are 
supported and the - # contiuginity of the input array should be checked in advance. - if sua_iface: - return _backend.sua_iface_to_table(arg) - - if not sp.issparse(arg): - arg = make2d(arg) - return _backend.to_table(arg) - else: def _convert_to_supported(policy, *data): @@ -127,22 +131,9 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): ) return _backend.from_table(table) - def convert_one_to_table(arg, sua_iface=None): - if sua_iface: - raise RuntimeError( - "SYCL usm array conversion to table requires the DPC backend" - ) - - if not sp.issparse(arg): - arg = make2d(arg) - return _backend.to_table(arg) - def from_table(*args, sycl_queue=None, sua_iface=None, xp=None): return _apply_and_pass( convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp ) - -def to_table(*args, sua_iface=None): - return _apply_and_pass(convert_one_to_table, *args, sua_iface=sua_iface) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 9771306118..ce0f15936b 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -78,6 +78,12 @@ ONEDAL_PY_INIT_MODULE(table) { #endif // ONEDAL_DATA_PARALLEL m.def("to_table", [](py::object obj) { + #ifdef ONEDAL_DATA_PARALLEL + if (py::hasattr(obj, "__sycl_usm_array_interface__")) { + return convert_from_sua_iface(obj); + } + #endif // ONEDAL_DATA_PARALLEL + auto* obj_ptr = obj.ptr(); return convert_to_table(obj_ptr); }); @@ -87,11 +93,6 @@ ONEDAL_PY_INIT_MODULE(table) { return obj_ptr; }); -#ifdef ONEDAL_DATA_PARALLEL - m.def("sua_iface_to_table", [](py::object obj) { - return convert_from_sua_iface(obj); - }); -#endif // ONEDAL_DATA_PARALLEL } } // namespace oneapi::dal::python diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 471d6f0a64..de47e18ad4 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -68,7 +68,7 @@ def fit(self, X, y=None): X = xp.astype(X, dtype=xp.float64) dtype = get_dtype(X) params = bs_DBSCAN._get_onedal_params(dtype) - X_table = to_table(X, sua_iface=sua_iface) + X_table = to_table(X) # TODO: # check other candidates for the dummy base oneDAL func. # oneDAL backend func is needed to check result table checks. @@ -251,7 +251,7 @@ def test_input_sua_iface_zero_copy(dataframe, queue, order, dtype): sua_iface, X_dp_namespace, _ = _get_sycl_namespace(X_dp) - X_table = to_table(X_dp, sua_iface=sua_iface) + X_table = to_table(X_dp) _assert_sua_iface_fields(X_dp, X_table) X_dp_from_table = from_table( @@ -339,7 +339,7 @@ def test_sua_iface_interop_invalid_shape(dataframe, queue, data_shape): "Unable to convert from SUA interface: only 1D & 2D tensors are allowed" ) with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.skipif( @@ -368,7 +368,7 @@ def test_sua_iface_interop_unsupported_dtypes(dataframe, queue, dtype): expected_err_msg = "Unable to convert from SUA interface: unknown data type" with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.parametrize( @@ -393,7 +393,7 @@ def test_to_table_non_contiguous_input(dataframe, queue): else: expected_err_msg = "Numpy input Could not convert Python object to onedal table." 
with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.skipif( @@ -411,4 +411,4 @@ def test_sua_iface_interop_if_no_dpc_backend(dataframe, queue, dtype): expected_err_msg = "SYCL usm array conversion to table requires the DPC backend" with pytest.raises(RuntimeError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 4035832d37..6e7fdb72b5 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,8 +142,8 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X, sua_iface=sua_iface) - y_table = to_table(y, sua_iface=sua_iface) + X_table = to_table(X) + y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing # underscore) is required for the correct check. The cleanup of # the memory will occur at the estimator instance deletion. @@ -160,7 +160,7 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X, sua_iface=sua_iface) + X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) From 28dc267ab319edf2cef611340c0ab634eae036c4 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 02:42:29 +0100 Subject: [PATCH 047/131] isort and black --- onedal/datatypes/_data_conversion.py | 3 ++- onedal/datatypes/table.cpp | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 2ef6903041..c08196f1d6 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -36,9 +36,11 @@ def _apply_and_pass(func, *args, **kwargs): return func(args[0], **kwargs) return tuple(map(lambda arg: func(arg, **kwargs), args)) + def convert_one_to_table(arg): return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + def to_table(*args): return _apply_and_pass(convert_one_to_table, *args) @@ -136,4 +138,3 @@ def from_table(*args, sycl_queue=None, sua_iface=None, xp=None): return _apply_and_pass( convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp ) - diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index ce0f15936b..113d881228 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -92,7 +92,6 @@ ONEDAL_PY_INIT_MODULE(table) { auto* obj_ptr = convert_to_pyobject(t); return obj_ptr; }); - } } // namespace oneapi::dal::python From 2f85fd4713535424395acfe5d0f72d1451c27d16 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 08:19:57 +0100 Subject: [PATCH 048/131] Update test_memory_usage.py --- sklearnex/tests/test_memory_usage.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e7fdb72b5..6e3ef2b3f7 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,6 +142,14 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] + if not (X.flags['C_CONTIGUOUS'] or 
X.flags['F_CONTIGUOUS']): + X = xp.copy(X) + if not (y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS']): + y = xp.copy(y) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -160,6 +168,10 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + X = xp.copy(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From 8659248f70dc78cc94058690e217fa6383747b9b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 09:19:39 +0100 Subject: [PATCH 049/131] format --- sklearnex/tests/test_memory_usage.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e3ef2b3f7..214c03a6ba 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,14 +142,14 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] - if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] + if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): X = xp.copy(X) - if not (y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS']): + if not (y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"]): y = xp.copy(y) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -168,10 +168,10 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). 
check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): X = xp.copy(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From 3827d6f38cfcd5ef065d8d6a3ea34bc749de436a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 11:01:26 +0100 Subject: [PATCH 050/131] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index c08196f1d6..0deacf4c74 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -24,8 +24,9 @@ def make2d(X): # generalized for array-like inputs + # dpnp -1 indexing is broken, use size if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: - return X.reshape((-1, 1)) + return X.reshape((X.size, 1)) if np.isscalar(X): return np.atleast_2d(X) return X From 55fa7d214f7a2f0398f1a83a7961a8491c587269 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 12:28:38 +0100 Subject: [PATCH 051/131] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 0deacf4c74..353fef7e9c 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -22,16 +22,6 @@ from onedal import _backend, _is_dpc_backend -def make2d(X): - # generalized for array-like inputs - # dpnp -1 indexing is broken, use size - if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: - return X.reshape((X.size, 1)) - if np.isscalar(X): - return np.atleast_2d(X) - return X - - def _apply_and_pass(func, *args, **kwargs): if len(args) == 1: return func(args[0], **kwargs) @@ -39,7 +29,7 @@ def _apply_and_pass(func, *args, **kwargs): def convert_one_to_table(arg): - return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + return _backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg) def to_table(*args): From 175cd7899f2a3851c60cd1964c7f7fe1f48712f3 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 13:33:34 +0100 Subject: [PATCH 052/131] Update test_validation.py --- onedal/utils/tests/test_validation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 5788a9ccc3..6f9f1c383f 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -78,12 +78,13 @@ def test_assert_finite_random_location( ): rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) - X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check: loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: @@ -103,12 +104,13 @@ def test_assert_finite_random_shape_and_location( lb, ub = 2, 1048576 # lb is a 
patching condition, ub 2^20 rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) - X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check: loc = rand.randint(0, X.size - 1) X[loc] = float(check) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: From 7016ad0871a5f4c5f1d0c53bad5709752a88361c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 14:33:38 +0100 Subject: [PATCH 053/131] remove unnecessary code --- onedal/datatypes/_data_conversion.py | 1 - sklearnex/tests/test_memory_usage.py | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 353fef7e9c..018b79524e 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -17,7 +17,6 @@ import warnings import numpy as np -import scipy.sparse as sp from onedal import _backend, _is_dpc_backend diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 214c03a6ba..6e7fdb72b5 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,14 +142,6 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] - if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): - X = xp.copy(X) - if not (y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"]): - y = xp.copy(y) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -168,10 +160,6 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). 
check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): - X = xp.copy(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From fb7375f796834d6dd6a2ed490bdcc38a018f80e3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 06:57:01 +0100 Subject: [PATCH 054/131] make reviewer changes --- onedal/utils/finiteness_checker.cpp | 2 +- onedal/utils/tests/test_validation.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp index 6bc6a2e66b..2b8d84bd6f 100644 --- a/onedal/utils/finiteness_checker.cpp +++ b/onedal/utils/finiteness_checker.cpp @@ -66,7 +66,7 @@ void init_compute_ops(py::module_& m) { using namespace finiteness_checker; using input_t = compute_input; - compute_ops ops(policy, input_t{ data}, params2desc{}); + compute_ops ops(policy, input_t{ data }, params2desc{}); return fptype2t{ method2t{ Task{}, ops } }(params); }); } diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 6f9f1c383f..5f92a64bf7 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -19,7 +19,6 @@ import numpy as np import numpy.random as rand import pytest -from numpy.testing import assert_raises from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, @@ -88,7 +87,9 @@ def test_assert_finite_random_location( if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + _assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -114,4 +115,6 @@ def test_assert_finite_random_shape_and_location( if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." 
+ with pytest.raises(ValueError, match=msg_err): + _assert_all_finite(X, allow_nan=allow_nan) From 30816bf546a8b5aa5470a34ec0b4e6c82577a3c9 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 15:43:29 +0100 Subject: [PATCH 055/131] make dtype check change --- onedal/datatypes/table.cpp | 4 ++++ onedal/utils/validation.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 113d881228..634cc99a1d 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -72,6 +72,10 @@ ONEDAL_PY_INIT_MODULE(table) { const auto column_count = t.get_column_count(); return py::make_tuple(row_count, column_count); }); + table_obj.def_property_readonly("dtype", [](const table& t){ + // returns a numpy dtype, even if source was not from numpy + return convert_dal_to_npy_type(t.get_metadata().get_data_type(0)); + }); #ifdef ONEDAL_DATA_PARALLEL define_sycl_usm_array_property(table_obj); diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 5294483ac2..836dd84a75 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -447,12 +447,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) + X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X.dtype.name == "float32" else "double", + "fptype": "float" if X_t.dtype == np.float32 else "double", "method": "dense", "allow_nan": allow_nan, } - X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From abb3b1683f71fe758beec194795ab6a8b24545f3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 16:06:59 +0100 Subject: [PATCH 056/131] add sparse testing --- onedal/utils/tests/test_validation.py | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 5f92a64bf7..aefa1dbb36 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -19,6 +19,7 @@ import numpy as np import numpy.random as rand import pytest +import scipy.sparse as sp from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, @@ -118,3 +119,31 @@ def test_assert_finite_random_shape_and_location( msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): _assert_all_finite(X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +def test_assert_finite_sparse(dtype, allow_nan, check, seed): + lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + X = sp.random( + rand.randint(lb, ub), + rand.randint(lb, ub), + format="csr", + dtype=dtype, + random_state=rand.default_rng(seed), + ) + + if check: + locx = rand.randint(0, X.shape[0] - 1) + locy = rand.randint(0, X.shape[1] - 1) + X[locx, locy] = float(check) + + if check is None or (allow_nan and check == "NaN"): + assert_all_finite(X, allow_nan=allow_nan) + else: + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." 
+ with pytest.raises(ValueError, match=msg_err): + assert_all_finite(X, allow_nan=allow_nan) From 97aef73e5866db07206fdf47571f9fb94f93185c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:06:17 +0100 Subject: [PATCH 057/131] try again --- onedal/datatypes/table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 634cc99a1d..a06a08710d 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -74,7 +74,7 @@ ONEDAL_PY_INIT_MODULE(table) { }); table_obj.def_property_readonly("dtype", [](const table& t){ // returns a numpy dtype, even if source was not from numpy - return convert_dal_to_npy_type(t.get_metadata().get_data_type(0)); + return py::dtype(convert_dal_to_npy_type(t.get_metadata().get_data_type(0))); }); #ifdef ONEDAL_DATA_PARALLEL From 6e29651587f42226b06c2d733d386a0bc19e0168 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:29:19 +0100 Subject: [PATCH 058/131] try again --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index aefa1dbb36..d953038f33 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -126,7 +126,7 @@ def test_assert_finite_random_shape_and_location( @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) def test_assert_finite_sparse(dtype, allow_nan, check, seed): - lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + lb, ub = 2, 256 rand.seed(seed) X = sp.random( rand.randint(lb, ub), From 59363a8126643a1eb5aff981d1d7ce09cdbf711b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:30:46 +0100 Subject: [PATCH 059/131] try again --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index d953038f33..7662f486f3 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -126,7 +126,7 @@ def test_assert_finite_random_shape_and_location( @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) def test_assert_finite_sparse(dtype, allow_nan, check, seed): - lb, ub = 2, 256 + lb, ub = 2, 2056 rand.seed(seed) X = sp.random( rand.randint(lb, ub), From 12de7038d719510df8043ae3dbce216afb39c6b2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 07:18:21 +0100 Subject: [PATCH 060/131] temporary commit --- sklearnex/utils/validation.py | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index b2d1898643..e41dec4a18 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -14,4 +14,42 @@ # limitations under the License. 
# =============================================================================== -from daal4py.sklearn.utils.validation import _assert_all_finite +import scipy.sparse as sp +from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite +from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite +from daal4py.sklearn._utils import sklearn_check_version + +if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data as _sklearn_validate_data + _finite_keyword = "ensure_all_finite" + +else: + from sklearn.base import BaseEstimator + _sklearn_validate_data = BaseEstimator._validate_data + _finite_keyword = "force_all_finite" + + + +def validate_data(*args, **kwargs): + # force finite check to not occur in sklearn, default is True + force_all_finite = _finite_keyword not in kwargs or kwargs[_finite_keyword] + kwargs[_finite_keyword] = False + out = _sklearn_validate_data(*args, **kwargs) + if force_all_finite: + # run local finite check + for arg in out: + assert_all_finite(arg) + return out + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) \ No newline at end of file From 07ec3d88ca0a5754edcf42a060ce03f1ab438dd7 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 10:58:56 +0100 Subject: [PATCH 061/131] first attempt --- sklearnex/utils/validation.py | 137 +++++++++++++++++++++++++++++++--- 1 file changed, 125 insertions(+), 12 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index e41dec4a18..16b398380e 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -16,30 +16,107 @@ import scipy.sparse as sp from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite -from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite + from daal4py.sklearn._utils import sklearn_check_version +from onedal.utils._array_api import _is_numpy_namespace +from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite + +from ._array_api import get_namespace if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data as _sklearn_validate_data + _finite_keyword = "ensure_all_finite" else: from sklearn.base import BaseEstimator + _sklearn_validate_data = BaseEstimator._validate_data _finite_keyword = "force_all_finite" +def _is_contiguous(X): + # array_api does not have a `strides` or `flags` attribute for testing memory + # order. When dlpack support is brought in for oneDAL, the dlpack object can + # then be inspected and this must be updated. _is_contiguous is therefore + # conservative in verifying attributes and does not support array_api. This + # will block onedal_assert_all_finite from being used for array api inputs. 
+ if hasattr(X, "flags") and X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]: + return True + return False -def validate_data(*args, **kwargs): - # force finite check to not occur in sklearn, default is True - force_all_finite = _finite_keyword not in kwargs or kwargs[_finite_keyword] - kwargs[_finite_keyword] = False - out = _sklearn_validate_data(*args, **kwargs) - if force_all_finite: - # run local finite check - for arg in out: - assert_all_finite(arg) - return out + +def _assert_all_finite_core(X, *, xp, allow_nan, input_name=""): + # This is a reproduction of code from sklearn.utils.validation + # necessary for older sklearn versions (<1.2) and for dpnp inputs + # which do not conform to the array_api standard, and cannot be + # checked in sklearn. + first_pass_isfinite = xp.isfinite(xp.sum(X)) + if first_pass_isfinite: + return + + has_inf = xp.any(xp.isinf(X)) + has_nan_error = False if allow_nan else xp.any(xp.isnan(X)) + if has_inf or has_nan_error: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +if sklearn_check_version("1.2"): + + def _array_api_assert_all_finite( + X, *, xp, is_array_api_compliant, allow_nan=False, input_name="" + ): + if _is_numpy_namespace(xp) or is_array_api_compliant: + _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: + return + # handle dpnp inputs + _assert_all_finite_core(X, xp, allow_nan, input_name=input_name) + +else: + + def _array_api_assert_all_finite( + X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" + ): + + if _is_numpy_namespace(xp): + _sklearn_assert_all_finite(X, allow_nan, input_name=input_name) + elif is_array_api_compliant and not xp.isdtype( + X, ("real floating", "complex floating") + ): + return + elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: + return + + # handle array_api and dpnp inputs + _assert_all_finite_core(X, xp, allow_nan, input_name=input_name) + + +def _assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + # array_api compliance in sklearn varies betweeen the support sklearn versions + # therefore a separate check matching sklearn's assert_all_finite is necessary + # when the data is not float32 or float64 but of a float type. The onedal + # assert_all_finite is only for float32 and float64 contiguous arrays. 
+ + # initial match to daal4py, can be optimized later + xp, is_array_api_compliant = get_namespace(X) + if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): + + # all non-numpy arrays for sklearn 1.0 and dpnp for sklearn are not handeled properly + # separate function for import-time sklearn version check + _array_api_assert_all_finite( + X, xp, is_array_api_compliant, allow_nan=allow_nan, input_name=input_name + ) + else: + _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) def assert_all_finite( @@ -52,4 +129,40 @@ def assert_all_finite( X.data if sp.issparse(X) else X, allow_nan=allow_nan, input_name=input_name, - ) \ No newline at end of file + ) + + +def validate_data( + _estimator, + /, + X="no_validation", + y="no_validation", + reset=True, + validate_separately=False, + skip_check_array=False, + **check_params, +): + # force finite check to not occur in sklearn, default is True + # `ensure_all_finite` is the most up-to-date keyword name in sklearn + # _finite_keyword provides backward compatability for `force_all_finite` + force_all_finite = ( + "ensure_all_finite" not in check_params or check_params["ensure_all_finite"] + ) + check_params[_finite_keyword] = False + out = _sklearn_validate_data( + _estimator, + X=X, + y=y, + reset=reset, + validate_separate=validate_separately, + skip_check_array=skip_check_array, + **check_params, + ) + if force_all_finite: + # run local finite check + arg = iter(out) + if not isinstance(X, str) or X != "no_validation": + assert_all_finite(next(arg), input_name="X") + if y is not None or not isinstance(y, str) or y != "no_validation": + assert_all_finite(next(arg), input_name="y") + return out From 32c565d42ad0d07ed37d5a2ea264c32b25510676 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 11:18:13 +0100 Subject: [PATCH 062/131] missing change? --- sklearnex/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index 4c3fe21154..686e089adf 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. 
# =============================================================================== -from .validation import _assert_all_finite +from .validation import assert_all_finite -__all__ = ["_assert_all_finite"] +__all__ = ["assert_all_finite"] From 5093ed7d8e35559c7966d3e4fd573cd2a6f19b80 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 12:15:56 +0100 Subject: [PATCH 063/131] modify DummyEstimator for testing --- sklearnex/tests/test_memory_usage.py | 44 +++++----------------------- sklearnex/tests/utils/__init__.py | 2 ++ sklearnex/tests/utils/base.py | 35 ++++++++++++++++++++++ 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e7fdb72b5..570e061040 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -38,7 +38,12 @@ from onedal.utils._array_api import _get_sycl_namespace from onedal.utils._dpep_helpers import dpctl_available, dpnp_available from sklearnex import config_context -from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES +from sklearnex.tests.utils import ( + PATCHED_FUNCTIONS, + PATCHED_MODELS, + SPECIAL_INSTANCES, + DummyEstimator, +) from sklearnex.utils._array_api import get_namespace if dpctl_available: @@ -132,41 +137,6 @@ def gen_functions(functions): ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} -if _is_dpc_backend: - - from sklearn.utils.validation import check_is_fitted - - from onedal.datatypes import from_table, to_table - - class DummyEstimatorWithTableConversions(BaseEstimator): - - def fit(self, X, y=None): - sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X) - y_table = to_table(y) - # The presence of the fitted attributes (ending with a trailing - # underscore) is required for the correct check. The cleanup of - # the memory will occur at the estimator instance deletion. - self.x_attr_ = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - self.y_attr_ = from_table( - y_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - return self - - def predict(self, X): - # Checks if the estimator is fitted by verifying the presence of - # fitted attributes (ending with a trailing underscore). 
- check_is_fitted(self) - sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X) - returned_X = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp - ) - return returned_X - - def gen_clsf_data(n_samples, n_features, dtype=None): data, label = make_classification( n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777 @@ -370,7 +340,7 @@ def test_table_conversions_memory_leaks(dataframe, queue, order, data_shape, dty pytest.skip("SYCL device memory leak check requires the level zero sysman") _kfold_function_template( - DummyEstimatorWithTableConversions, + DummyEstimator, dataframe, data_shape, queue, diff --git a/sklearnex/tests/utils/__init__.py b/sklearnex/tests/utils/__init__.py index 60ca67fa37..db728fe913 100644 --- a/sklearnex/tests/utils/__init__.py +++ b/sklearnex/tests/utils/__init__.py @@ -21,6 +21,7 @@ SPECIAL_INSTANCES, UNPATCHED_FUNCTIONS, UNPATCHED_MODELS, + DummyEstimator, _get_processor_info, call_method, gen_dataset, @@ -39,6 +40,7 @@ "gen_models_info", "gen_dataset", "sklearn_clone_dict", + "DummyEstimator", ] _IS_INTEL = "GenuineIntel" in _get_processor_info() diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 1949519585..248eb85a59 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -32,7 +32,9 @@ ) from sklearn.datasets import load_diabetes, load_iris from sklearn.neighbors._base import KNeighborsMixin +from sklearn.utils.validation import check_is_fitted +from onedal.datatypes import from_table, to_table from onedal.tests.utils._dataframes_support import _convert_to_dataframe from sklearnex import get_patch_map, patch_sklearn, sklearn_is_patched, unpatch_sklearn from sklearnex.basic_statistics import BasicStatistics, IncrementalBasicStatistics @@ -44,6 +46,7 @@ NearestNeighbors, ) from sklearnex.svm import SVC, NuSVC +from sklearnex.utils.validation import validate_data def _load_all_models(with_sklearnex=True, estimator=True): @@ -369,3 +372,35 @@ def _get_processor_info(): ) return proc + + +class DummyEstimator(BaseEstimator): + + def fit(self, X, y=None): + X_array, y_array = validate_data(self, X, y) + + sua_iface, xp, _ = _get_sycl_namespace(X_array) + X_table = to_table(X_array) + y_table = to_table(y_array) + # The presence of the fitted attributes (ending with a trailing + # underscore) is required for the correct check. The cleanup of + # the memory will occur at the estimator instance deletion. + self.x_attr_ = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + self.y_attr_ = from_table( + y_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + return self + + def predict(self, X): + # Checks if the estimator is fitted by verifying the presence of + # fitted attributes (ending with a trailing underscore). 
+ check_is_fitted(self) + X_array = validate_data(self, X, reset=False) + sua_iface, xp, _ = _get_sycl_namespace(X_array) + X_table = to_table(X_array) + returned_X = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + return returned_X From f04deba338611c4367d3c7ca91f9fcfaf3e1c432 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 12:21:32 +0100 Subject: [PATCH 064/131] generalize DummyEstimator --- sklearnex/tests/utils/base.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 248eb85a59..1d4eb3d0cf 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -385,12 +385,19 @@ def fit(self, X, y=None): # The presence of the fitted attributes (ending with a trailing # underscore) is required for the correct check. The cleanup of # the memory will occur at the estimator instance deletion. - self.x_attr_ = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp - ) - self.y_attr_ = from_table( - y_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp - ) + if sua_iface: + self.x_attr_ = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + self.y_attr_ = from_table( + y_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + else: + self.x_attr = from_table(X_table) + self.y_attr = from_table(y_table) + + assert type(self.x_attr) == type(X) + return self def predict(self, X): @@ -400,7 +407,13 @@ def predict(self, X): X_array = validate_data(self, X, reset=False) sua_iface, xp, _ = _get_sycl_namespace(X_array) X_table = to_table(X_array) - returned_X = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp - ) + if sua_iface: + returned_X = from_table( + X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + ) + else: + returned_X = from_table(X_table) + + assert type(returned_X) == type(X) + return returned_X From 740a5e762788d989186222b79c9f467d4c0973c4 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 12:42:42 +0100 Subject: [PATCH 065/131] switch test --- sklearnex/utils/tests/test_finite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 2874ec3400..eaa39fe2c0 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -21,7 +21,7 @@ import pytest from numpy.testing import assert_raises -from sklearnex.utils import _assert_all_finite +from sklearnex.utils import assert_all_finite @pytest.mark.parametrize("dtype", [np.float32, np.float64]) From 27050bd5a4329dcc30d8f9ec39efce6212cd8694 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 12:43:23 +0100 Subject: [PATCH 066/131] further testing changes --- sklearnex/utils/tests/test_finite.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index eaa39fe2c0..487bb39369 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -65,7 +65,7 @@ def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed): X.reshape((-1,))[loc] = float(check) if check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) else: assert_raises(ValueError, 
_assert_all_finite, X, allow_nan=allow_nan) @@ -84,6 +84,6 @@ def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed): X[loc] = float(check) if check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + assert_raises(ValueError, assert_all_finite, X, allow_nan=allow_nan) From 53c8f7b7152d53019819fe7cbb30b382cf7b4e66 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 13:34:29 +0100 Subject: [PATCH 067/131] add initial validate_data test, will be refactored --- sklearnex/utils/tests/test_finite.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 487bb39369..6468fde2cc 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -21,6 +21,8 @@ import pytest from numpy.testing import assert_raises +from onedal.tests.utils._dataframes_support import get_dataframes_and_queues +from sklearnex.tests.utils import DummyEstimator, gen_dataset from sklearnex.utils import assert_all_finite @@ -39,7 +41,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan): X = np.array(shape, dtype=dtype) X.fill(np.finfo(dtype).max) - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -67,7 +69,7 @@ def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed): if check is None or (allow_nan and check == "NaN"): assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + assert_raises(ValueError, assert_all_finite, X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -87,3 +89,13 @@ def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed): assert_all_finite(X, allow_nan=allow_nan) else: assert_raises(ValueError, assert_all_finite, X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_validate_data(dtype, dataframe, queue): + est = DummyEstimator() + X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] + est.fit(X, y) + output = est.predict(X) + assert type(X) == type(output) From 90f59c442021b4c529e64ef9f4844296f412c014 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 15:10:04 +0100 Subject: [PATCH 068/131] fixes for CI --- sklearnex/utils/validation.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 16b398380e..7bcfc3fdf6 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -67,7 +67,7 @@ def _assert_all_finite_core(X, *, xp, allow_nan, input_name=""): if sklearn_check_version("1.2"): def _array_api_assert_all_finite( - X, *, xp, is_array_api_compliant, allow_nan=False, input_name="" + X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" ): if _is_numpy_namespace(xp) or is_array_api_compliant: _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) @@ -137,26 +137,18 @@ def validate_data( /, X="no_validation", y="no_validation", - reset=True, - validate_separately=False, - skip_check_array=False, - **check_params, 
+ **kwargs, ): # force finite check to not occur in sklearn, default is True # `ensure_all_finite` is the most up-to-date keyword name in sklearn # _finite_keyword provides backward compatability for `force_all_finite` - force_all_finite = ( - "ensure_all_finite" not in check_params or check_params["ensure_all_finite"] - ) - check_params[_finite_keyword] = False + force_all_finite = "ensure_all_finite" not in kwargs or kwargs["ensure_all_finite"] + kwargs[_finite_keyword] = False out = _sklearn_validate_data( _estimator, X=X, y=y, - reset=reset, - validate_separate=validate_separately, - skip_check_array=skip_check_array, - **check_params, + **kwargs, ) if force_all_finite: # run local finite check From 7f170e2efc494d66b1a7b9b1f29c87eb1c3f9edf Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 16:36:38 +0100 Subject: [PATCH 069/131] Update validation.py --- sklearnex/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 7bcfc3fdf6..0fc31d53c0 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -1,4 +1,4 @@ -# =============================================================================== +the# =============================================================================== # Copyright 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -46,7 +46,7 @@ def _is_contiguous(X): return False -def _assert_all_finite_core(X, *, xp, allow_nan, input_name=""): +def _assert_all_finite_core(X, xp, allow_nan, *, input_name=""): # This is a reproduction of code from sklearn.utils.validation # necessary for older sklearn versions (<1.2) and for dpnp inputs # which do not conform to the array_api standard, and cannot be From 81e2bbc763b21bdd29b40e1a72c1ac41355de569 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 16:54:08 +0100 Subject: [PATCH 070/131] Update validation.py --- sklearnex/utils/validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 0fc31d53c0..3e65223331 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -1,4 +1,4 @@ -the# =============================================================================== +# =============================================================================== # Copyright 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -46,7 +46,7 @@ def _is_contiguous(X): return False -def _assert_all_finite_core(X, xp, allow_nan, *, input_name=""): +def _assert_all_finite_core(X, xp, *, allow_nan=False, input_name=""): # This is a reproduction of code from sklearn.utils.validation # necessary for older sklearn versions (<1.2) and for dpnp inputs # which do not conform to the array_api standard, and cannot be @@ -74,7 +74,7 @@ def _array_api_assert_all_finite( elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: return # handle dpnp inputs - _assert_all_finite_core(X, xp, allow_nan, input_name=input_name) + _assert_all_finite_core(X, xp, allow_nan=allow_nan, input_name=input_name) else: From 116bdba61f83fda8d66566cfd6bbeb999ca532df Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 17:18:33 +0100 Subject: [PATCH 071/131] Update test_memory_usage.py --- sklearnex/tests/test_memory_usage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearnex/tests/test_memory_usage.py 
b/sklearnex/tests/test_memory_usage.py index 570e061040..be501be218 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -35,7 +35,6 @@ get_dataframes_and_queues, ) from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available -from onedal.utils._array_api import _get_sycl_namespace from onedal.utils._dpep_helpers import dpctl_available, dpnp_available from sklearnex import config_context from sklearnex.tests.utils import ( From 076ebc401b4e9fbd872f9f1bc971bad1eb095f32 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 17:19:15 +0100 Subject: [PATCH 072/131] Update base.py --- sklearnex/tests/utils/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 1d4eb3d0cf..35ba2811e2 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -36,6 +36,7 @@ from onedal.datatypes import from_table, to_table from onedal.tests.utils._dataframes_support import _convert_to_dataframe +from onedal.utils._array_api import _get_sycl_namespace from sklearnex import get_patch_map, patch_sklearn, sklearn_is_patched, unpatch_sklearn from sklearnex.basic_statistics import BasicStatistics, IncrementalBasicStatistics from sklearnex.linear_model import LogisticRegression From e1d074365e51fa77fa75f6457c090346eb6d527a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 18:03:03 +0100 Subject: [PATCH 073/131] Update base.py --- sklearnex/tests/utils/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 35ba2811e2..0d58b5189b 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -396,7 +396,7 @@ def fit(self, X, y=None): else: self.x_attr = from_table(X_table) self.y_attr = from_table(y_table) - + assert type(X_array) == type(X) assert type(self.x_attr) == type(X) return self @@ -414,7 +414,7 @@ def predict(self, X): ) else: returned_X = from_table(X_table) - + assert type(X_array) == type(X) assert type(returned_X) == type(X) return returned_X From f59cdd33d29321c3989d0b4415b99b5055408f23 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 22:38:30 +0100 Subject: [PATCH 074/131] improve tests --- sklearnex/tests/utils/base.py | 23 +++----- sklearnex/utils/tests/test_finite.py | 83 +++++++++++++++++++++++----- 2 files changed, 77 insertions(+), 29 deletions(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 0d58b5189b..e484423cfc 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -378,26 +378,24 @@ def _get_processor_info(): class DummyEstimator(BaseEstimator): def fit(self, X, y=None): - X_array, y_array = validate_data(self, X, y) + X, y = validate_data(self, X, y) - sua_iface, xp, _ = _get_sycl_namespace(X_array) - X_table = to_table(X_array) - y_table = to_table(y_array) + sua_iface, xp, _ = _get_sycl_namespace(X) + X_table = to_table(X) + y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing # underscore) is required for the correct check. The cleanup of # the memory will occur at the estimator instance deletion. 
if sua_iface: self.x_attr_ = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) self.y_attr_ = from_table( - y_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + y_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) else: self.x_attr = from_table(X_table) self.y_attr = from_table(y_table) - assert type(X_array) == type(X) - assert type(self.x_attr) == type(X) return self @@ -405,16 +403,13 @@ def predict(self, X): # Checks if the estimator is fitted by verifying the presence of # fitted attributes (ending with a trailing underscore). check_is_fitted(self) - X_array = validate_data(self, X, reset=False) - sua_iface, xp, _ = _get_sycl_namespace(X_array) - X_table = to_table(X_array) + sua_iface, xp, _ = _get_sycl_namespace(X) + X_table = to_table(X) if sua_iface: returned_X = from_table( - X_table, sua_iface=sua_iface, sycl_queue=X_array.sycl_queue, xp=xp + X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) else: returned_X = from_table(X_table) - assert type(X_array) == type(X) - assert type(returned_X) == type(X) return returned_X diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 6468fde2cc..5c3ee2d50e 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -19,11 +19,15 @@ import numpy as np import numpy.random as rand import pytest -from numpy.testing import assert_raises -from onedal.tests.utils._dataframes_support import get_dataframes_and_queues +from daal4py.sklearn._utils import sklearn_check_version +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) +from sklearnex import config_context from sklearnex.tests.utils import DummyEstimator, gen_dataset -from sklearnex.utils import assert_all_finite +from sklearnex.utils import validate_data @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -39,9 +43,11 @@ ) @pytest.mark.parametrize("allow_nan", [False, True]) def test_sum_infinite_actually_finite(dtype, shape, allow_nan): + est = DummyEstimator() X = np.array(shape, dtype=dtype) X.fill(np.finfo(dtype).max) - assert_all_finite(X, allow_nan=allow_nan) + X_array = validate_data(est, X, allow_nan=allow_nan) + assert type(X_array) == type(X) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -58,7 +64,11 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan): @pytest.mark.parametrize("allow_nan", [False, True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) -def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed): +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +def test_validate_data_random_location( + dataframe, queue, dtype, shape, allow_nan, check, seed +): + est = DummyEstimator() rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) @@ -66,17 +76,29 @@ def test_assert_finite_random_location(dtype, shape, allow_nan, check, seed): loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) + X = _convert_to_dataframe( + X, + target_df=dataframe, + sycl_queue=queue, + ) + if check is None or (allow_nan and check == "NaN"): - assert_all_finite(X, allow_nan=allow_nan) + validate_data(est, X, allow_nan=allow_nan) else: - assert_raises(ValueError, assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" 
if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("allow_nan", [False, True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) -def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed): +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +def test_validate_data_random_shape_and_location( + dataframe, queue, dtype, allow_nan, check, seed +): + est = DummyEstimator() lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) @@ -85,17 +107,48 @@ def test_assert_finite_random_shape_and_location(dtype, allow_nan, check, seed): loc = rand.randint(0, X.size - 1) X[loc] = float(check) + X = _convert_to_dataframe( + X, + target_df=dataframe, + sycl_queue=queue, + ) + if check is None or (allow_nan and check == "NaN"): - assert_all_finite(X, allow_nan=allow_nan) + validate_data(est, X) else: - assert_raises(ValueError, assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, allow_nan=allow_nan) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_validate_data(dtype, dataframe, queue): +@pytest.mark.parametrize("array_api_dispatch", [True, False]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpctl,dpnp") +) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): est = DummyEstimator() X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] - est.fit(X, y) - output = est.predict(X) - assert type(X) == type(output) + + dispatch = {} + if sklearn_check_version("1.2"): + dispatch["array_api_dispatch"] = array_api_dispatch + + with config_context(**dispatch): + validate_data(est, X, y) + est.fit(X, y) + X_array = validate_data(est, X, reset=False) + X_out = est.predict(X) + + if ( + sklearn_check_version("1.2") or dataframe != "array_api" + ) and dataframe != "pandas": + assert type(X) == type( + X_array + ), f"validate_data converted {type(X)} to {type(X_array)}" + assert type(X) == type(X_out), f"from_array converted {type(X)} to {type(X_out)}" + else: + # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays + assert isinstance(X_array, np.ndarray) + assert isinstance(X_out, np.ndarray) From 7f9ea25aceaff20983895aab9770311211fb9211 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 22:56:10 +0100 Subject: [PATCH 075/131] fix logic --- sklearnex/utils/tests/test_finite.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 5c3ee2d50e..9ddbed4d67 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -141,9 +141,9 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): X_array = validate_data(est, X, reset=False) X_out = est.predict(X) - if ( - sklearn_check_version("1.2") or dataframe != "array_api" - ) and dataframe != "pandas": + if dataframe != "pandas" and 
not ( + dataframe == "array_api" and sklearn_check_version("1.2") and array_api_dispatch + ): assert type(X) == type( X_array ), f"validate_data converted {type(X)} to {type(X_array)}" From 51247c050952481babace230e099f26750806ae5 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:00:48 +0100 Subject: [PATCH 076/131] fix logic --- sklearnex/utils/tests/test_finite.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 9ddbed4d67..cd400c855c 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -142,7 +142,9 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): X_out = est.predict(X) if dataframe != "pandas" and not ( - dataframe == "array_api" and sklearn_check_version("1.2") and array_api_dispatch + dataframe == "array_api" + and not sklearn_check_version("1.2") + and not array_api_dispatch ): assert type(X) == type( X_array From 6e5c0efeae8743c2406cf0e89aca19197cc9654f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:09:24 +0100 Subject: [PATCH 077/131] fix logic again --- sklearnex/utils/tests/test_finite.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index cd400c855c..9a789f274f 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -141,16 +141,15 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): X_array = validate_data(est, X, reset=False) X_out = est.predict(X) - if dataframe != "pandas" and not ( + if dataframe == "pandas" or ( dataframe == "array_api" - and not sklearn_check_version("1.2") - and not array_api_dispatch + and not (sklearn_check_version("1.2") and array_api_dispatch) ): + # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays + assert isinstance(X_array, np.ndarray) + assert isinstance(X_out, np.ndarray) + else: assert type(X) == type( X_array ), f"validate_data converted {type(X)} to {type(X_array)}" assert type(X) == type(X_out), f"from_array converted {type(X)} to {type(X_out)}" - else: - # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays - assert isinstance(X_array, np.ndarray) - assert isinstance(X_out, np.ndarray) From 8d47744f25c0b32e9b0ad639e772107710c56e98 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:21:14 +0100 Subject: [PATCH 078/131] rename file --- sklearnex/utils/tests/{test_finite.py => test_validation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sklearnex/utils/tests/{test_finite.py => test_validation.py} (100%) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_validation.py similarity index 100% rename from sklearnex/utils/tests/test_finite.py rename to sklearnex/utils/tests/test_validation.py From 1ae9af5aa01ea34228e52e55f304b9c5e436e3fb Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:25:59 +0100 Subject: [PATCH 079/131] Revert "rename file" This reverts commit 8d47744f25c0b32e9b0ad639e772107710c56e98. 
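
The behavior these test patches converge on: validate_data should keep the
input container type for supported inputs and reject non-finite values by
default. A minimal sketch of that contract (illustrative only; the data and
the DummyEstimator test helper stand in for a real estimator workload, and
this snippet is not part of any diff in the series):

    import numpy as np

    from sklearnex.tests.utils import DummyEstimator
    from sklearnex.utils.validation import validate_data

    est = DummyEstimator()
    X = np.full((100, 10), np.finfo(np.float64).max)  # large but finite values

    # finite data passes and the returned array keeps the input type
    X_checked = validate_data(est, X)
    assert type(X_checked) is type(X)

    # a single non-finite value is rejected with a ValueError
    X[0, 0] = np.inf
    try:
        validate_data(est, X)
    except ValueError as exc:
        print(exc)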
--- sklearnex/utils/tests/{test_validation.py => test_finite.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sklearnex/utils/tests/{test_validation.py => test_finite.py} (100%) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_finite.py similarity index 100% rename from sklearnex/utils/tests/test_validation.py rename to sklearnex/utils/tests/test_finite.py From bf9b46e84bdc0833463aa99ad7a61090fc7bbd30 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:26:43 +0100 Subject: [PATCH 080/131] remove duplication --- sklearnex/utils/tests/test_finite.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 9a789f274f..3fea947cd7 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -123,10 +123,7 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("array_api_dispatch", [True, False]) -@pytest.mark.parametrize( - "dataframe, queue", get_dataframes_and_queues("numpy,dpctl,dpnp") -) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): est = DummyEstimator() X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] From 3101c3fb0b5bbcc4f3a8386de28da538c5ed4467 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:41:44 +0100 Subject: [PATCH 081/131] fix imports --- sklearnex/utils/tests/test_finite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 3fea947cd7..d9d8d461fe 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -27,7 +27,7 @@ ) from sklearnex import config_context from sklearnex.tests.utils import DummyEstimator, gen_dataset -from sklearnex.utils import validate_data +from sklearnex.utils.validation import validate_data @pytest.mark.parametrize("dtype", [np.float32, np.float64]) From ee799f60c000651eb828bd7586a91825706b644b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 23:42:45 +0100 Subject: [PATCH 082/131] Rename test_finite.py to test_validation.py --- sklearnex/utils/tests/{test_finite.py => test_validation.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sklearnex/utils/tests/{test_finite.py => test_validation.py} (100%) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_validation.py similarity index 100% rename from sklearnex/utils/tests/test_finite.py rename to sklearnex/utils/tests/test_validation.py From db4a6c6fe00883b42b8c580b11ecee8b169bc237 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 20 Nov 2024 23:43:15 +0100 Subject: [PATCH 083/131] Revert "Rename test_finite.py to test_validation.py" This reverts commit ee799f60c000651eb828bd7586a91825706b644b. 
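
For reference while reading the validation.py hunks in the following patches:
the finite check only routes to the oneDAL backend for sufficiently large,
contiguous float32/float64 arrays, and everything else falls back to
scikit-learn's _assert_all_finite. A simplified restatement of that gate
(the helper name below is illustrative and does not exist in validation.py):

    import numpy as np

    def uses_onedal_finite_check(X) -> bool:
        # mirrors the condition in sklearnex/utils/validation.py:
        # X.size < 32768, a non-fp32/fp64 dtype, or a non-contiguous
        # layout all force the scikit-learn fallback path
        contiguous = hasattr(X, "flags") and (
            X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]
        )
        return (
            X.size >= 32768
            and X.dtype in (np.float32, np.float64)
            and contiguous
        )

    print(uses_onedal_finite_check(np.ones((1024, 64))))  # True
    print(uses_onedal_finite_check(np.ones((10, 10))))  # False: below the size cutoff
    print(uses_onedal_finite_check(np.ones((1024, 64))[:, ::2]))  # False: not contiguous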
--- sklearnex/utils/tests/{test_validation.py => test_finite.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename sklearnex/utils/tests/{test_validation.py => test_finite.py} (100%) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_finite.py similarity index 100% rename from sklearnex/utils/tests/test_validation.py rename to sklearnex/utils/tests/test_finite.py From b5acbac8782f6022eff6ee85425d593ce9826e6e Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 06:07:57 +0100 Subject: [PATCH 084/131] updates --- sklearnex/utils/tests/test_finite.py | 36 +++++++++++++++++----------- sklearnex/utils/validation.py | 11 +++++---- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index d9d8d461fe..180b256771 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -41,12 +41,12 @@ [1000, 1000], ], ) -@pytest.mark.parametrize("allow_nan", [False, True]) -def test_sum_infinite_actually_finite(dtype, shape, allow_nan): +@pytest.mark.parametrize("ensure_all_finite", [False, True]) +def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): est = DummyEstimator() X = np.array(shape, dtype=dtype) X.fill(np.finfo(dtype).max) - X_array = validate_data(est, X, allow_nan=allow_nan) + X_array = validate_data(est, X, ensure_all_finite=ensure_all_finite) assert type(X_array) == type(X) @@ -61,12 +61,12 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan): [1000, 1000], ], ) -@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("ensure_all_finite", [False, True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) def test_validate_data_random_location( - dataframe, queue, dtype, shape, allow_nan, check, seed + dataframe, queue, dtype, shape, ensure_all_finite, check, seed ): est = DummyEstimator() rand.seed(seed) @@ -82,21 +82,25 @@ def test_validate_data_random_location( sycl_queue=queue, ) - if check is None or (allow_nan and check == "NaN"): - validate_data(est, X, allow_nan=allow_nan) + if check is None or (ensure_all_finite and check == "NaN"): + validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + msg_err = ( + "Input contains " + + ("infinity" if ensure_all_finite else "NaN, infinity") + + "." 
+ ) with pytest.raises(ValueError, match=msg_err): - validate_data(est, X, allow_nan=allow_nan) + validate_data(est, X, ensure_all_finite=ensure_all_finite) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("ensure_all_finite", [False, True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) def test_validate_data_random_shape_and_location( - dataframe, queue, dtype, allow_nan, check, seed + dataframe, queue, dtype, ensure_all_finite, check, seed ): est = DummyEstimator() lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 @@ -113,12 +117,16 @@ def test_validate_data_random_shape_and_location( sycl_queue=queue, ) - if check is None or (allow_nan and check == "NaN"): + if check is None or (ensure_all_finite and check == "NaN"): validate_data(est, X) else: - msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + msg_err = ( + "Input contains " + + ("infinity" if ensure_all_finite else "NaN, infinity") + + "." + ) with pytest.raises(ValueError, match=msg_err): - validate_data(est, X, allow_nan=allow_nan) + validate_data(est, X, ensure_all_finite=ensure_all_finite) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 3e65223331..34bb988748 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -142,7 +142,9 @@ def validate_data( # force finite check to not occur in sklearn, default is True # `ensure_all_finite` is the most up-to-date keyword name in sklearn # _finite_keyword provides backward compatability for `force_all_finite` - force_all_finite = "ensure_all_finite" not in kwargs or kwargs["ensure_all_finite"] + ensure_all_finite = "ensure_all_finite" not in kwargs or kwargs.pop( + "ensure_all_finite" + ) kwargs[_finite_keyword] = False out = _sklearn_validate_data( _estimator, @@ -150,11 +152,12 @@ def validate_data( y=y, **kwargs, ) - if force_all_finite: + if ensure_all_finite: # run local finite check + allow_nan = ensure_all_finite == "allow-nan" arg = iter(out) if not isinstance(X, str) or X != "no_validation": - assert_all_finite(next(arg), input_name="X") + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") if y is not None or not isinstance(y, str) or y != "no_validation": - assert_all_finite(next(arg), input_name="y") + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") return out From ed57c15e7e08dee51970b4db316aaea16343d7c0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 21 Nov 2024 06:14:53 +0100 Subject: [PATCH 085/131] Update validation.py --- sklearnex/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 34bb988748..e3dd92b7ed 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -142,7 +142,7 @@ def validate_data( # force finite check to not occur in sklearn, default is True # `ensure_all_finite` is the most up-to-date keyword name in sklearn # _finite_keyword provides backward compatability for `force_all_finite` - ensure_all_finite = "ensure_all_finite" not in kwargs or kwargs.pop( + ensure_all_finite = True if "ensure_all_finite" not in kwargs else kwargs.pop( "ensure_all_finite" ) kwargs[_finite_keyword] = False From 
414f8979da5d44d8d0d19255d1b5f621733d8065 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 07:07:15 +0100 Subject: [PATCH 086/131] fixes for some test failures --- sklearnex/utils/tests/test_finite.py | 29 ++++++--------- sklearnex/utils/validation.py | 55 +++++++++++++++------------- 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 180b256771..f75ff33301 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -41,11 +41,12 @@ [1000, 1000], ], ) -@pytest.mark.parametrize("ensure_all_finite", [False, True]) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): est = DummyEstimator() X = np.array(shape, dtype=dtype) X.fill(np.finfo(dtype).max) + X = np.atleast_2d(X) X_array = validate_data(est, X, ensure_all_finite=ensure_all_finite) assert type(X_array) == type(X) @@ -61,7 +62,7 @@ def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): [1000, 1000], ], ) -@pytest.mark.parametrize("ensure_all_finite", [False, True]) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) @@ -77,25 +78,22 @@ def test_validate_data_random_location( X.reshape((-1,))[loc] = float(check) X = _convert_to_dataframe( - X, + np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, ) - if check is None or (ensure_all_finite and check == "NaN"): + allow_nan = ensure_all_finite == "allow_nan" + if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - msg_err = ( - "Input contains " - + ("infinity" if ensure_all_finite else "NaN, infinity") - + "." - ) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("ensure_all_finite", [False, True]) +@pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) @@ -112,19 +110,16 @@ def test_validate_data_random_shape_and_location( X[loc] = float(check) X = _convert_to_dataframe( - X, + np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, ) - if check is None or (ensure_all_finite and check == "NaN"): + allow_nan = ensure_all_finite == "allow_nan" + if check is None or (allow_nan and check == "NaN"): validate_data(est, X) else: - msg_err = ( - "Input contains " - + ("infinity" if ensure_all_finite else "NaN, infinity") - + "." - ) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index e3dd92b7ed..804fafdb48 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -37,20 +37,20 @@ def _is_contiguous(X): # array_api does not have a `strides` or `flags` attribute for testing memory - # order. 
When dlpack support is brought in for oneDAL, the dlpack object can - # then be inspected and this must be updated. _is_contiguous is therefore - # conservative in verifying attributes and does not support array_api. This - # will block onedal_assert_all_finite from being used for array api inputs. + # order. When dlpack support is brought in for oneDAL, the dlpack python capsule + # can then be inspected for strides and this must be updated. _is_contiguous is + # therefore conservative in verifying attributes and does not support array_api. + # This will block onedal_assert_all_finite from being used for array_api inputs. if hasattr(X, "flags") and X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]: return True return False -def _assert_all_finite_core(X, xp, *, allow_nan=False, input_name=""): - # This is a reproduction of code from sklearn.utils.validation - # necessary for older sklearn versions (<1.2) and for dpnp inputs - # which do not conform to the array_api standard, and cannot be - # checked in sklearn. +def _sycl_usm_assert_all_finite(X, xp, *, allow_nan=False, input_name=""): + # This is a reproduction of code from sklearn.utils.validation necessary for + # non-contiguous or non-fp32/fp64 dpctl inputs when sklearn version is <1.2 or + # for non-contiguous or non-fp32/fp64 dpnp inputs, as these cannot be checked + # for finiteness in sklearn nor onedal while preserving their object type. first_pass_isfinite = xp.isfinite(xp.sum(X)) if first_pass_isfinite: return @@ -66,7 +66,7 @@ def _assert_all_finite_core(X, xp, *, allow_nan=False, input_name=""): if sklearn_check_version("1.2"): - def _array_api_assert_all_finite( + def _general_assert_all_finite( X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" ): if _is_numpy_namespace(xp) or is_array_api_compliant: @@ -74,11 +74,11 @@ def _array_api_assert_all_finite( elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: return # handle dpnp inputs - _assert_all_finite_core(X, xp, allow_nan=allow_nan, input_name=input_name) + _sycl_usm_assert_all_finite(X, xp, allow_nan=allow_nan, input_name=input_name) else: - def _array_api_assert_all_finite( + def _general_assert_all_finite( X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" ): @@ -90,9 +90,8 @@ def _array_api_assert_all_finite( return elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: return - - # handle array_api and dpnp inputs - _assert_all_finite_core(X, xp, allow_nan, input_name=input_name) + # handle dpctl and dpnp inputs + _sycl_usm_assert_all_finite(X, xp, allow_nan, input_name=input_name) def _assert_all_finite( @@ -101,18 +100,22 @@ def _assert_all_finite( allow_nan=False, input_name="", ): - # array_api compliance in sklearn varies betweeen the support sklearn versions - # therefore a separate check matching sklearn's assert_all_finite is necessary - # when the data is not float32 or float64 but of a float type. The onedal - # assert_all_finite is only for float32 and float64 contiguous arrays. - - # initial match to daal4py, can be optimized later + # unlike sklearnex, sklearn does not support sycl_usm_ndarrays by default + # therefore a separate finite check implementation matching sklearn's + # `_assert_all_finite` is necessary when the data is not float32 or float64 or + # non-contiguous. The onedal assert_all_finite is only for float32 and float64 + # contiguous arrays. 
+ + # size check is an initial match to daal4py for performance reasons, can be + # optimized later xp, is_array_api_compliant = get_namespace(X) if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): - # all non-numpy arrays for sklearn 1.0 and dpnp for sklearn are not handeled properly - # separate function for import-time sklearn version check - _array_api_assert_all_finite( + # all sycl_usm_ndarrays for sklearn < 1.2 and dpnp for sklearn > 1.2 are not + # handled properly, it calls a separate function for an import-time sklearn + # version check before possible hand-off to sklearn's _assert_all_finite or to + # _assert_all_finite_core. + _general_assert_all_finite( X, xp, is_array_api_compliant, allow_nan=allow_nan, input_name=input_name ) else: @@ -142,8 +145,8 @@ def validate_data( # force finite check to not occur in sklearn, default is True # `ensure_all_finite` is the most up-to-date keyword name in sklearn # _finite_keyword provides backward compatability for `force_all_finite` - ensure_all_finite = True if "ensure_all_finite" not in kwargs else kwargs.pop( - "ensure_all_finite" + ensure_all_finite = ( + True if "ensure_all_finite" not in kwargs else kwargs.pop("ensure_all_finite") ) kwargs[_finite_keyword] = False out = _sklearn_validate_data( From 83253b3cba87bbec4e5a16b5a75519013e93a5b2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 07:13:13 +0100 Subject: [PATCH 087/131] fix text --- sklearnex/utils/validation.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 804fafdb48..5e85bc559d 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -50,7 +50,7 @@ def _sycl_usm_assert_all_finite(X, xp, *, allow_nan=False, input_name=""): # This is a reproduction of code from sklearn.utils.validation necessary for # non-contiguous or non-fp32/fp64 dpctl inputs when sklearn version is <1.2 or # for non-contiguous or non-fp32/fp64 dpnp inputs, as these cannot be checked - # for finiteness in sklearn nor onedal while preserving their object type. + # for finiteness in onedal or by sklearn (while preserving their object type). first_pass_isfinite = xp.isfinite(xp.sum(X)) if first_pass_isfinite: return @@ -100,12 +100,6 @@ def _assert_all_finite( allow_nan=False, input_name="", ): - # unlike sklearnex, sklearn does not support sycl_usm_ndarrays by default - # therefore a separate finite check implementation matching sklearn's - # `_assert_all_finite` is necessary when the data is not float32 or float64 or - # non-contiguous. The onedal assert_all_finite is only for float32 and float64 - # contiguous arrays. - # size check is an initial match to daal4py for performance reasons, can be # optimized later xp, is_array_api_compliant = get_namespace(X) @@ -114,7 +108,7 @@ def _assert_all_finite( # all sycl_usm_ndarrays for sklearn < 1.2 and dpnp for sklearn > 1.2 are not # handled properly, it calls a separate function for an import-time sklearn # version check before possible hand-off to sklearn's _assert_all_finite or to - # _assert_all_finite_core. + # _sycl_usm_assert_all_finite. 
_general_assert_all_finite( X, xp, is_array_api_compliant, allow_nan=allow_nan, input_name=input_name ) From b22e23a47d1cb88d94d71dc29cf61f2f3f39fcc3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 08:22:54 +0100 Subject: [PATCH 088/131] fixes for some failures --- sklearnex/utils/tests/test_finite.py | 7 +++++-- sklearnex/utils/validation.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index f75ff33301..a790301a27 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -87,7 +87,9 @@ def test_validate_data_random_location( if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + msg_err = ( + "Input X contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + ) with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) @@ -119,7 +121,8 @@ def test_validate_data_random_shape_and_location( if check is None or (allow_nan and check == "NaN"): validate_data(est, X) else: - msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + type_err = "infinity" if allow_nan else "NaN, infinity" + msg_err = f"Input X contains {type_err}." with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 5e85bc559d..61cb9acba8 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -153,8 +153,8 @@ def validate_data( # run local finite check allow_nan = ensure_all_finite == "allow-nan" arg = iter(out) - if not isinstance(X, str) or X != "no_validation": + if X != "no_validation": assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") - if y is not None or not isinstance(y, str) or y != "no_validation": + if y is not None and y != "no_validation": assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") return out From 2f8ec169a563ccc1c0d6fadb9dc27ee68d25fec3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 08:23:45 +0100 Subject: [PATCH 089/131] make consistent --- sklearnex/utils/tests/test_finite.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index a790301a27..157b79f6c7 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -87,9 +87,8 @@ def test_validate_data_random_location( if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - msg_err = ( - "Input X contains " + ("infinity" if allow_nan else "NaN, infinity") + "." - ) + type_err = "infinity" if allow_nan else "NaN, infinity" + msg_err = f"Input X contains {type_err}." 
with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) From 1fd9973d018eb1b059c85c555216ce2e9377daae Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 09:21:14 +0100 Subject: [PATCH 090/131] fix bad logic --- sklearnex/utils/validation.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 61cb9acba8..996299f37b 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -94,7 +94,7 @@ def _general_assert_all_finite( _sycl_usm_assert_all_finite(X, xp, allow_nan, input_name=input_name) -def _assert_all_finite( +def _sklearnex_assert_all_finite( X, *, allow_nan=False, @@ -122,7 +122,7 @@ def assert_all_finite( allow_nan=False, input_name="", ): - _assert_all_finite( + _sklearnex_assert_all_finite( X.data if sp.issparse(X) else X, allow_nan=allow_nan, input_name=input_name, @@ -139,9 +139,7 @@ def validate_data( # force finite check to not occur in sklearn, default is True # `ensure_all_finite` is the most up-to-date keyword name in sklearn # _finite_keyword provides backward compatability for `force_all_finite` - ensure_all_finite = ( - True if "ensure_all_finite" not in kwargs else kwargs.pop("ensure_all_finite") - ) + ensure_all_finite = kwargs.pop("ensure_all_finite", True) kwargs[_finite_keyword] = False out = _sklearn_validate_data( _estimator, @@ -153,8 +151,8 @@ def validate_data( # run local finite check allow_nan = ensure_all_finite == "allow-nan" arg = iter(out) - if X != "no_validation": + if not isinstance(X, str) or X != "no_validation": assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") - if y is not None and y != "no_validation": + if not (y is None or isinstance(y, str) and y == "no_validation"): assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") return out From c20c8cc5891d6b41e5ffb36898617c6d310344b2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 10:03:43 +0100 Subject: [PATCH 091/131] fix in string --- sklearnex/utils/tests/test_finite.py | 4 ++-- sklearnex/utils/validation.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 157b79f6c7..c2dec65e00 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -83,7 +83,7 @@ def test_validate_data_random_location( sycl_queue=queue, ) - allow_nan = ensure_all_finite == "allow_nan" + allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: @@ -116,7 +116,7 @@ def test_validate_data_random_shape_and_location( sycl_queue=queue, ) - allow_nan = ensure_all_finite == "allow_nan" + allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): validate_data(est, X) else: diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 996299f37b..10257623a0 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -46,7 +46,7 @@ def _is_contiguous(X): return False -def _sycl_usm_assert_all_finite(X, xp, *, allow_nan=False, input_name=""): +def _assert_all_finite(X, xp, *, allow_nan=False, input_name=""): # This is a reproduction of code from sklearn.utils.validation necessary for # non-contiguous or non-fp32/fp64 dpctl inputs when sklearn version is <1.2 or # for non-contiguous or 
non-fp32/fp64 dpnp inputs, as these cannot be checked @@ -74,7 +74,7 @@ def _general_assert_all_finite( elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: return # handle dpnp inputs - _sycl_usm_assert_all_finite(X, xp, allow_nan=allow_nan, input_name=input_name) + _assert_all_finite(X, xp, allow_nan=allow_nan, input_name=input_name) else: @@ -91,7 +91,7 @@ def _general_assert_all_finite( elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: return # handle dpctl and dpnp inputs - _sycl_usm_assert_all_finite(X, xp, allow_nan, input_name=input_name) + _assert_all_finite(X, xp, allow_nan, input_name=input_name) def _sklearnex_assert_all_finite( From 1ce1b10df9ebd80cf5bf445373ff6e157cf4a207 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 10:57:15 +0100 Subject: [PATCH 092/131] attempt tp see if dataframe conversion is causing the issue --- sklearnex/utils/tests/test_finite.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index c2dec65e00..e8995fe6d0 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -77,11 +77,12 @@ def test_validate_data_random_location( loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) - X = _convert_to_dataframe( + _ = _convert_to_dataframe( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) + ) #test to see if convert_to_dataframe is causing problems + X = np.atleast_2d(X) allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): @@ -110,11 +111,12 @@ def test_validate_data_random_shape_and_location( loc = rand.randint(0, X.size - 1) X[loc] = float(check) - X = _convert_to_dataframe( + _ = _convert_to_dataframe( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) + ) #test to see if convert_to_dataframe is causing problems + X = np.atleast_2d(X) allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): From 5355039022d9f39c447f39c91ed46d65f4555810 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 13:46:18 +0100 Subject: [PATCH 093/131] fix iter problem --- sklearnex/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 10257623a0..acdd21323c 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -41,7 +41,7 @@ def _is_contiguous(X): # can then be inspected for strides and this must be updated. _is_contiguous is # therefore conservative in verifying attributes and does not support array_api. # This will block onedal_assert_all_finite from being used for array_api inputs. 
- if hasattr(X, "flags") and X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]: + if hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): return True return False @@ -150,7 +150,7 @@ def validate_data( if ensure_all_finite: # run local finite check allow_nan = ensure_all_finite == "allow-nan" - arg = iter(out) + arg = iter(out if isinstance(out, tuple) else (out,)) if not isinstance(X, str) or X != "no_validation": assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") if not (y is None or isinstance(y, str) and y == "no_validation"): From b5b84427f2b8c5d5ce39f34f75076190a36ffd6f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 14:26:05 +0100 Subject: [PATCH 094/131] fix testing issues --- sklearnex/utils/tests/test_finite.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index e8995fe6d0..f20d95a05c 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -44,7 +44,7 @@ @pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): est = DummyEstimator() - X = np.array(shape, dtype=dtype) + X = np.empty(shape, dtype=dtype) X.fill(np.finfo(dtype).max) X = np.atleast_2d(X) X_array = validate_data(est, X, ensure_all_finite=ensure_all_finite) @@ -120,7 +120,7 @@ def test_validate_data_random_shape_and_location( allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): - validate_data(est, X) + validate_data(est, X, ensure_all_finite=ensure_all_finite) else: type_err = "infinity" if allow_nan else "NaN, infinity" msg_err = f"Input X contains {type_err}." 
@@ -129,26 +129,25 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("array_api_dispatch", [True, False]) +@pytest.mark.parametrize("array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False]) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): est = DummyEstimator() X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] dispatch = {} - if sklearn_check_version("1.2"): + if array_api_dispatch: + pytest.skip(dataframe == "pandas", "pandas inputs do not work with sklearn's array_api_dispatch") dispatch["array_api_dispatch"] = array_api_dispatch with config_context(**dispatch): - validate_data(est, X, y) - est.fit(X, y) + X_out, y_out = validate_data(est, X, y) + # check sklearn validate_data operations work underneath X_array = validate_data(est, X, reset=False) - X_out = est.predict(X) if dataframe == "pandas" or ( dataframe == "array_api" - and not (sklearn_check_version("1.2") and array_api_dispatch) - ): + and not array_api_dispatch): # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays assert isinstance(X_array, np.ndarray) assert isinstance(X_out, np.ndarray) From d025c89547d7eb5f21deba665dd42ed173925400 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 14:27:38 +0100 Subject: [PATCH 095/131] formatting --- sklearnex/utils/tests/test_finite.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index f20d95a05c..884b3ec6c5 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -81,7 +81,7 @@ def test_validate_data_random_location( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) #test to see if convert_to_dataframe is causing problems + ) # test to see if convert_to_dataframe is causing problems X = np.atleast_2d(X) allow_nan = ensure_all_finite == "allow-nan" @@ -115,7 +115,7 @@ def test_validate_data_random_shape_and_location( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) #test to see if convert_to_dataframe is causing problems + ) # test to see if convert_to_dataframe is causing problems X = np.atleast_2d(X) allow_nan = ensure_all_finite == "allow-nan" @@ -129,7 +129,9 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False]) +@pytest.mark.parametrize( + "array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False] +) @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): est = DummyEstimator() @@ -137,7 +139,10 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): dispatch = {} if array_api_dispatch: - pytest.skip(dataframe == "pandas", "pandas inputs do not work with sklearn's array_api_dispatch") + pytest.skip( + dataframe == "pandas", + "pandas inputs do not work with sklearn's array_api_dispatch", + ) dispatch["array_api_dispatch"] = array_api_dispatch with config_context(**dispatch): @@ -145,9 +150,7 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): # check sklearn validate_data operations work underneath X_array = 
validate_data(est, X, reset=False) - if dataframe == "pandas" or ( - dataframe == "array_api" - and not array_api_dispatch): + if dataframe == "pandas" or (dataframe == "array_api" and not array_api_dispatch): # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays assert isinstance(X_array, np.ndarray) assert isinstance(X_out, np.ndarray) From 428bfb6f5a0db7df71a546d80e80221afbf8a32b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 14:31:43 +0100 Subject: [PATCH 096/131] revert change --- sklearnex/utils/tests/test_finite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 884b3ec6c5..6be0f50841 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -77,7 +77,7 @@ def test_validate_data_random_location( loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) - _ = _convert_to_dataframe( + X = _convert_to_dataframe( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, @@ -111,7 +111,7 @@ def test_validate_data_random_shape_and_location( loc = rand.randint(0, X.size - 1) X[loc] = float(check) - _ = _convert_to_dataframe( + X = _convert_to_dataframe( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, From da2313873bb0db18bbfbe88a4b0756b735cb5533 Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 21 Nov 2024 05:38:41 -0800 Subject: [PATCH 097/131] fixes for pandas --- sklearnex/utils/tests/test_finite.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 6be0f50841..637d12b631 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -139,10 +139,8 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): dispatch = {} if array_api_dispatch: - pytest.skip( - dataframe == "pandas", - "pandas inputs do not work with sklearn's array_api_dispatch", - ) + if dataframe == "pandas": + pytest.skip("pandas inputs do not work with sklearn's array_api_dispatch") dispatch["array_api_dispatch"] = array_api_dispatch with config_context(**dispatch): From 1d0c330f513acd12af54ef5ca43286bf941585f9 Mon Sep 17 00:00:00 2001 From: icfaust Date: Thu, 21 Nov 2024 05:42:04 -0800 Subject: [PATCH 098/131] there is a slowdown with pandas that needs to be solved --- sklearnex/utils/tests/test_finite.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 637d12b631..2ad2341d6f 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -81,8 +81,7 @@ def test_validate_data_random_location( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) # test to see if convert_to_dataframe is causing problems - X = np.atleast_2d(X) + ) allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): @@ -115,8 +114,7 @@ def test_validate_data_random_shape_and_location( np.atleast_2d(X), target_df=dataframe, sycl_queue=queue, - ) # test to see if convert_to_dataframe is causing problems - X = np.atleast_2d(X) + ) allow_nan = ensure_all_finite == "allow-nan" if check is None or (allow_nan and check == "NaN"): From f3f63a6a11955670c3763c8cfd2932a0d4864aa7 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 14:56:33 +0100 Subject: [PATCH 099/131] swap to transpose for speed --- 
sklearnex/utils/tests/test_finite.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 2ad2341d6f..2904ff2bf3 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -77,8 +77,10 @@ def test_validate_data_random_location( loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) + # column heavy pandas inputs are very slow in sklearn's check_array + # transpose inputs to guarantee fast processing in tests X = _convert_to_dataframe( - np.atleast_2d(X), + np.atleast_2d(X).T, target_df=dataframe, sycl_queue=queue, ) @@ -111,7 +113,7 @@ def test_validate_data_random_shape_and_location( X[loc] = float(check) X = _convert_to_dataframe( - np.atleast_2d(X), + np.atleast_2d(X).T, target_df=dataframe, sycl_queue=queue, ) From 56c80545af46e1116f9445300fa4517f14476d32 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 14:58:07 +0100 Subject: [PATCH 100/131] more clarity --- sklearnex/utils/tests/test_finite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 2904ff2bf3..fdaec2e2e4 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -77,8 +77,8 @@ def test_validate_data_random_location( loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) - # column heavy pandas inputs are very slow in sklearn's check_array - # transpose inputs to guarantee fast processing in tests + # column heavy pandas inputs are very slow in sklearn's check_array even without + # the finite check, just transpose inputs to guarantee fast processing in tests X = _convert_to_dataframe( np.atleast_2d(X).T, target_df=dataframe, From 1580d770ed403475853cf4909438f61d028b1744 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 14:24:05 +0100 Subject: [PATCH 101/131] add _check_sample_weight --- sklearnex/utils/validation.py | 120 +++++++++++++++++----------------- 1 file changed, 59 insertions(+), 61 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index acdd21323c..72876bcae6 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -14,11 +14,16 @@ # limitations under the License. # =============================================================================== +import numbers +import warnings + +import numpy as np import scipy.sparse as sp from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite +from sklearn.utils.validation import _num_samples, check_array, check_non_negative from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _is_numpy_namespace +from onedal.utils._array_api import _get_sycl_namespace, _is_numpy_namespace from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite from ._array_api import get_namespace @@ -41,57 +46,7 @@ def _is_contiguous(X): # can then be inspected for strides and this must be updated. _is_contiguous is # therefore conservative in verifying attributes and does not support array_api. # This will block onedal_assert_all_finite from being used for array_api inputs. 
- if hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): - return True - return False - - -def _assert_all_finite(X, xp, *, allow_nan=False, input_name=""): - # This is a reproduction of code from sklearn.utils.validation necessary for - # non-contiguous or non-fp32/fp64 dpctl inputs when sklearn version is <1.2 or - # for non-contiguous or non-fp32/fp64 dpnp inputs, as these cannot be checked - # for finiteness in onedal or by sklearn (while preserving their object type). - first_pass_isfinite = xp.isfinite(xp.sum(X)) - if first_pass_isfinite: - return - - has_inf = xp.any(xp.isinf(X)) - has_nan_error = False if allow_nan else xp.any(xp.isnan(X)) - if has_inf or has_nan_error: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - raise ValueError(msg_err) - - -if sklearn_check_version("1.2"): - - def _general_assert_all_finite( - X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" - ): - if _is_numpy_namespace(xp) or is_array_api_compliant: - _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) - elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: - return - # handle dpnp inputs - _assert_all_finite(X, xp, allow_nan=allow_nan, input_name=input_name) - -else: - - def _general_assert_all_finite( - X, xp, is_array_api_compliant, *, allow_nan=False, input_name="" - ): - - if _is_numpy_namespace(xp): - _sklearn_assert_all_finite(X, allow_nan, input_name=input_name) - elif is_array_api_compliant and not xp.isdtype( - X, ("real floating", "complex floating") - ): - return - elif "float" not in xp.dtype.name or "complex" not in xp.dtype.name: - return - # handle dpctl and dpnp inputs - _assert_all_finite(X, xp, allow_nan, input_name=input_name) + return hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) def _sklearnex_assert_all_finite( @@ -102,16 +57,9 @@ def _sklearnex_assert_all_finite( ): # size check is an initial match to daal4py for performance reasons, can be # optimized later - xp, is_array_api_compliant = get_namespace(X) + xp, _ = get_namespace(X) if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): - - # all sycl_usm_ndarrays for sklearn < 1.2 and dpnp for sklearn > 1.2 are not - # handled properly, it calls a separate function for an import-time sklearn - # version check before possible hand-off to sklearn's _assert_all_finite or to - # _sycl_usm_assert_all_finite. 
- _general_assert_all_finite( - X, xp, is_array_api_compliant, allow_nan=allow_nan, input_name=input_name - ) + _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) else: _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) @@ -141,6 +89,7 @@ def validate_data( # _finite_keyword provides backward compatability for `force_all_finite` ensure_all_finite = kwargs.pop("ensure_all_finite", True) kwargs[_finite_keyword] = False + out = _sklearn_validate_data( _estimator, X=X, @@ -156,3 +105,52 @@ def validate_data( if not (y is None or isinstance(y, str) and y == "no_validation"): assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") return out + + +def _check_sample_weight( + sample_weight, X, dtype=None, copy=False, only_non_negative=False +): + + n_samples = _num_samples(X) + xp, _ = get_namespace(X) + + if dtype is not None and dtype not in [xp.float32, xp.float64]: + dtype = xp.float64 + + if sample_weight is None: + sample_weight = xp.ones(n_samples, dtype=dtype) + elif isinstance(sample_weight, numbers.Number): + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) + else: + if dtype is None: + dtype = [xp.float64, xp.float32] + + # create param dict such that the variable finite_keyword can + # be added to it without direct sklearn_check_version maintenance + params = { + "accept_sparse": False, + "ensure_2d": False, + "dtype": dtype, + "order": "C", + "copy": copy, + "input_name": "sample_weight", + _finite_keyword: False, + } + + sample_weight = check_array(sample_weight, **params) + assert_all_finite(sample_weight, input_name="sample_weight") + + if sample_weight.ndim != 1: + raise ValueError("Sample weights must be 1D array or scalar") + + if sample_weight.shape != (n_samples,): + raise ValueError( + "sample_weight.shape == {}, expected {}!".format( + sample_weight.shape, (n_samples,) + ) + ) + + if only_non_negative: + check_non_negative(sample_weight, "`sample_weight`") + + return sample_weight From ffc9f1f33c361495177e8277f9d6fdda4bcce449 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:07:22 +0100 Subject: [PATCH 102/131] add more testing' --- .../{test_finite.py => test_validation.py} | 76 ++++++++++++++++++- sklearnex/utils/validation.py | 2 +- 2 files changed, 75 insertions(+), 3 deletions(-) rename sklearnex/utils/tests/{test_finite.py => test_validation.py} (66%) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_validation.py similarity index 66% rename from sklearnex/utils/tests/test_finite.py rename to sklearnex/utils/tests/test_validation.py index fdaec2e2e4..31530c4866 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_validation.py @@ -27,7 +27,7 @@ ) from sklearnex import config_context from sklearnex.tests.utils import DummyEstimator, gen_dataset -from sklearnex.utils.validation import validate_data +from sklearnex.utils.validation import _check_sample_weight, validate_data @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -129,11 +129,83 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize( "array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False] ) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", + 
get_dataframes_and_queues( + "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") + ), +) +def test__check_sample_weights_random_shape_and_location( + dataframe, queue, dtype, array_api_dispatch, check, seed +): + # This testing assumes that array api inputs to validate_data will only occur + # with sklearn array_api support which began in sklearn 1.2. This would assume + # that somewhere upstream of the validate_data call, a data conversion of dpnp, + # dpctl, or array_api inputs to numpy inputs would have occurred. + + lb, ub = 32768, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + shape = (rand.randint(lb, ub), 2) + X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) + sample_weight = rand.uniform(high=np.finfo(dtype).max, size=shape[0]).astype(dtype) + + if check: + loc = rand.randint(0, shape[0] - 1) + sample_weight[loc] = float(check) + + X = _convert_to_dataframe( + X, + target_df=dataframe, + sycl_queue=queue, + ) + sample_weight = _convert_to_dataframe( + sample_weight, + target_df=dataframe, + sycl_queue=queue, + ) + + dispatch = {} + if array_api_dispatch: + if dataframe == "pandas": + pytest.skip("pandas inputs do not work with sklearn's array_api_dispatch") + dispatch["array_api_dispatch"] = array_api_dispatch + + with config_context(**dispatch): + + if check is None: + X_out = _check_sample_weight(X, sample_weight) + if dataframe == "pandas" or ( + dataframe == "array_api" and not array_api_dispatch + ): + assert isinstance(X, np.ndarray) + else: + assert type(X_out) == type(X) + else: + msg_err = "Input sample_weight contains NaN, infinity." + with pytest.raises(ValueError, match=msg_err): + X_out = _check_sample_weight(X, sample_weight) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False] +) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues( + "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") + ), +) def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): + # This testing assumes that array api inputs to validate_data will only occur + # with sklearn array_api support which began in sklearn 1.2. This would assume + # that somewhere upstream of the validate_data call, a data conversion of dpnp, + # dpctl, or array_api inputs to numpy inputs would have occurred. 
est = DummyEstimator() X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 72876bcae6..f0ed55d86a 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -125,7 +125,7 @@ def _check_sample_weight( if dtype is None: dtype = [xp.float64, xp.float32] - # create param dict such that the variable finite_keyword can + # create param dict such that the variable _finite_keyword can # be added to it without direct sklearn_check_version maintenance params = { "accept_sparse": False, From d184ed044c1bd26b6f38362a6d706331e49714db Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:09:29 +0100 Subject: [PATCH 103/131] rename --- sklearnex/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 31530c4866..13934acc7c 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -140,7 +140,7 @@ def test_validate_data_random_shape_and_location( "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") ), ) -def test__check_sample_weights_random_shape_and_location( +def test__check_sample_weight_random_shape_and_location( dataframe, queue, dtype, array_api_dispatch, check, seed ): # This testing assumes that array api inputs to validate_data will only occur From c68616f26b77f39e2dfcc7f502efb5079583070b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:18:58 +0100 Subject: [PATCH 104/131] remove unnecessary imports --- sklearnex/utils/validation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index f0ed55d86a..17a83ea054 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -15,15 +15,12 @@ # =============================================================================== import numbers -import warnings -import numpy as np import scipy.sparse as sp from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite from sklearn.utils.validation import _num_samples, check_array, check_non_negative from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _get_sycl_namespace, _is_numpy_namespace from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite from ._array_api import get_namespace From e7ea94e3fea7d28e213bd36a8816499742dfc15f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:42:59 +0100 Subject: [PATCH 105/131] fix test slowness --- sklearnex/utils/tests/test_validation.py | 40 +++++++++--------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 13934acc7c..d1976decce 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -130,9 +130,6 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) -@pytest.mark.parametrize( - "array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False] -) @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize( "dataframe, queue", @@ -141,7 +138,7 @@ def test_validate_data_random_shape_and_location( ), ) def 
test__check_sample_weight_random_shape_and_location( - dataframe, queue, dtype, array_api_dispatch, check, seed + dataframe, queue, dtype, check, seed ): # This testing assumes that array api inputs to validate_data will only occur # with sklearn array_api support which began in sklearn 1.2. This would assume @@ -170,21 +167,17 @@ def test__check_sample_weight_random_shape_and_location( ) dispatch = {} - if array_api_dispatch: - if dataframe == "pandas": - pytest.skip("pandas inputs do not work with sklearn's array_api_dispatch") - dispatch["array_api_dispatch"] = array_api_dispatch + if dataframe in ["array_api", "dpctl"]: + dispatch["array_api_dispatch"] = True with config_context(**dispatch): if check is None: X_out = _check_sample_weight(X, sample_weight) - if dataframe == "pandas" or ( - dataframe == "array_api" and not array_api_dispatch - ): - assert isinstance(X, np.ndarray) - else: + if dispatch: assert type(X_out) == type(X) + else: + assert isinstance(X, np.ndarray) else: msg_err = "Input sample_weight contains NaN, infinity." with pytest.raises(ValueError, match=msg_err): @@ -192,16 +185,13 @@ def test__check_sample_weight_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize( - "array_api_dispatch", [True, False] if sklearn_check_version("1.2") else [False] -) @pytest.mark.parametrize( "dataframe, queue", get_dataframes_and_queues( "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") ), ) -def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): +def test_validate_data_output(dtype, dataframe, queue): # This testing assumes that array api inputs to validate_data will only occur # with sklearn array_api support which began in sklearn 1.2. 
This would assume # that somewhere upstream of the validate_data call, a data conversion of dpnp, @@ -210,22 +200,20 @@ def test_validate_data_output(array_api_dispatch, dtype, dataframe, queue): X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] dispatch = {} - if array_api_dispatch: - if dataframe == "pandas": - pytest.skip("pandas inputs do not work with sklearn's array_api_dispatch") - dispatch["array_api_dispatch"] = array_api_dispatch + if dataframe in ["array_api", "dpctl"]: + dispatch["array_api_dispatch"] = True with config_context(**dispatch): X_out, y_out = validate_data(est, X, y) # check sklearn validate_data operations work underneath X_array = validate_data(est, X, reset=False) - if dataframe == "pandas" or (dataframe == "array_api" and not array_api_dispatch): - # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays - assert isinstance(X_array, np.ndarray) - assert isinstance(X_out, np.ndarray) - else: + if dispatch: assert type(X) == type( X_array ), f"validate_data converted {type(X)} to {type(X_array)}" assert type(X) == type(X_out), f"from_array converted {type(X)} to {type(X_out)}" + else: + # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays + assert isinstance(X_array, np.ndarray) + assert isinstance(X_out, np.ndarray) From dbe108dd0d9c09bc4ec9801c9dce9a71739e874b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:45:40 +0100 Subject: [PATCH 106/131] focus get_dataframes_and_queues --- sklearnex/utils/tests/test_validation.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index d1976decce..64bc18e280 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -65,7 +65,12 @@ def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): @pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues( + "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") + ), +) def test_validate_data_random_location( dataframe, queue, dtype, shape, ensure_all_finite, check, seed ): @@ -99,7 +104,12 @@ def test_validate_data_random_location( @pytest.mark.parametrize("ensure_all_finite", ["allow-nan", True]) @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues()) +@pytest.mark.parametrize( + "dataframe, queue", + get_dataframes_and_queues( + "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") + ), +) def test_validate_data_random_shape_and_location( dataframe, queue, dtype, ensure_all_finite, check, seed ): From 7284b59910cd839d571a6c586a9b302fcb5d5760 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 15:50:22 +0100 Subject: [PATCH 107/131] put config_context around --- sklearnex/utils/tests/test_validation.py | 44 +++++++++++++++--------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 64bc18e280..3c1978e127 100644 --- a/sklearnex/utils/tests/test_validation.py +++ 
b/sklearnex/utils/tests/test_validation.py @@ -90,14 +90,20 @@ def test_validate_data_random_location( sycl_queue=queue, ) - allow_nan = ensure_all_finite == "allow-nan" - if check is None or (allow_nan and check == "NaN"): - validate_data(est, X, ensure_all_finite=ensure_all_finite) - else: - type_err = "infinity" if allow_nan else "NaN, infinity" - msg_err = f"Input X contains {type_err}." - with pytest.raises(ValueError, match=msg_err): + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + + allow_nan = ensure_all_finite == "allow-nan" + if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) + else: + type_err = "infinity" if allow_nan else "NaN, infinity" + msg_err = f"Input X contains {type_err}." + with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, ensure_all_finite=ensure_all_finite) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -128,14 +134,20 @@ def test_validate_data_random_shape_and_location( sycl_queue=queue, ) - allow_nan = ensure_all_finite == "allow-nan" - if check is None or (allow_nan and check == "NaN"): - validate_data(est, X, ensure_all_finite=ensure_all_finite) - else: - type_err = "infinity" if allow_nan else "NaN, infinity" - msg_err = f"Input X contains {type_err}." - with pytest.raises(ValueError, match=msg_err): + dispatch = {} + if sklearn_check_version("1.2") and dataframe != "pandas": + dispatch["array_api_dispatch"] = True + + with config_context(**dispatch): + + allow_nan = ensure_all_finite == "allow-nan" + if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) + else: + type_err = "infinity" if allow_nan else "NaN, infinity" + msg_err = f"Input X contains {type_err}." 
+ with pytest.raises(ValueError, match=msg_err): + validate_data(est, X, ensure_all_finite=ensure_all_finite) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -177,7 +189,7 @@ def test__check_sample_weight_random_shape_and_location( ) dispatch = {} - if dataframe in ["array_api", "dpctl"]: + if sklearn_check_version("1.2") and dataframe != "pandas": dispatch["array_api_dispatch"] = True with config_context(**dispatch): @@ -210,7 +222,7 @@ def test_validate_data_output(dtype, dataframe, queue): X, y = gen_dataset(est, queue=queue, target_df=dataframe, dtype=dtype)[0] dispatch = {} - if dataframe in ["array_api", "dpctl"]: + if sklearn_check_version("1.2") and dataframe != "pandas": dispatch["array_api_dispatch"] = True with config_context(**dispatch): From e1be91d13c5cef9815ecc9d8a7c3ced8e7386efa Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 24 Nov 2024 14:28:54 +0100 Subject: [PATCH 108/131] Update test_validation.py --- sklearnex/utils/tests/test_validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 3c1978e127..dc6117e6d4 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -195,7 +195,7 @@ def test__check_sample_weight_random_shape_and_location( with config_context(**dispatch): if check is None: - X_out = _check_sample_weight(X, sample_weight) + X_out = _check_sample_weight(sample_weight, X) if dispatch: assert type(X_out) == type(X) else: @@ -203,7 +203,7 @@ def test__check_sample_weight_random_shape_and_location( else: msg_err = "Input sample_weight contains NaN, infinity." with pytest.raises(ValueError, match=msg_err): - X_out = _check_sample_weight(X, sample_weight) + X_out = _check_sample_weight(sample_weight, X) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) From 8a0f9e9dd1219d2ad1e514c9fecd9055cdfb0d60 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 24 Nov 2024 15:20:57 +0100 Subject: [PATCH 109/131] Update base.py --- sklearnex/tests/utils/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index e484423cfc..706de39a91 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -47,7 +47,6 @@ NearestNeighbors, ) from sklearnex.svm import SVC, NuSVC -from sklearnex.utils.validation import validate_data def _load_all_models(with_sklearnex=True, estimator=True): @@ -378,8 +377,6 @@ def _get_processor_info(): class DummyEstimator(BaseEstimator): def fit(self, X, y=None): - X, y = validate_data(self, X, y) - sua_iface, xp, _ = _get_sycl_namespace(X) X_table = to_table(X) y_table = to_table(y) From 52722077467d2844dcce2233f24efb7e5dafd7f4 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 24 Nov 2024 21:07:29 +0100 Subject: [PATCH 110/131] Update test_validation.py --- sklearnex/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index dc6117e6d4..d366a74560 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -199,7 +199,7 @@ def test__check_sample_weight_random_shape_and_location( if dispatch: assert type(X_out) == type(X) else: - assert isinstance(X, np.ndarray) + assert isinstance(X_out, np.ndarray) else: msg_err = "Input sample_weight contains NaN, infinity." 
with pytest.raises(ValueError, match=msg_err): From 56b5c4c4730de70243cb158e88ddda6ac38bc082 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Mon, 25 Nov 2024 06:46:06 +0100 Subject: [PATCH 111/131] generalize regex --- sklearnex/utils/tests/test_validation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index d366a74560..3f7fb0758d 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -100,8 +100,8 @@ def test_validate_data_random_location( if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - type_err = "infinity" if allow_nan else "NaN, infinity" - msg_err = f"Input X contains {type_err}." + type_err = "infinity" if allow_nan else "[NaN|infinity]" + msg_err = f"Input X contains {type_err}" with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) @@ -144,7 +144,7 @@ def test_validate_data_random_shape_and_location( if check is None or (allow_nan and check == "NaN"): validate_data(est, X, ensure_all_finite=ensure_all_finite) else: - type_err = "infinity" if allow_nan else "NaN, infinity" + type_err = "infinity" if allow_nan else "[NaN|infinity]" msg_err = f"Input X contains {type_err}." with pytest.raises(ValueError, match=msg_err): validate_data(est, X, ensure_all_finite=ensure_all_finite) @@ -201,7 +201,7 @@ def test__check_sample_weight_random_shape_and_location( else: assert isinstance(X_out, np.ndarray) else: - msg_err = "Input sample_weight contains NaN, infinity." + msg_err = "Input sample_weight contains [NaN|infinity]" with pytest.raises(ValueError, match=msg_err): X_out = _check_sample_weight(sample_weight, X) From 0d1b30607d0fa12c93eb7eeaf9c1b818cee44467 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Mon, 25 Nov 2024 10:29:42 +0100 Subject: [PATCH 112/131] add fixes for sklearn 1.0 and input_name --- sklearnex/utils/validation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 17a83ea054..76470091ce 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -56,7 +56,10 @@ def _sklearnex_assert_all_finite( # optimized later xp, _ = get_namespace(X) if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): - _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + if sklearn_check_version("1.1"): + _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + else: + _sklearn_assert_all_finite(X, allow_nan=allow_nan) else: _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) @@ -122,17 +125,16 @@ def _check_sample_weight( if dtype is None: dtype = [xp.float64, xp.float32] - # create param dict such that the variable _finite_keyword can - # be added to it without direct sklearn_check_version maintenance params = { "accept_sparse": False, "ensure_2d": False, "dtype": dtype, "order": "C", "copy": copy, - "input_name": "sample_weight", _finite_keyword: False, } + if sklearn_check_version("1.1"): + params["input_name"] = "sample_weight" sample_weight = check_array(sample_weight, **params) assert_all_finite(sample_weight, input_name="sample_weight") From 8ff312eecb289815b8e5ff65c558a39dadb1a72d Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Mon, 25 Nov 2024 10:35:24 +0100 Subject: [PATCH 113/131] fixes for 
test failures --- sklearnex/utils/tests/test_validation.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 3f7fb0758d..92ba0d742a 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -29,6 +29,13 @@ from sklearnex.tests.utils import DummyEstimator, gen_dataset from sklearnex.utils.validation import _check_sample_weight, validate_data +# array_api support starts in sklearn 1.2, and array_api_strict conformance starts in sklearn 1.3 +_dataframes_supported = ( + "numpy,pandas" + + (",dpctl" if sklearn_check_version("1.2") else "") + + (",array_api" if sklearn_check_version("1.3") else "") +) + @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize( @@ -67,9 +74,7 @@ def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize( "dataframe, queue", - get_dataframes_and_queues( - "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") - ), + get_dataframes_and_queues(_dataframes_supported), ) def test_validate_data_random_location( dataframe, queue, dtype, shape, ensure_all_finite, check, seed @@ -112,9 +117,7 @@ def test_validate_data_random_location( @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize( "dataframe, queue", - get_dataframes_and_queues( - "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") - ), + get_dataframes_and_queues(_dataframes_supported), ) def test_validate_data_random_shape_and_location( dataframe, queue, dtype, ensure_all_finite, check, seed @@ -155,9 +158,7 @@ def test_validate_data_random_shape_and_location( @pytest.mark.parametrize("seed", [0, int(time.time())]) @pytest.mark.parametrize( "dataframe, queue", - get_dataframes_and_queues( - "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") - ), + get_dataframes_and_queues(_dataframes_supported), ) def test__check_sample_weight_random_shape_and_location( dataframe, queue, dtype, check, seed @@ -209,9 +210,7 @@ def test__check_sample_weight_random_shape_and_location( @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize( "dataframe, queue", - get_dataframes_and_queues( - "numpy,pandas" + ("dpctl,array_api" if sklearn_check_version("1.2") else "") - ), + get_dataframes_and_queues(_dataframes_supported), ) def test_validate_data_output(dtype, dataframe, queue): # This testing assumes that array api inputs to validate_data will only occur From 87b7e3b461c431d07da1114c15ea8e9ca3c9c4b9 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 25 Nov 2024 21:42:18 +0100 Subject: [PATCH 114/131] Update validation.py --- onedal/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 836dd84a75..38dcfd3fb3 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -449,7 +449,7 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X_t.dtype == np.float32 else "double", + "fptype": X_t.dtype, "method": "dense", "allow_nan": allow_nan, } From 29e8f8c1a34aad809695d86d55bb197bb6e3fae1 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 25 Nov 2024 21:42:56 +0100 Subject: [PATCH 
115/131] Update test_validation.py --- onedal/utils/tests/test_validation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 7662f486f3..37486f0337 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -137,9 +137,8 @@ def test_assert_finite_sparse(dtype, allow_nan, check, seed): ) if check: - locx = rand.randint(0, X.shape[0] - 1) - locy = rand.randint(0, X.shape[1] - 1) - X[locx, locy] = float(check) + locx = rand.randint(0, X.data.shape[0] - 1) + X.data[locx] = float(check) if check is None or (allow_nan and check == "NaN"): assert_all_finite(X, allow_nan=allow_nan) From 27ce5fc64fa75c2bcde2eac87f7fec7899cf2416 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 27 Nov 2024 11:49:57 +0100 Subject: [PATCH 116/131] Update validation.py --- sklearnex/utils/validation.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 76470091ce..479c0b300d 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -20,8 +20,7 @@ from sklearn.utils.validation import _assert_all_finite as _sklearn_assert_all_finite from sklearn.utils.validation import _num_samples, check_array, check_non_negative -from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite +from daal4py.sklearn._utils import daal_check_version, sklearn_check_version from ._array_api import get_namespace @@ -37,13 +36,26 @@ _finite_keyword = "force_all_finite" -def _is_contiguous(X): - # array_api does not have a `strides` or `flags` attribute for testing memory - # order. When dlpack support is brought in for oneDAL, the dlpack python capsule - # can then be inspected for strides and this must be updated. _is_contiguous is - # therefore conservative in verifying attributes and does not support array_api. - # This will block onedal_assert_all_finite from being used for array_api inputs. - return hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) +if daal_check_version(2024, "P", 700): + from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite + + def _onedal_supported_format(X, xp=None): + # array_api does not have a `strides` or `flags` attribute for testing memory + # order. When dlpack support is brought in for oneDAL, general support for + # array_api can be enabled and the hasattr check can be removed. + # _onedal_supported_format is therefore conservative in verifying attributes and + # does not support array_api. This will block onedal_assert_all_finite from being + # used for array_api inputs but will allow dpnp ndarrays and dpctl tensors. 
+ return X.dtype in [xp.float32, xp.float64] and hasattr(X, "flags") + +else: + from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite + from onedal.utils._array_api import _is_numpy_namespace + + def _onedal_supported_format(X, xp=None): + # daal4py _assert_all_finite only supports numpy namespaces, use internally- + # defined check to validate inputs, otherwise offload to sklearn + return X.dtype in [xp.float32, xp.float64] and _is_numpy_namespace(xp) def _sklearnex_assert_all_finite( @@ -55,7 +67,7 @@ def _sklearnex_assert_all_finite( # size check is an initial match to daal4py for performance reasons, can be # optimized later xp, _ = get_namespace(X) - if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): + if X.size < 32768 or not _onedal_supported_format(X, xp): if sklearn_check_version("1.1"): _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) else: From 5d31988df229e45c116bfbb8a0c21db0ee3bbc32 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 27 Nov 2024 11:50:39 +0100 Subject: [PATCH 117/131] formattintg --- sklearnex/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 479c0b300d..19f8fed17a 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -38,7 +38,7 @@ if daal_check_version(2024, "P", 700): from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite - + def _onedal_supported_format(X, xp=None): # array_api does not have a `strides` or `flags` attribute for testing memory # order. When dlpack support is brought in for oneDAL, general support for @@ -51,7 +51,7 @@ def _onedal_supported_format(X, xp=None): else: from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite from onedal.utils._array_api import _is_numpy_namespace - + def _onedal_supported_format(X, xp=None): # daal4py _assert_all_finite only supports numpy namespaces, use internally- # defined check to validate inputs, otherwise offload to sklearn From c4dccd61076198dd8d225071c7ada649b6223685 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 27 Nov 2024 11:55:47 +0100 Subject: [PATCH 118/131] make suggested changes --- sklearnex/utils/tests/test_validation.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 92ba0d742a..70da28dbce 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -42,9 +42,7 @@ "shape", [ [16, 2048], - [ - 2**16 + 3, - ], + [2**16 + 3], [1000, 1000], ], ) @@ -63,9 +61,7 @@ def test_sum_infinite_actually_finite(dtype, shape, ensure_all_finite): "shape", [ [16, 2048], - [ - 2**16 + 3, - ], + [2**16 + 3], [1000, 1000], ], ) From f83f1ef1a3217a1997434c09fbe8efc6777ecdc8 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 27 Nov 2024 11:59:43 +0100 Subject: [PATCH 119/131] follow changes made in #2126 --- onedal/utils/tests/test_validation.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 37486f0337..31eb8da2cc 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -33,9 +33,7 @@ "shape", [ [16, 2048], - [ - 2**16 + 3, - ], + [65539], # 2**16 + 3, [1000, 1000], [ 3, @@ -58,9 +56,7 @@ def 
test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue) "shape", [ [16, 2048], - [ - 2**16 + 3, - ], + [65539], # 2**16 + 3, [1000, 1000], [ 3, From e43c047cb109bb85eb822dd974f80b452b756230 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 27 Nov 2024 12:09:44 +0100 Subject: [PATCH 120/131] fix future device problem --- sklearnex/utils/validation.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 19f8fed17a..80edd2ec57 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -130,9 +130,17 @@ def _check_sample_weight( dtype = xp.float64 if sample_weight is None: - sample_weight = xp.ones(n_samples, dtype=dtype) + if hasattr(X, "device"): + sample_weight = xp.ones(n_samples, dtype=dtype, device=X.device) + else: + sample_weight = xp.ones(n_samples, dtype=dtype) elif isinstance(sample_weight, numbers.Number): - sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) + if hasattr(X, "device"): + sample_weight = xp.full( + n_samples, sample_weight, dtype=dtype, device=X.device + ) + else: + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) else: if dtype is None: dtype = [xp.float64, xp.float32] From 5c81f9df84f6ca603f8a16516cb6391ed9be2684 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 27 Nov 2024 17:55:17 +0100 Subject: [PATCH 121/131] Update validation.py --- sklearnex/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 80edd2ec57..c2ba2c1dc5 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -36,7 +36,7 @@ _finite_keyword = "force_all_finite" -if daal_check_version(2024, "P", 700): +if daal_check_version((2024, "P", 700)): from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite def _onedal_supported_format(X, xp=None): From 164435de60077f573f19244dcb682b36b4f6b513 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 4 Dec 2024 21:54:14 +0100 Subject: [PATCH 122/131] minor changes based on #2206, suggestions --- sklearnex/utils/tests/test_validation.py | 2 +- sklearnex/utils/validation.py | 35 ++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 70da28dbce..c770abd495 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -1,5 +1,5 @@ # ============================================================================== -# Copyright 2024 Intel Corporation +# Copyright 2024 UXL Foundation Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index c2ba2c1dc5..4e908a31ce 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -46,7 +46,13 @@ def _onedal_supported_format(X, xp=None): # _onedal_supported_format is therefore conservative in verifying attributes and # does not support array_api. This will block onedal_assert_all_finite from being # used for array_api inputs but will allow dpnp ndarrays and dpctl tensors. 
- return X.dtype in [xp.float32, xp.float64] and hasattr(X, "flags") + # only check contiguous arrays to prevent unnecessary copying of data, even if + # non-contiguous arrays can now be converted to oneDAL tables. + return ( + X.dtype in [xp.float32, xp.float64] + and hasattr(X, "flags") + and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) + ) else: from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite @@ -108,14 +114,37 @@ def validate_data( y=y, **kwargs, ) + + check_x = not isinstance(X, str) or X != "no_validation" + check_y = not (y is None or isinstance(y, str) and y == "no_validation") + if ensure_all_finite: # run local finite check allow_nan = ensure_all_finite == "allow-nan" arg = iter(out if isinstance(out, tuple) else (out,)) - if not isinstance(X, str) or X != "no_validation": + if check_x: assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") - if not (y is None or isinstance(y, str) and y == "no_validation"): + if check_y: assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") + + if check_y and "dtype" in kwargs: + # validate_data does not do full dtype conversions, as it uses check_X_y + # oneDAL can make tables from [int32, float32, float64], requiring + # a dtype check and conversion. This will query the array_namespace and + # convert y as necessary. This is done after assert_all_finite, because + # int y arrays do not need to finite check, and this will lead to a speedup + # in comparison to sklearn + dtype = kwargs["dtype"] + if not isinstance(dtype, (tuple, list)): + dtype = tuple(dtype) + + outx, outy = out if check_x else (None, out) + if outy.dtype not in dtype: + yp, _ = get_namespace(outy) + # use asarray rather than astype because of numpy support + outy = yp.asarray(outy, dtype=dtype[0]) + out = (outx, outy) if check_x else outy + return out From 4aff9e025f23137e877739df0bf3fc4cba3a65c2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 5 Dec 2024 10:05:36 +0100 Subject: [PATCH 123/131] remove xp as keyword --- sklearnex/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 4e908a31ce..cfc5106aab 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -39,7 +39,7 @@ if daal_check_version((2024, "P", 700)): from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite - def _onedal_supported_format(X, xp=None): + def _onedal_supported_format(X, xp): # array_api does not have a `strides` or `flags` attribute for testing memory # order. When dlpack support is brought in for oneDAL, general support for # array_api can be enabled and the hasattr check can be removed. 
@@ -58,7 +58,7 @@ def _onedal_supported_format(X, xp=None): from daal4py.utils.validation import _assert_all_finite as _onedal_assert_all_finite from onedal.utils._array_api import _is_numpy_namespace - def _onedal_supported_format(X, xp=None): + def _onedal_supported_format(X, xp): # daal4py _assert_all_finite only supports numpy namespaces, use internally- # defined check to validate inputs, otherwise offload to sklearn return X.dtype in [xp.float32, xp.float64] and _is_numpy_namespace(xp) From db11608bd83ff0bb657fca137949fe3cc5a2af94 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 5 Dec 2024 10:15:17 +0100 Subject: [PATCH 124/131] only_non_negative -> ensure_non_negative --- sklearnex/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index cfc5106aab..3361ae0c39 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -149,7 +149,7 @@ def validate_data( def _check_sample_weight( - sample_weight, X, dtype=None, copy=False, only_non_negative=False + sample_weight, X, dtype=None, copy=False, ensure_non_negative=False ): n_samples = _num_samples(X) @@ -198,7 +198,7 @@ def _check_sample_weight( ) ) - if only_non_negative: + if ensure_non_negative: check_non_negative(sample_weight, "`sample_weight`") return sample_weight From bf62e50b0d84db076a408523f1d4f229746544db Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 5 Dec 2024 10:42:58 +0100 Subject: [PATCH 125/131] add commentary --- sklearnex/utils/validation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 3361ae0c39..298a1c2eda 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -121,6 +121,11 @@ def validate_data( if ensure_all_finite: # run local finite check allow_nan = ensure_all_finite == "allow-nan" + # the return object from validate_data can (annoyingly) be a single + # element (either x or y) or both (as a tuple). An iterator along with + # check_x and check_y can go through the output properly without + # stacking layers of if statements to make sure the proper input_name + # is used arg = iter(out if isinstance(out, tuple) else (out,)) if check_x: assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") @@ -131,9 +136,7 @@ def validate_data( # validate_data does not do full dtype conversions, as it uses check_X_y # oneDAL can make tables from [int32, float32, float64], requiring # a dtype check and conversion. This will query the array_namespace and - # convert y as necessary. This is done after assert_all_finite, because - # int y arrays do not need to finite check, and this will lead to a speedup - # in comparison to sklearn + # convert y as necessary. This is important especially for regressors. 
dtype = kwargs["dtype"] if not isinstance(dtype, (tuple, list)): dtype = tuple(dtype) From 993a27207cea1722851406fd214a8f1eb0bcba77 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 5 Dec 2024 10:43:12 +0100 Subject: [PATCH 126/131] formatting --- sklearnex/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index 298a1c2eda..b5c8b89251 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -121,7 +121,7 @@ def validate_data( if ensure_all_finite: # run local finite check allow_nan = ensure_all_finite == "allow-nan" - # the return object from validate_data can (annoyingly) be a single + # the return object from validate_data can (annoyingly) be a single # element (either x or y) or both (as a tuple). An iterator along with # check_x and check_y can go through the output properly without # stacking layers of if statements to make sure the proper input_name From c034883d96b1d2bfa369e3f7a1f84d2aca5fb81d Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 5 Dec 2024 11:16:32 +0100 Subject: [PATCH 127/131] address changes --- sklearnex/tests/utils/base.py | 5 ++++- sklearnex/utils/tests/test_validation.py | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 706de39a91..1fd5b25e92 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -388,7 +388,10 @@ def fit(self, X, y=None): X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) self.y_attr_ = from_table( - y_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp + y_table, + sua_iface=sua_iface, + sycl_queue=y.sycl_queue if y else X.sycl_queue, + xp=xp, ) else: self.x_attr = from_table(X_table) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index c770abd495..8b6391958d 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -225,12 +225,16 @@ def test_validate_data_output(dtype, dataframe, queue): # check sklearn validate_data operations work underneath X_array = validate_data(est, X, reset=False) - if dispatch: - assert type(X) == type( - X_array - ), f"validate_data converted {type(X)} to {type(X_array)}" - assert type(X) == type(X_out), f"from_array converted {type(X)} to {type(X_out)}" - else: - # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays - assert isinstance(X_array, np.ndarray) - assert isinstance(X_out, np.ndarray) + for orig, first, second in ((X, X_out, X_array), (y, y_out, None)): + if dispatch: + assert type(orig) == type( + first + ), f"validate_data converted {type(orig)} to {type(first)}" + if second: + assert type(orig) == type( + second + ), f"from_array converted {type(orig)} to {type(second)}" + else: + # array_api_strict from sklearn < 1.2 and pandas will convert to numpy arrays + assert isinstance(first, np.ndarray) + assert second is None or isinstance(second, np.ndarray) From e5c9b8bd22f93272c73290d5a852c19279f51fa5 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 5 Dec 2024 13:59:03 +0100 Subject: [PATCH 128/131] Update test_validation.py --- sklearnex/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index 8b6391958d..aa0f9d6894 100644 --- a/sklearnex/utils/tests/test_validation.py +++ 
b/sklearnex/utils/tests/test_validation.py @@ -230,7 +230,7 @@ def test_validate_data_output(dtype, dataframe, queue): assert type(orig) == type( first ), f"validate_data converted {type(orig)} to {type(first)}" - if second: + if second is not None: assert type(orig) == type( second ), f"from_array converted {type(orig)} to {type(second)}" From c55843bc755e9df994fa7ad57280a5f48e2e146b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 5 Dec 2024 16:03:34 +0100 Subject: [PATCH 129/131] Update base.py --- sklearnex/tests/utils/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py index 1fd5b25e92..33d3804b8f 100755 --- a/sklearnex/tests/utils/base.py +++ b/sklearnex/tests/utils/base.py @@ -390,7 +390,7 @@ def fit(self, X, y=None): self.y_attr_ = from_table( y_table, sua_iface=sua_iface, - sycl_queue=y.sycl_queue if y else X.sycl_queue, + sycl_queue=X.sycl_queue if y is None else y.sycl_queue, xp=xp, ) else: From ac6d8317f857fb3eaac8dac4ed29d93024cc9b78 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 5 Dec 2024 19:26:05 +0100 Subject: [PATCH 130/131] Update test_validation.py --- sklearnex/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/utils/tests/test_validation.py b/sklearnex/utils/tests/test_validation.py index aa0f9d6894..37d0a6df6e 100644 --- a/sklearnex/utils/tests/test_validation.py +++ b/sklearnex/utils/tests/test_validation.py @@ -1,5 +1,5 @@ # ============================================================================== -# Copyright 2024 UXL Foundation Contributors +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 1305ca1dac41c02aad3abc933c3f63b4a36fe81a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 9 Dec 2024 14:25:28 +0100 Subject: [PATCH 131/131] Update sklearnex/utils/validation.py Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> --- sklearnex/utils/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index b5c8b89251..4d12602d74 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -121,7 +121,8 @@ def validate_data( if ensure_all_finite: # run local finite check allow_nan = ensure_all_finite == "allow-nan" - # the return object from validate_data can (annoyingly) be a single + # the return object from validate_data can be a single + # element (either x or y) or both (as a tuple). An iterator along with # check_x and check_y can go through the output properly without # stacking layers of if statements to make sure the proper input_name
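
For reference, the short sketch below shows how the reworked validation helpers from this series are exercised. It mirrors the calls made in sklearnex/utils/tests/test_validation.py rather than defining any new behavior, and it assumes a sklearnex build with these patches applied (oneDAL >= 2024.7 for the oneDAL-backed finiteness check). TrivialEstimator is a stand-in introduced here purely for illustration; the tests above use their own DummyEstimator for the same purpose.

# Illustrative sketch only, not part of the patch series.
import numpy as np
from sklearn.base import BaseEstimator

from sklearnex.utils.validation import _check_sample_weight, validate_data


class TrivialEstimator(BaseEstimator):
    # minimal estimator; validate_data only needs a BaseEstimator instance to
    # record n_features_in_ and feature names during validation
    pass


est = TrivialEstimator()
X = np.random.default_rng(0).uniform(high=np.finfo(np.float64).max, size=(1000, 1000))

# finite float64 numpy input passes validation and stays a numpy array
X_out = validate_data(est, X, ensure_all_finite=True)
assert isinstance(X_out, np.ndarray)

# a single non-finite element raises sklearn's usual error; for large contiguous
# float32/float64 inputs the check is routed through the oneDAL backend
X[10, 10] = np.nan
try:
    validate_data(est, X, ensure_all_finite=True)
except ValueError as err:
    print(err)  # e.g. "Input X contains NaN."

# sample weights go through the same finiteness handling; a scalar is broadcast
weights = _check_sample_weight(5.0, X, dtype=np.float64)
assert weights.shape == (X.shape[0],)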