From 32fe2691f7eea7d2d2ed3bf3460965450f2ba256 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 23 Oct 2024 13:02:21 +0200 Subject: [PATCH 01/68] add finiteness_checker pybind11 bindings --- onedal/dal.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/onedal/dal.cpp b/onedal/dal.cpp index 814b22aa8b..14e0aed35d 100644 --- a/onedal/dal.cpp +++ b/onedal/dal.cpp @@ -75,6 +75,9 @@ namespace oneapi::dal::python { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 ONEDAL_PY_INIT_MODULE(logistic_regression); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 + ONEDAL_PY_INIT_MODULE(finiteness_checker); + #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 #endif // ONEDAL_DATA_PARALLEL_SPMD #ifdef ONEDAL_DATA_PARALLEL_SPMD @@ -133,6 +136,9 @@ namespace oneapi::dal::python { #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 init_logistic_regression(m); #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240001 + #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 + init_finiteness_checker(m); + #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240700 } #endif // ONEDAL_DATA_PARALLEL_SPMD From cdbf1b5e5bfdc8036beee80545ea11e553ceac99 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 23 Oct 2024 13:04:00 +0200 Subject: [PATCH 02/68] added finiteness checker --- onedal/primitives/finiteness_checker.cpp | 96 ++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 onedal/primitives/finiteness_checker.cpp diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp new file mode 100644 index 0000000000..6aaf7c52d6 --- /dev/null +++ b/onedal/primitives/finiteness_checker.cpp @@ -0,0 +1,96 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker.hpp" + +#include "onedal/common.hpp" +#include "onedal/version.hpp" + +namespace py = pybind11; + +namespace oneapi::dal::python { + +template +struct method2t { + method2t(const Task& task, const Ops& ops) : ops(ops) {} + + template + auto operator()(const py::dict& params) { + using namespace finiteness_checker; + + const auto method = params["method"].cast(); + + ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense); + ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default); + ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method); + } + + Ops ops; +}; + +struct params2desc { + template + auto operator()(const pybind11::dict& params) { + using namespace dal::finiteness_checker; + + auto desc = descriptor(); + desc.set_allow_NaN(params["allow_nan"].cast()); + return desc; + } +}; + +template +void init_compute_ops(py::module_& m) { + m.def("compute", + [](const Policy& policy, + const py::dict& params, + const table& data) { + using namespace finiteness_checker; + using input_t = compute_input; + + compute_ops ops(policy, input_t{ data}, params2desc{}); + return fptype2t{ method2t{ Task{}, ops } }(params); + }); +} + +template +void init_compute_result(py::module_& m) { + using namespace finiteness_checker; + using result_t = compute_result; + + py::class_(m, "compute_result") + .def(py::init()) + .DEF_ONEDAL_PY_PROPERTY(finite, result_t) +} + +ONEDAL_PY_TYPE2STR(finiteness_checker::task::compute, "compute"); + +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_ops); +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); + +ONEDAL_PY_INIT_MODULE(finiteness_checker) { + using namespace dal::detail; + using namespace finiteness_checker; + using namespace dal::finiteness; + + using task_list = types; + auto sub = m.def_submodule("finiteness_checker"); + + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); +} + +} // namespace oneapi::dal::python From 62674a24547cf4f7771efbd48657666ed41a97fe Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:37:53 +0200 Subject: [PATCH 03/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 6aaf7c52d6..51a3ef161a 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -14,7 +14,7 @@ * limitations under the License. *******************************************************************************/ -#include "oneapi/dal/algo/finiteness_checker.hpp" +#include "oneapi/dal/algo/finiteness_checker/compute.hpp" #include "onedal/common.hpp" #include "onedal/version.hpp" From c75c23b34e714ac22eace32d4a44ae5699286262 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:46:49 +0200 Subject: [PATCH 04/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 51a3ef161a..761ee28de9 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -14,7 +14,12 @@ * limitations under the License. *******************************************************************************/ +// fix error with missing headers +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 +#include "oneapi/dal/algo/finiteness_checker.hpp +#else #include "oneapi/dal/algo/finiteness_checker/compute.hpp" +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 #include "onedal/common.hpp" #include "onedal/version.hpp" From 6a20938aba804e69b09bf5d15c12f3128982df7d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:47:36 +0200 Subject: [PATCH 05/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 761ee28de9..531554f857 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -16,9 +16,9 @@ // fix error with missing headers #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 -#include "oneapi/dal/algo/finiteness_checker.hpp + #include "oneapi/dal/algo/finiteness_checker.hpp #else -#include "oneapi/dal/algo/finiteness_checker/compute.hpp" + #include "oneapi/dal/algo/finiteness_checker/compute.hpp" #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 #include "onedal/common.hpp" From 382d7a1268a4612f6eec162a30c02b18bcc0e041 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:47:47 +0200 Subject: [PATCH 06/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 531554f857..ebc7bfd798 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -16,7 +16,7 @@ // fix error with missing headers #if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 - #include "oneapi/dal/algo/finiteness_checker.hpp + #include "oneapi/dal/algo/finiteness_checker.hpp" #else #include "oneapi/dal/algo/finiteness_checker/compute.hpp" #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250200 From c8ffd9c0c2c9a132449020fa2ffc492b7c9bd1fb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:54:20 +0200 Subject: [PATCH 07/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index ebc7bfd798..92a17a875d 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -52,7 +52,7 @@ struct params2desc { using namespace dal::finiteness_checker; auto desc = descriptor(); - desc.set_allow_NaN(params["allow_nan"].cast()); + desc.set_allow_NaN(params["allow_nan"].cast()); return desc; } }; From 9aa13d5e72340509c33986befce7ff5f3169a325 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 13:58:13 +0200 Subject: [PATCH 08/68] Update finiteness_checker.cpp --- onedal/primitives/finiteness_checker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp index 92a17a875d..7189aec5d9 100644 --- a/onedal/primitives/finiteness_checker.cpp +++ b/onedal/primitives/finiteness_checker.cpp @@ -78,7 +78,7 @@ void init_compute_result(py::module_& m) { py::class_(m, "compute_result") .def(py::init()) - .DEF_ONEDAL_PY_PROPERTY(finite, result_t) + .DEF_ONEDAL_PY_PROPERTY(finite, result_t); } ONEDAL_PY_TYPE2STR(finiteness_checker::task::compute, "compute"); @@ -89,7 +89,7 @@ ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); ONEDAL_PY_INIT_MODULE(finiteness_checker) { using namespace dal::detail; using namespace finiteness_checker; - using namespace dal::finiteness; + using namespace dal::finiteness_checker; using task_list = types; auto sub = m.def_submodule("finiteness_checker"); From 84e15d598392ebf5da945468cd1cf110a25d3764 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 23 Oct 2024 14:21:02 +0200 Subject: [PATCH 09/68] Rename finiteness_checker.cpp to finiteness_checker.cpp --- onedal/{primitives => utils}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{primitives => utils}/finiteness_checker.cpp (100%) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp similarity index 100% rename from onedal/primitives/finiteness_checker.cpp rename to onedal/utils/finiteness_checker.cpp From 63073c60d17c192781e30db5425eeee4832761d9 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 24 Oct 2024 10:58:08 +0200 Subject: [PATCH 10/68] Update finiteness_checker.cpp --- onedal/utils/finiteness_checker.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp index 7189aec5d9..6bc6a2e66b 100644 --- a/onedal/utils/finiteness_checker.cpp +++ b/onedal/utils/finiteness_checker.cpp @@ -94,8 +94,10 @@ ONEDAL_PY_INIT_MODULE(finiteness_checker) { using task_list = types; auto sub = m.def_submodule("finiteness_checker"); - ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); - ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); + #ifndef ONEDAL_DATA_PARALLEL_SPMD + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); + #endif } } // namespace oneapi::dal::python From 3dddf2dc3469f197c7e539c73f407670173c9864 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:30:15 +0100 Subject: [PATCH 11/68] add next step --- onedal/utils/validation.py | 41 +++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index bde2390e80..eb313cd980 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -20,6 +20,10 @@ import numpy as np from scipy import sparse as sp +from onedal import _backend +from ..common._policy import _get_policy +from ..datatypes import _convert_to_supported, to_table + if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 @@ -31,7 +35,9 @@ from sklearn.preprocessing import LabelEncoder from sklearn.utils.validation import check_array -from daal4py.sklearn.utils.validation import _assert_all_finite +from daal4py.sklearn.utils.validation import ( + _assert_all_finite as _daal4py_assert_all_finite, +) class DataConversionWarning(UserWarning): @@ -135,10 +141,10 @@ def _check_array( if force_all_finite: if sp.issparse(array): if hasattr(array, "data"): - _assert_all_finite(array.data) + _daal4py_assert_all_finite(array.data) force_all_finite = False else: - _assert_all_finite(array) + _daal4py_assert_all_finite(array) force_all_finite = False array = check_array( array=array, @@ -200,7 +206,7 @@ def _check_X_y( if y_numeric and y.dtype.kind == "O": y = y.astype(np.float64) if force_all_finite: - _assert_all_finite(y) + _daal4py_assert_all_finite(y) lengths = [X.shape[0], y.shape[0]] uniques = np.unique(lengths) @@ -285,7 +291,7 @@ def _type_of_target(y): # check float and contains non-integer float values if y.dtype.kind == "f" and np.any(y != y.astype(int)): # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.] - _assert_all_finite(y) + _daal4py_assert_all_finite(y) return "continuous" + suffix if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1): @@ -430,3 +436,28 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + # requires extracting the queue to generate a policy for converting the data to fp32 + X = to_table(_convert_to_supported(_get_policy(None, X), X)) + if not _backend.finiteness_checker(allow_nan=allow_nan).compute(X).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From 1e1213e60e2d52310b26625a1c749379affcd007 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:37:07 +0100 Subject: [PATCH 12/68] follow conventions --- onedal/utils/validation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index eb313cd980..3a9d849486 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -442,8 +442,11 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance # requires extracting the queue to generate a policy for converting the data to fp32 - X = to_table(_convert_to_supported(_get_policy(None, X), X)) - if not _backend.finiteness_checker(allow_nan=allow_nan).compute(X).finite: + policy = _get_policy(None, X) + X = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X + ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" msg_err = f"Input {padded_input_name}contains {type_err}." From 053171340099a68ced8fec11f79371f6bac253ef Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:38:57 +0100 Subject: [PATCH 13/68] make xtable explicit --- onedal/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 3a9d849486..67c7a2dee0 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -443,9 +443,9 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): # is likely to cause a significant reduction in performance # requires extracting the queue to generate a policy for converting the data to fp32 policy = _get_policy(None, X) - X = to_table(_convert_to_supported(policy, X)) + X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X + policy, {"allow_nan": allow_nan}, X_table ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From e831167b32b85135b9e685c7dd83227db89603e2 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 1 Nov 2024 00:42:29 +0100 Subject: [PATCH 14/68] remove comment --- onedal/utils/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 67c7a2dee0..10bb920291 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -441,7 +441,6 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance - # requires extracting the queue to generate a policy for converting the data to fp32 policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( From d6eb1d05e9de1c6bc0a1f9683659ddef4540480d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 00:57:56 +0100 Subject: [PATCH 15/68] Update validation.py --- onedal/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 10bb920291..f4597cd01c 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -21,8 +21,8 @@ import numpy as np from scipy import sparse as sp from onedal import _backend -from ..common._policy import _get_policy -from ..datatypes import _convert_to_supported, to_table +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): From fb30d6e69a2c6244112079a9c6a0dd75cd9a3a85 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:34:52 +0100 Subject: [PATCH 16/68] Update __init__.py --- onedal/utils/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index 0a1b05fbc2..0bc9ed35a3 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== +import scipy.sparse as sp from .validation import ( _check_array, @@ -22,7 +23,6 @@ _column_or_1d, _is_arraylike, _is_arraylike_not_scalar, - _is_csr, _is_integral_float, _is_multilabel, _num_features, @@ -31,6 +31,12 @@ _validate_targets, ) +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) + __all__ = [ "_column_or_1d", "_validate_targets", From 63a18c2f66ad93720408c33aa3a3b05f74d58f48 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:35:12 +0100 Subject: [PATCH 17/68] Update validation.py --- onedal/utils/validation.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f4597cd01c..1421bfaefc 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -431,13 +431,6 @@ def _num_samples(x): raise TypeError(message) from type_error -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) - - def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance From 76c0856a12c04d4d3eb13d3c21382b1b84a23dc7 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:40:03 +0100 Subject: [PATCH 18/68] Update __init__.py --- onedal/utils/__init__.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index 0bc9ed35a3..a7e1495cf9 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -13,8 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -import scipy.sparse as sp +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) from .validation import ( _check_array, _check_classification_targets, @@ -23,6 +27,7 @@ _column_or_1d, _is_arraylike, _is_arraylike_not_scalar, + _is_csr, _is_integral_float, _is_multilabel, _num_features, @@ -31,12 +36,6 @@ _validate_targets, ) -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) - __all__ = [ "_column_or_1d", "_validate_targets", From 7deb2bbce9c0435b2484ae0fcfc754f5521bb01d Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:40:24 +0100 Subject: [PATCH 19/68] Update __init__.py --- onedal/utils/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py index a7e1495cf9..0a1b05fbc2 100644 --- a/onedal/utils/__init__.py +++ b/onedal/utils/__init__.py @@ -14,11 +14,6 @@ # limitations under the License. # ============================================================================== -def _is_csr(x): - """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" - return isinstance(x, sp.csr_matrix) or ( - hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) - ) from .validation import ( _check_array, _check_classification_targets, From ed46b2907bb0a00678dab9c2516543941471b64a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:41:17 +0100 Subject: [PATCH 20/68] Update validation.py --- onedal/utils/validation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 1421bfaefc..f4597cd01c 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -431,6 +431,13 @@ def _num_samples(x): raise TypeError(message) from type_error +def _is_csr(x): + """Return True if x is scipy.sparse.csr_matrix or scipy.sparse.csr_array""" + return isinstance(x, sp.csr_matrix) or ( + hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) + ) + + def _assert_all_finite(X, allow_nan=False, input_name=""): # NOTE: This function does not respond to target_offload, as the memory movement # is likely to cause a significant reduction in performance From 67d6273f3520232daad4f7f16b49291240600e16 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:42:45 +0100 Subject: [PATCH 21/68] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 0caac10884..011a2eb89d 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -17,11 +17,11 @@ import warnings import numpy as np +import scipy.sparse as sp from daal4py.sklearn._utils import make2d from onedal import _backend, _is_dpc_backend -from ..utils import _is_csr from ..utils._dpep_helpers import is_dpctl_available dpctl_available = is_dpctl_available("0.14") @@ -46,7 +46,7 @@ def convert_one_to_table(arg): if isinstance(arg, dpt.usm_ndarray): return _backend.dpctl_to_table(arg) - if not _is_csr(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) From 8abead922bd8c2fceff7e8e6dffe4b76389fe1d4 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 1 Nov 2024 22:58:03 +0100 Subject: [PATCH 22/68] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 386101eb14..12dc24eca3 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -103,7 +103,7 @@ def convert_one_to_table(arg, sua_iface=None): if sua_iface: return _backend.sua_iface_to_table(arg) - if not sp.sparse(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) @@ -130,7 +130,7 @@ def convert_one_to_table(arg, sua_iface=None): "SYCL usm array conversion to table requires the DPC backend" ) - if not sp.sparse(arg): + if not sp.issparse(arg): arg = make2d(arg) return _backend.to_table(arg) From 47d0f8bf7f0544089bcc2626dc06863be663757b Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:39:18 +0100 Subject: [PATCH 23/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index bfb3c02cbd..3d8443378d 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -31,6 +31,10 @@ constexpr const char py_capsule_name[] = "PyCapsule"; constexpr const char get_capsule_name[] = "_get_capsule"; constexpr const char queue_capsule_name[] = "SyclQueueRef"; constexpr const char context_capsule_name[] = "SyclContextRef"; +constexpr const char device_name[] = "sycl_device"; +constexpr const char filter_name[] = "filter_selector"; + + sycl::queue extract_queue(py::capsule capsule) { constexpr const char* gtr_name = queue_capsule_name; @@ -79,7 +83,12 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } - else { + else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), filter_name)) { + auto attr = syclobj.attr(device_name).attr(filter_name); + return get_queue_by_filter_string(attr.cast()); + } + else + { throw std::runtime_error("Unable to interpret \"syclobj\""); } } From e48c2bdca15b554e9b325508b8827465ae6d34bf Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:45:56 +0100 Subject: [PATCH 24/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 3d8443378d..364f248992 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -32,7 +32,7 @@ constexpr const char get_capsule_name[] = "_get_capsule"; constexpr const char queue_capsule_name[] = "SyclQueueRef"; constexpr const char context_capsule_name[] = "SyclContextRef"; constexpr const char device_name[] = "sycl_device"; -constexpr const char filter_name[] = "filter_selector"; +constexpr const char get_filter_name[] = "get_filter_string"; @@ -83,9 +83,9 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } - else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), filter_name)) { - auto attr = syclobj.attr(device_name).attr(filter_name); - return get_queue_by_filter_string(attr.cast()); + else if (py::hasattr(syclobj, device_name) && py::hasattr(syclobj.attr(device_name), get_filter_name)) { + auto attr = syclobj.attr(device_name).attr(get_filter_name); + return get_queue_by_filter_string(attr().cast()); } else { From c6751c4bc2dea6fd8e38c470d9f398bb0b8f8161 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 00:47:04 +0100 Subject: [PATCH 25/68] Update _policy.py --- onedal/common/_policy.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index 90705854f6..abd267f4a6 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -48,12 +48,7 @@ def __init__(self): if _is_dpc_backend: - from onedal._device_offload import DummySyclQueue - class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue - if isinstance(queue, DummySyclQueue): - super().__init__(self._queue.sycl_device.get_filter_string()) - return super().__init__(self._queue) From f3e4a3a678298b7a7b135bae67ef29e293a45ee5 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:01:33 +0100 Subject: [PATCH 26/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 364f248992..3bd18c3689 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -34,8 +34,6 @@ constexpr const char context_capsule_name[] = "SyclContextRef"; constexpr const char device_name[] = "sycl_device"; constexpr const char get_filter_name[] = "get_filter_string"; - - sycl::queue extract_queue(py::capsule capsule) { constexpr const char* gtr_name = queue_capsule_name; constexpr std::size_t gtr_size = sizeof(queue_capsule_name); @@ -74,6 +72,20 @@ sycl::queue get_queue_by_get_capsule(const py::object& syclobj) { return extract_from_capsule(std::move(capsule)); } +sycl::queue get_queue_by_filter_string(const std::string& filter) { + filter_selector_wrapper selector{ filter }; + return sycl::queue{ selector }; +} + +sycl::queue get_queue_by_device_id(std::uint32_t id) { + if (auto device = get_device_by_id(id)) { + return sycl::queue{ device.value() }; + } + else { + throw std::runtime_error(unknown_device); + } +} + sycl::queue get_queue_from_python(const py::object& syclobj) { static auto pycapsule = py::cast(py_capsule_name); if (py::hasattr(syclobj, get_capsule_name)) { @@ -93,20 +105,6 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { } } -sycl::queue get_queue_by_filter_string(const std::string& filter) { - filter_selector_wrapper selector{ filter }; - return sycl::queue{ selector }; -} - -sycl::queue get_queue_by_device_id(std::uint32_t id) { - if (auto device = get_device_by_id(id)) { - return sycl::queue{ device.value() }; - } - else { - throw std::runtime_error(unknown_device); - } -} - std::string get_device_name(const sycl::queue& queue) { const auto& device = queue.get_device(); if (device.is_gpu()) { From 39cdb5f3c48810a178b12608fa18eb2a8edecfd0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:28:12 +0100 Subject: [PATCH 27/68] Rename finiteness_checker.cpp to finiteness_checker.cpp --- onedal/{utils => primitives}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{utils => primitives}/finiteness_checker.cpp (100%) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/primitives/finiteness_checker.cpp similarity index 100% rename from onedal/utils/finiteness_checker.cpp rename to onedal/primitives/finiteness_checker.cpp From 0f39613063f153d054826cbcac9f931232c14177 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:33:21 +0100 Subject: [PATCH 28/68] Create finiteness_checker.py --- onedal/primitives/finiteness_checker.py | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 onedal/primitives/finiteness_checker.py diff --git a/onedal/primitives/finiteness_checker.py b/onedal/primitives/finiteness_checker.py new file mode 100644 index 0000000000..c1a2b5c364 --- /dev/null +++ b/onedal/primitives/finiteness_checker.py @@ -0,0 +1,48 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import scipy.sparse as sp + +from onedal import _backend +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + policy = _get_policy(None, X) + X_table = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X_table + ).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From b42cfe365d6dba0735dee79e732b6f1bddd9b1dc Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:33:45 +0100 Subject: [PATCH 29/68] Update validation.py --- onedal/utils/validation.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f4597cd01c..bb501617fa 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -20,10 +20,6 @@ import numpy as np from scipy import sparse as sp -from onedal import _backend -from onedal.common._policy import _get_policy -from onedal.datatypes import _convert_to_supported, to_table - if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 @@ -436,30 +432,3 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) - - -def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance - policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - raise ValueError(msg_err) - - -def assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - _assert_all_finite( - X.data if sp.issparse(X) else X, - allow_nan=allow_nan, - input_name=input_name, - ) From 0ed615e9b44825e483aaad292187296416a08960 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 01:34:51 +0100 Subject: [PATCH 30/68] Update __init__.py --- onedal/primitives/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index 39213819b5..c501a78d67 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -15,13 +15,16 @@ # ============================================================================== from .get_tree import get_tree_state_cls, get_tree_state_reg +from .finiteness_checker import assert_all_finite, _assert_all_finite from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ + "assert_all_finite", "get_tree_state_cls", "get_tree_state_reg", "linear_kernel", "rbf_kernel", "poly_kernel", "sigmoid_kernel", + "_assert_all_finite", ] From f101affd5068f017edd6f399666528920a4e309f Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:32:43 +0100 Subject: [PATCH 31/68] attempt at fixing circular imports again --- onedal/common/_policy.py | 1 + onedal/datatypes/_data_conversion.py | 31 ++++++++-------- onedal/primitives/finiteness_checker.py | 48 ------------------------- onedal/utils/validation.py | 31 ++++++++++++++++ 4 files changed, 49 insertions(+), 62 deletions(-) delete mode 100644 onedal/primitives/finiteness_checker.py diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index abd267f4a6..0d7d8ca6a3 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -48,6 +48,7 @@ def __init__(self): if _is_dpc_backend: + class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 12dc24eca3..af5b41eb6b 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -31,13 +31,23 @@ def _apply_and_pass(func, *args, **kwargs): if _is_dpc_backend: - from ..utils._dpep_helpers import dpctl_available, dpnp_available + try: + import dpnp - if dpctl_available: - import dpctl.tensor as dpt + def _onedal_gpu_table_to_array(table, xp=None): + # By default DPNP ndarray created with a copy. + # TODO: + # investigate why dpnp.array(table, copy=False) doesn't work. + # Work around with using dpctl.tensor.asarray. + if xp == dpnp: + return dpnp.array(dpnp.dpctl.tensor.asarray(table), copy=False) + else: + return xp.asarray(table) - if dpnp_available: - import dpnp + except ImportError: + + def _onedal_gpu_table_to_array(table, xp=None): + return xp.asarray(table) from ..common._policy import _HostInteropPolicy @@ -86,15 +96,8 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): _backend.from_table(table), usm_type="device", sycl_queue=sycl_queue ) else: - xp_name = xp.__name__ - if dpnp_available and xp_name == "dpnp": - # By default DPNP ndarray created with a copy. - # TODO: - # investigate why dpnp.array(table, copy=False) doesn't work. - # Work around with using dpctl.tensor.asarray. - return dpnp.array(dpt.asarray(table), copy=False) - else: - return xp.asarray(table) + return _onedal_gpu_table_to_array(table, xp=xp) + return _backend.from_table(table) def convert_one_to_table(arg, sua_iface=None): diff --git a/onedal/primitives/finiteness_checker.py b/onedal/primitives/finiteness_checker.py deleted file mode 100644 index c1a2b5c364..0000000000 --- a/onedal/primitives/finiteness_checker.py +++ /dev/null @@ -1,48 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import scipy.sparse as sp - -from onedal import _backend -from onedal.common._policy import _get_policy -from onedal.datatypes import _convert_to_supported, to_table - - -def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance - policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - raise ValueError(msg_err) - - -def assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - _assert_all_finite( - X.data if sp.issparse(X) else X, - allow_nan=allow_nan, - input_name=input_name, - ) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index bb501617fa..c620b7b2e4 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -35,6 +35,10 @@ _assert_all_finite as _daal4py_assert_all_finite, ) +from onedal import _backend +from onedal.common._policy import _get_policy +from onedal.datatypes import _convert_to_supported, to_table + class DataConversionWarning(UserWarning): """Warning used to notify implicit data conversions happening in the code.""" @@ -432,3 +436,30 @@ def _is_csr(x): return isinstance(x, sp.csr_matrix) or ( hasattr(sp, "csr_array") and isinstance(x, sp.csr_array) ) + + +def _assert_all_finite(X, allow_nan=False, input_name=""): + # NOTE: This function does not respond to target_offload, as the memory movement + # is likely to cause a significant reduction in performance + policy = _get_policy(None, X) + X_table = to_table(_convert_to_supported(policy, X)) + if not _backend.finiteness_checker.compute( + policy, {"allow_nan": allow_nan}, X_table + ).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) From 24c0e9472a85b2023ddb21a27fe6a783adb5cc1c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:33:06 +0100 Subject: [PATCH 32/68] fix isort --- onedal/primitives/__init__.py | 2 +- onedal/utils/validation.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index c501a78d67..79d72e2f16 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -14,8 +14,8 @@ # limitations under the License. # ============================================================================== +from .finiteness_checker import _assert_all_finite, assert_all_finite from .get_tree import get_tree_state_cls, get_tree_state_reg -from .finiteness_checker import assert_all_finite, _assert_all_finite from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index c620b7b2e4..4c5cc9746f 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -34,7 +34,6 @@ from daal4py.sklearn.utils.validation import ( _assert_all_finite as _daal4py_assert_all_finite, ) - from onedal import _backend from onedal.common._policy import _get_policy from onedal.datatypes import _convert_to_supported, to_table From 3f96166299d3ac5f07931ba64e5b0e96af345496 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:35:06 +0100 Subject: [PATCH 33/68] remove __init__ changes --- onedal/primitives/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/onedal/primitives/__init__.py b/onedal/primitives/__init__.py index 79d72e2f16..39213819b5 100644 --- a/onedal/primitives/__init__.py +++ b/onedal/primitives/__init__.py @@ -14,17 +14,14 @@ # limitations under the License. # ============================================================================== -from .finiteness_checker import _assert_all_finite, assert_all_finite from .get_tree import get_tree_state_cls, get_tree_state_reg from .kernel_functions import linear_kernel, poly_kernel, rbf_kernel, sigmoid_kernel __all__ = [ - "assert_all_finite", "get_tree_state_cls", "get_tree_state_reg", "linear_kernel", "rbf_kernel", "poly_kernel", "sigmoid_kernel", - "_assert_all_finite", ] From d98505388701b670e037148e14490163e5675590 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 13:35:50 +0100 Subject: [PATCH 34/68] last move --- onedal/{primitives => utils}/finiteness_checker.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename onedal/{primitives => utils}/finiteness_checker.cpp (100%) diff --git a/onedal/primitives/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp similarity index 100% rename from onedal/primitives/finiteness_checker.cpp rename to onedal/utils/finiteness_checker.cpp From 90ec48b46bc0c06a1da5b07e7b5d93efc12c12b7 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 14:39:03 +0100 Subject: [PATCH 35/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 3bd18c3689..828be51547 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -87,11 +87,10 @@ sycl::queue get_queue_by_device_id(std::uint32_t id) { } sycl::queue get_queue_from_python(const py::object& syclobj) { - static auto pycapsule = py::cast(py_capsule_name); if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj, pycapsule)) { + else if (py::isinstance(syclobj, py::capsule)) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 8c2c854c06b0e4486aae563418ea047d24f528df Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 14:59:19 +0100 Subject: [PATCH 36/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 828be51547..224e7a04e1 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -19,7 +19,6 @@ #endif // ONEDAL_DATA_PARALLEL #include - #include "onedal/common/policy_common.hpp" namespace oneapi::dal::python { @@ -90,7 +89,7 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj, py::capsule)) { + else if (py::isinstance(syclobj) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 6fa38d7f49d95a831d663101e076530297980865 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 15:07:44 +0100 Subject: [PATCH 37/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index 224e7a04e1..b10c60880d 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -89,7 +89,7 @@ sycl::queue get_queue_from_python(const py::object& syclobj) { if (py::hasattr(syclobj, get_capsule_name)) { return get_queue_by_get_capsule(syclobj); } - else if (py::isinstance(syclobj) { + else if (py::isinstance(syclobj)) { const auto caps = syclobj.cast(); return extract_from_capsule(std::move(caps)); } From 9c1ca9c3f29d3f00f5b10444e3e78101fb39adc0 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 17:22:59 +0100 Subject: [PATCH 38/68] Update policy_common.cpp --- onedal/common/policy_common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/onedal/common/policy_common.cpp b/onedal/common/policy_common.cpp index b10c60880d..284762b035 100644 --- a/onedal/common/policy_common.cpp +++ b/onedal/common/policy_common.cpp @@ -19,6 +19,7 @@ #endif // ONEDAL_DATA_PARALLEL #include + #include "onedal/common/policy_common.hpp" namespace oneapi::dal::python { From 4b67dbde880bfa8c3d5373473a589bd2f6577c56 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sat, 2 Nov 2024 19:27:45 +0100 Subject: [PATCH 39/68] Update validation.py --- onedal/utils/validation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 4c5cc9746f..2ea8de8f51 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,8 +438,6 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): - # NOTE: This function does not respond to target_offload, as the memory movement - # is likely to cause a significant reduction in performance policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute( From fa59a3c0103e9bd9d31ac1c0bf94cc9d1f86ae26 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:23:58 +0100 Subject: [PATCH 40/68] add testing --- onedal/utils/tests/test_validation.py | 115 ++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 onedal/utils/tests/test_validation.py diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py new file mode 100644 index 0000000000..406a2fd7bc --- /dev/null +++ b/onedal/utils/tests/test_validation.py @@ -0,0 +1,115 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import time + +import numpy as np +import numpy.random as rand +import pytest +from numpy.testing import assert_raises + +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) +from onedal.utils.validation import assert_all_finite, _assert_all_finite + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [ + 2**16 + 3, + ], + [1000, 1000], + [ + 3, + ], + ], +) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue): + X = np.array(shape, dtype=dtype) + X.fill(np.finfo(dtype).max) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + _assert_all_finite(X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize( + "shape", + [ + [16, 2048], + [ + 2**16 + 3, + ], + [1000, 1000], + [ + 3, + ], + ], +) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_assert_finite_random_location( + dtype, shape, allow_nan, check, seed, dataframe, queue +): + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + if check: + loc = rand.randint(0, X.size - 1) + X.reshape((-1,))[loc] = float(check) + + if check is None or (allow_nan and check == "NaN"): + _assert_all_finite(X, allow_nan=allow_nan) + else: + assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +@pytest.mark.parametrize( + "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") +) +def test_assert_finite_random_shape_and_location( + dtype, allow_nan, check, seed, dataframe, queue +): + lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + + if check: + loc = rand.randint(0, X.size - 1) + X[loc] = float(check) + + if check is None or (allow_nan and check == "NaN"): + _assert_all_finite(X, allow_nan=allow_nan) + else: + assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) From 3330b3312f07a751859d8e9c7639512e5d035ed3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:24:38 +0100 Subject: [PATCH 41/68] isort --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 406a2fd7bc..5788a9ccc3 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -25,7 +25,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from onedal.utils.validation import assert_all_finite, _assert_all_finite +from onedal.utils.validation import _assert_all_finite, assert_all_finite @pytest.mark.parametrize("dtype", [np.float32, np.float64]) From 48959403bde34845dd7bcc9bb357cc6e79eb846e Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 22:53:23 +0100 Subject: [PATCH 42/68] attempt to fix module error --- onedal/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 2ea8de8f51..9b33d49fe0 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -440,7 +440,7 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute( + if not _backend.finiteness_checker.compute.compute( policy, {"allow_nan": allow_nan}, X_table ).finite: type_err = "infinity" if allow_nan else "NaN, infinity" From 0c6dd5d284155478773d1d4cf88c4fab3c9b6558 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 23:20:51 +0100 Subject: [PATCH 43/68] add fptype --- onedal/utils/validation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 9b33d49fe0..f6e62bef14 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -439,10 +439,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) - X_table = to_table(_convert_to_supported(policy, X)) - if not _backend.finiteness_checker.compute.compute( - policy, {"allow_nan": allow_nan}, X_table - ).finite: + X_t = to_table(_convert_to_supported(policy, X)) + params = { + "fptype": "float" if X_t.dtype.name == "float32" else "double", + "allow_nan": allow_nan, + } + if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" msg_err = f"Input {padded_input_name}contains {type_err}." From e2182fa81ffc0b35b485a01f43b1d0dca5bb79e1 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sat, 2 Nov 2024 23:40:24 +0100 Subject: [PATCH 44/68] fix typo --- onedal/utils/validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index f6e62bef14..1ce7e5378d 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,12 +438,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): - policy = _get_policy(None, X) - X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X_t.dtype.name == "float32" else "double", + "fptype": "float" if X.dtype.name == "float32" else "double", "allow_nan": allow_nan, } + policy = _get_policy(None, X) + X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From 982ef2c8e57e56d4d018b72fa7cd3e7ba58e0ebb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 00:02:35 +0100 Subject: [PATCH 45/68] Update validation.py --- onedal/utils/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 1ce7e5378d..6298f3ee5a 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -438,11 +438,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): + policy = _get_policy(None, X) params = { "fptype": "float" if X.dtype.name == "float32" else "double", + "method": "dense", "allow_nan": allow_nan, } - policy = _get_policy(None, X) X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" From 2fb52a82bc27226d53ddfa27a462840e2011c9cb Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 02:38:41 +0100 Subject: [PATCH 46/68] remove sua_ifcae from to_table --- onedal/datatypes/_data_conversion.py | 39 +++++++++++----------------- onedal/datatypes/table.cpp | 11 ++++---- onedal/datatypes/tests/test_data.py | 12 ++++----- sklearnex/tests/test_memory_usage.py | 6 ++--- 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index af5b41eb6b..2ef6903041 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -19,15 +19,29 @@ import numpy as np import scipy.sparse as sp -from daal4py.sklearn._utils import make2d from onedal import _backend, _is_dpc_backend +def make2d(X): + # generalized for array-like inputs + if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: + return X.reshape((-1, 1)) + if np.isscalar(X): + return np.atleast_2d(X) + return X + + def _apply_and_pass(func, *args, **kwargs): if len(args) == 1: return func(args[0], **kwargs) return tuple(map(lambda arg: func(arg, **kwargs), args)) +def convert_one_to_table(arg): + return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + +def to_table(*args): + return _apply_and_pass(convert_one_to_table, *args) + if _is_dpc_backend: @@ -100,16 +114,6 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): return _backend.from_table(table) - def convert_one_to_table(arg, sua_iface=None): - # Note: currently only oneDAL homogen tables are supported and the - # contiuginity of the input array should be checked in advance. - if sua_iface: - return _backend.sua_iface_to_table(arg) - - if not sp.issparse(arg): - arg = make2d(arg) - return _backend.to_table(arg) - else: def _convert_to_supported(policy, *data): @@ -127,22 +131,9 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): ) return _backend.from_table(table) - def convert_one_to_table(arg, sua_iface=None): - if sua_iface: - raise RuntimeError( - "SYCL usm array conversion to table requires the DPC backend" - ) - - if not sp.issparse(arg): - arg = make2d(arg) - return _backend.to_table(arg) - def from_table(*args, sycl_queue=None, sua_iface=None, xp=None): return _apply_and_pass( convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp ) - -def to_table(*args, sua_iface=None): - return _apply_and_pass(convert_one_to_table, *args, sua_iface=sua_iface) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 9771306118..ce0f15936b 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -78,6 +78,12 @@ ONEDAL_PY_INIT_MODULE(table) { #endif // ONEDAL_DATA_PARALLEL m.def("to_table", [](py::object obj) { + #ifdef ONEDAL_DATA_PARALLEL + if (py::hasattr(obj, "__sycl_usm_array_interface__")) { + return convert_from_sua_iface(obj); + } + #endif // ONEDAL_DATA_PARALLEL + auto* obj_ptr = obj.ptr(); return convert_to_table(obj_ptr); }); @@ -87,11 +93,6 @@ ONEDAL_PY_INIT_MODULE(table) { return obj_ptr; }); -#ifdef ONEDAL_DATA_PARALLEL - m.def("sua_iface_to_table", [](py::object obj) { - return convert_from_sua_iface(obj); - }); -#endif // ONEDAL_DATA_PARALLEL } } // namespace oneapi::dal::python diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 471d6f0a64..de47e18ad4 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -68,7 +68,7 @@ def fit(self, X, y=None): X = xp.astype(X, dtype=xp.float64) dtype = get_dtype(X) params = bs_DBSCAN._get_onedal_params(dtype) - X_table = to_table(X, sua_iface=sua_iface) + X_table = to_table(X) # TODO: # check other candidates for the dummy base oneDAL func. # oneDAL backend func is needed to check result table checks. @@ -251,7 +251,7 @@ def test_input_sua_iface_zero_copy(dataframe, queue, order, dtype): sua_iface, X_dp_namespace, _ = _get_sycl_namespace(X_dp) - X_table = to_table(X_dp, sua_iface=sua_iface) + X_table = to_table(X_dp) _assert_sua_iface_fields(X_dp, X_table) X_dp_from_table = from_table( @@ -339,7 +339,7 @@ def test_sua_iface_interop_invalid_shape(dataframe, queue, data_shape): "Unable to convert from SUA interface: only 1D & 2D tensors are allowed" ) with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.skipif( @@ -368,7 +368,7 @@ def test_sua_iface_interop_unsupported_dtypes(dataframe, queue, dtype): expected_err_msg = "Unable to convert from SUA interface: unknown data type" with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.parametrize( @@ -393,7 +393,7 @@ def test_to_table_non_contiguous_input(dataframe, queue): else: expected_err_msg = "Numpy input Could not convert Python object to onedal table." with pytest.raises(ValueError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) @pytest.mark.skipif( @@ -411,4 +411,4 @@ def test_sua_iface_interop_if_no_dpc_backend(dataframe, queue, dtype): expected_err_msg = "SYCL usm array conversion to table requires the DPC backend" with pytest.raises(RuntimeError, match=expected_err_msg): - to_table(X, sua_iface=sua_iface) + to_table(X) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 4035832d37..6e7fdb72b5 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,8 +142,8 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X, sua_iface=sua_iface) - y_table = to_table(y, sua_iface=sua_iface) + X_table = to_table(X) + y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing # underscore) is required for the correct check. The cleanup of # the memory will occur at the estimator instance deletion. @@ -160,7 +160,7 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - X_table = to_table(X, sua_iface=sua_iface) + X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp ) From 28dc267ab319edf2cef611340c0ab634eae036c4 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 02:42:29 +0100 Subject: [PATCH 47/68] isort and black --- onedal/datatypes/_data_conversion.py | 3 ++- onedal/datatypes/table.cpp | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 2ef6903041..c08196f1d6 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -36,9 +36,11 @@ def _apply_and_pass(func, *args, **kwargs): return func(args[0], **kwargs) return tuple(map(lambda arg: func(arg, **kwargs), args)) + def convert_one_to_table(arg): return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + def to_table(*args): return _apply_and_pass(convert_one_to_table, *args) @@ -136,4 +138,3 @@ def from_table(*args, sycl_queue=None, sua_iface=None, xp=None): return _apply_and_pass( convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp ) - diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index ce0f15936b..113d881228 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -92,7 +92,6 @@ ONEDAL_PY_INIT_MODULE(table) { auto* obj_ptr = convert_to_pyobject(t); return obj_ptr; }); - } } // namespace oneapi::dal::python From 2f85fd4713535424395acfe5d0f72d1451c27d16 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 08:19:57 +0100 Subject: [PATCH 48/68] Update test_memory_usage.py --- sklearnex/tests/test_memory_usage.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e7fdb72b5..6e3ef2b3f7 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,6 +142,14 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] + if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + X = xp.copy(X) + if not (y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS']): + y = xp.copy(y) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -160,6 +168,10 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + X = xp.copy(X) + assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From 8659248f70dc78cc94058690e217fa6383747b9b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 09:19:39 +0100 Subject: [PATCH 49/68] format --- sklearnex/tests/test_memory_usage.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e3ef2b3f7..214c03a6ba 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,14 +142,14 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] - if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] + if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): X = xp.copy(X) - if not (y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS']): + if not (y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"]): y = xp.copy(y) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - assert y.flags['C_CONTIGUOUS'] or y.flags['F_CONTIGUOUS'] + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -168,10 +168,10 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] - if not (X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS']): + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] + if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): X = xp.copy(X) - assert X.flags['C_CONTIGUOUS'] or X.flags['F_CONTIGUOUS'] + assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From 3827d6f38cfcd5ef065d8d6a3ea34bc749de436a Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 11:01:26 +0100 Subject: [PATCH 50/68] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index c08196f1d6..0deacf4c74 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -24,8 +24,9 @@ def make2d(X): # generalized for array-like inputs + # dpnp -1 indexing is broken, use size if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: - return X.reshape((-1, 1)) + return X.reshape((X.size, 1)) if np.isscalar(X): return np.atleast_2d(X) return X From 55fa7d214f7a2f0398f1a83a7961a8491c587269 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 12:28:38 +0100 Subject: [PATCH 51/68] Update _data_conversion.py --- onedal/datatypes/_data_conversion.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 0deacf4c74..353fef7e9c 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -22,16 +22,6 @@ from onedal import _backend, _is_dpc_backend -def make2d(X): - # generalized for array-like inputs - # dpnp -1 indexing is broken, use size - if hasattr(X, "reshape") and hasattr(X, "ndim") and X.ndim == 1: - return X.reshape((X.size, 1)) - if np.isscalar(X): - return np.atleast_2d(X) - return X - - def _apply_and_pass(func, *args, **kwargs): if len(args) == 1: return func(args[0], **kwargs) @@ -39,7 +29,7 @@ def _apply_and_pass(func, *args, **kwargs): def convert_one_to_table(arg): - return _backend.to_table(arg if sp.issparse(arg) else make2d(arg)) + return _backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg) def to_table(*args): From 175cd7899f2a3851c60cd1964c7f7fe1f48712f3 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Sun, 3 Nov 2024 13:33:34 +0100 Subject: [PATCH 52/68] Update test_validation.py --- onedal/utils/tests/test_validation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 5788a9ccc3..6f9f1c383f 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -78,12 +78,13 @@ def test_assert_finite_random_location( ): rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=shape).astype(dtype) - X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check: loc = rand.randint(0, X.size - 1) X.reshape((-1,))[loc] = float(check) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: @@ -103,12 +104,13 @@ def test_assert_finite_random_shape_and_location( lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) - X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check: loc = rand.randint(0, X.size - 1) X[loc] = float(check) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: From 7016ad0871a5f4c5f1d0c53bad5709752a88361c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Sun, 3 Nov 2024 14:33:38 +0100 Subject: [PATCH 53/68] remove unnecessary code --- onedal/datatypes/_data_conversion.py | 1 - sklearnex/tests/test_memory_usage.py | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 353fef7e9c..018b79524e 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -17,7 +17,6 @@ import warnings import numpy as np -import scipy.sparse as sp from onedal import _backend, _is_dpc_backend diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 214c03a6ba..6e7fdb72b5 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -142,14 +142,6 @@ class DummyEstimatorWithTableConversions(BaseEstimator): def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] - if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): - X = xp.copy(X) - if not (y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"]): - y = xp.copy(y) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - assert y.flags["C_CONTIGUOUS"] or y.flags["F_CONTIGUOUS"] X_table = to_table(X) y_table = to_table(y) # The presence of the fitted attributes (ending with a trailing @@ -168,10 +160,6 @@ def predict(self, X): # fitted attributes (ending with a trailing underscore). check_is_fitted(self) sua_iface, xp, _ = _get_sycl_namespace(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] - if not (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]): - X = xp.copy(X) - assert X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"] X_table = to_table(X) returned_X = from_table( X_table, sua_iface=sua_iface, sycl_queue=X.sycl_queue, xp=xp From fb7375f796834d6dd6a2ed490bdcc38a018f80e3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 06:57:01 +0100 Subject: [PATCH 54/68] make reviewer changes --- onedal/utils/finiteness_checker.cpp | 2 +- onedal/utils/tests/test_validation.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/onedal/utils/finiteness_checker.cpp b/onedal/utils/finiteness_checker.cpp index 6bc6a2e66b..2b8d84bd6f 100644 --- a/onedal/utils/finiteness_checker.cpp +++ b/onedal/utils/finiteness_checker.cpp @@ -66,7 +66,7 @@ void init_compute_ops(py::module_& m) { using namespace finiteness_checker; using input_t = compute_input; - compute_ops ops(policy, input_t{ data}, params2desc{}); + compute_ops ops(policy, input_t{ data }, params2desc{}); return fptype2t{ method2t{ Task{}, ops } }(params); }); } diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 6f9f1c383f..5f92a64bf7 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -19,7 +19,6 @@ import numpy as np import numpy.random as rand import pytest -from numpy.testing import assert_raises from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, @@ -88,7 +87,9 @@ def test_assert_finite_random_location( if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + _assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -114,4 +115,6 @@ def test_assert_finite_random_shape_and_location( if check is None or (allow_nan and check == "NaN"): _assert_all_finite(X, allow_nan=allow_nan) else: - assert_raises(ValueError, _assert_all_finite, X, allow_nan=allow_nan) + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + _assert_all_finite(X, allow_nan=allow_nan) From 30816bf546a8b5aa5470a34ec0b4e6c82577a3c9 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 15:43:29 +0100 Subject: [PATCH 55/68] make dtype check change --- onedal/datatypes/table.cpp | 4 ++++ onedal/utils/validation.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 113d881228..634cc99a1d 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -72,6 +72,10 @@ ONEDAL_PY_INIT_MODULE(table) { const auto column_count = t.get_column_count(); return py::make_tuple(row_count, column_count); }); + table_obj.def_property_readonly("dtype", [](const table& t){ + // returns a numpy dtype, even if source was not from numpy + return convert_dal_to_npy_type(t.get_metadata().get_data_type(0)); + }); #ifdef ONEDAL_DATA_PARALLEL define_sycl_usm_array_property(table_obj); diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 5294483ac2..836dd84a75 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -447,12 +447,12 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) + X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X.dtype.name == "float32" else "double", + "fptype": "float" if X_t.dtype == np.float32 else "double", "method": "dense", "allow_nan": allow_nan, } - X_t = to_table(_convert_to_supported(policy, X)) if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" From abb3b1683f71fe758beec194795ab6a8b24545f3 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 16:06:59 +0100 Subject: [PATCH 56/68] add sparse testing --- onedal/utils/tests/test_validation.py | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 5f92a64bf7..aefa1dbb36 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -19,6 +19,7 @@ import numpy as np import numpy.random as rand import pytest +import scipy.sparse as sp from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, @@ -118,3 +119,31 @@ def test_assert_finite_random_shape_and_location( msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): _assert_all_finite(X, allow_nan=allow_nan) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("allow_nan", [False, True]) +@pytest.mark.parametrize("check", ["inf", "NaN", None]) +@pytest.mark.parametrize("seed", [0, int(time.time())]) +def test_assert_finite_sparse(dtype, allow_nan, check, seed): + lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + rand.seed(seed) + X = sp.random( + rand.randint(lb, ub), + rand.randint(lb, ub), + format="csr", + dtype=dtype, + random_state=rand.default_rng(seed), + ) + + if check: + locx = rand.randint(0, X.shape[0] - 1) + locy = rand.randint(0, X.shape[1] - 1) + X[locx, locy] = float(check) + + if check is None or (allow_nan and check == "NaN"): + assert_all_finite(X, allow_nan=allow_nan) + else: + msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." + with pytest.raises(ValueError, match=msg_err): + assert_all_finite(X, allow_nan=allow_nan) From 97aef73e5866db07206fdf47571f9fb94f93185c Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:06:17 +0100 Subject: [PATCH 57/68] try again --- onedal/datatypes/table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index 634cc99a1d..a06a08710d 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -74,7 +74,7 @@ ONEDAL_PY_INIT_MODULE(table) { }); table_obj.def_property_readonly("dtype", [](const table& t){ // returns a numpy dtype, even if source was not from numpy - return convert_dal_to_npy_type(t.get_metadata().get_data_type(0)); + return py::dtype(convert_dal_to_npy_type(t.get_metadata().get_data_type(0))); }); #ifdef ONEDAL_DATA_PARALLEL From 6e29651587f42226b06c2d733d386a0bc19e0168 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:29:19 +0100 Subject: [PATCH 58/68] try again --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index aefa1dbb36..d953038f33 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -126,7 +126,7 @@ def test_assert_finite_random_shape_and_location( @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) def test_assert_finite_sparse(dtype, allow_nan, check, seed): - lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + lb, ub = 2, 256 rand.seed(seed) X = sp.random( rand.randint(lb, ub), From 59363a8126643a1eb5aff981d1d7ce09cdbf711b Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Tue, 19 Nov 2024 17:30:46 +0100 Subject: [PATCH 59/68] try again --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index d953038f33..7662f486f3 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -126,7 +126,7 @@ def test_assert_finite_random_shape_and_location( @pytest.mark.parametrize("check", ["inf", "NaN", None]) @pytest.mark.parametrize("seed", [0, int(time.time())]) def test_assert_finite_sparse(dtype, allow_nan, check, seed): - lb, ub = 2, 256 + lb, ub = 2, 2056 rand.seed(seed) X = sp.random( rand.randint(lb, ub), From e3facab10a9833bd9f79dfe25d6c1ed0ed70c77c Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 20 Nov 2024 23:16:14 +0100 Subject: [PATCH 60/68] Update onedal/utils/tests/test_validation.py Co-authored-by: Samir Nasibli --- onedal/utils/tests/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index 7662f486f3..b08720085c 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -34,7 +34,7 @@ [ [16, 2048], [ - 2**16 + 3, + 65539, # 2**16 + 3, ], [1000, 1000], [ From e8d8c71bdf0133e9ecdd65a2d9edc09d6370481a Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 23:21:15 +0100 Subject: [PATCH 61/68] formatting --- onedal/utils/tests/test_validation.py | 2 +- sklearnex/utils/tests/test_finite.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index b08720085c..d7dfba87e5 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -34,7 +34,7 @@ [ [16, 2048], [ - 65539, # 2**16 + 3, + 65539, # 2**16 + 3, ], [1000, 1000], [ diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 2874ec3400..4ef907af06 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -37,7 +37,7 @@ ) @pytest.mark.parametrize("allow_nan", [False, True]) def test_sum_infinite_actually_finite(dtype, shape, allow_nan): - X = np.array(shape, dtype=dtype) + X = np.empty(shape, dtype=dtype) X.fill(np.finfo(dtype).max) _assert_all_finite(X, allow_nan=allow_nan) @@ -48,7 +48,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan): [ [16, 2048], [ - 2**16 + 3, + 65539, # 2**16 + 3, ], [1000, 1000], ], From afc76b8a3fbd8065a9807cac9a9c345492e02a59 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Thu, 21 Nov 2024 23:23:07 +0100 Subject: [PATCH 62/68] formatting again --- onedal/utils/tests/test_validation.py | 6 +++--- sklearnex/utils/tests/test_finite.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index d7dfba87e5..dc1ad7f41e 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -47,7 +47,7 @@ "dataframe, queue", get_dataframes_and_queues("numpy,dpnp,dpctl") ) def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue): - X = np.array(shape, dtype=dtype) + X = np.empty(shape, dtype=dtype) X.fill(np.finfo(dtype).max) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) _assert_all_finite(X, allow_nan=allow_nan) @@ -59,7 +59,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue) [ [16, 2048], [ - 2**16 + 3, + 65539, # 2**16 + 3, ], [1000, 1000], [ @@ -103,7 +103,7 @@ def test_assert_finite_random_location( def test_assert_finite_random_shape_and_location( dtype, allow_nan, check, seed, dataframe, queue ): - lb, ub = 2, 1048576 # lb is a patching condition, ub 2^20 + lb, ub = 2, 1048576 # ub is 2^20 rand.seed(seed) X = rand.uniform(high=np.finfo(dtype).max, size=rand.randint(lb, ub)).astype(dtype) diff --git a/sklearnex/utils/tests/test_finite.py b/sklearnex/utils/tests/test_finite.py index 4ef907af06..7d83667699 100644 --- a/sklearnex/utils/tests/test_finite.py +++ b/sklearnex/utils/tests/test_finite.py @@ -48,7 +48,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan): [ [16, 2048], [ - 65539, # 2**16 + 3, + 65539, # 2**16 + 3, ], [1000, 1000], ], From 4efad2ce70dcdf708be1290b2678727125bd4857 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 14:20:00 +0100 Subject: [PATCH 63/68] add _check_sample_weight --- sklearnex/utils/validation.py | 142 +++++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index b2d1898643..c35b2ec272 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -14,4 +14,144 @@ # limitations under the License. # =============================================================================== -from daal4py.sklearn.utils.validation import _assert_all_finite +import warnings + +import numbers +import numpy as np +import scipy.sparse as sp +from sklearn.utils.validation import _num_samples, check_array, check_non_negative, _assert_all_finite as _sklearn_assert_all_finite + +from daal4py.sklearn._utils import sklearn_check_version +from onedal.utils._array_api import _is_numpy_namespace, _get_sycl_namespace +from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite + +from ._array_api import get_namespace + +if sklearn_check_version("1.6"): + from sklearn.utils.validation import validate_data as _sklearn_validate_data + + _finite_keyword = "ensure_all_finite" + +else: + from sklearn.base import BaseEstimator + + _sklearn_validate_data = BaseEstimator._validate_data + _finite_keyword = "force_all_finite" + + +def _is_contiguous(X): + # array_api does not have a `strides` or `flags` attribute for testing memory + # order. When dlpack support is brought in for oneDAL, the dlpack python capsule + # can then be inspected for strides and this must be updated. _is_contiguous is + # therefore conservative in verifying attributes and does not support array_api. + # This will block onedal_assert_all_finite from being used for array_api inputs. + return hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) + + +def _sklearnex_assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + # size check is an initial match to daal4py for performance reasons, can be + # optimized later + xp, _ = get_namespace(X) + if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): + _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + else: + _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) + + +def assert_all_finite( + X, + *, + allow_nan=False, + input_name="", +): + _sklearnex_assert_all_finite( + X.data if sp.issparse(X) else X, + allow_nan=allow_nan, + input_name=input_name, + ) + + +def validate_data( + _estimator, + /, + X="no_validation", + y="no_validation", + **kwargs, +): + # force finite check to not occur in sklearn, default is True + # `ensure_all_finite` is the most up-to-date keyword name in sklearn + # _finite_keyword provides backward compatability for `force_all_finite` + ensure_all_finite = kwargs.pop("ensure_all_finite", True) + kwargs[_finite_keyword] = False + + out = _sklearn_validate_data( + _estimator, + X=X, + y=y, + **kwargs, + ) + if ensure_all_finite: + # run local finite check + allow_nan = ensure_all_finite == "allow-nan" + arg = iter(out if isinstance(out, tuple) else (out,)) + if not isinstance(X, str) or X != "no_validation": + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") + if not (y is None or isinstance(y, str) and y == "no_validation"): + assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") + return out + + +def _check_sample_weight( + sample_weight, X, dtype=None, copy=False, only_non_negative=False +): + + n_samples = _num_samples(X) + xp, _ = get_namespace(X) + + if dtype is not None and dtype not in [xp.float32, xp.float64]: + dtype = xp.float64 + + if sample_weight is None: + sample_weight = xp.ones(n_samples, dtype=dtype) + elif isinstance(sample_weight, numbers.Number): + sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) + else: + if dtype is None: + dtype = [xp.float64, xp.float32] + + # create param dict such that the variable finite_keyword can + # be added to it without direct sklearn_check_version maintenance + params = {"accept_sparse":False, + "ensure_2d":False, + "dtype":dtype, + "order":"C", + "copy":copy, + "input_name":"sample_weight", + _finite_keyword:False, + } + + sample_weight = check_array( + sample_weight, + **params + ) + assert_all_finite(sample_weight, input_name="sample_weight") + + if sample_weight.ndim != 1: + raise ValueError("Sample weights must be 1D array or scalar") + + if sample_weight.shape != (n_samples,): + raise ValueError( + "sample_weight.shape == {}, expected {}!".format( + sample_weight.shape, (n_samples,) + ) + ) + + if only_non_negative: + check_non_negative(sample_weight, "`sample_weight`") + + return sample_weight \ No newline at end of file From 63e2fa8a8a1248f15bcf995221ee74a519ca2e87 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Fri, 22 Nov 2024 14:21:39 +0100 Subject: [PATCH 64/68] Revert "add _check_sample_weight" This reverts commit 4efad2ce70dcdf708be1290b2678727125bd4857. --- sklearnex/utils/validation.py | 142 +--------------------------------- 1 file changed, 1 insertion(+), 141 deletions(-) diff --git a/sklearnex/utils/validation.py b/sklearnex/utils/validation.py index c35b2ec272..b2d1898643 100755 --- a/sklearnex/utils/validation.py +++ b/sklearnex/utils/validation.py @@ -14,144 +14,4 @@ # limitations under the License. # =============================================================================== -import warnings - -import numbers -import numpy as np -import scipy.sparse as sp -from sklearn.utils.validation import _num_samples, check_array, check_non_negative, _assert_all_finite as _sklearn_assert_all_finite - -from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _is_numpy_namespace, _get_sycl_namespace -from onedal.utils.validation import _assert_all_finite as _onedal_assert_all_finite - -from ._array_api import get_namespace - -if sklearn_check_version("1.6"): - from sklearn.utils.validation import validate_data as _sklearn_validate_data - - _finite_keyword = "ensure_all_finite" - -else: - from sklearn.base import BaseEstimator - - _sklearn_validate_data = BaseEstimator._validate_data - _finite_keyword = "force_all_finite" - - -def _is_contiguous(X): - # array_api does not have a `strides` or `flags` attribute for testing memory - # order. When dlpack support is brought in for oneDAL, the dlpack python capsule - # can then be inspected for strides and this must be updated. _is_contiguous is - # therefore conservative in verifying attributes and does not support array_api. - # This will block onedal_assert_all_finite from being used for array_api inputs. - return hasattr(X, "flags") and (X.flags["C_CONTIGUOUS"] or X.flags["F_CONTIGUOUS"]) - - -def _sklearnex_assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - # size check is an initial match to daal4py for performance reasons, can be - # optimized later - xp, _ = get_namespace(X) - if X.size < 32768 or X.dtype not in [xp.float32, xp.float64] or not _is_contiguous(X): - _sklearn_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) - else: - _onedal_assert_all_finite(X, allow_nan=allow_nan, input_name=input_name) - - -def assert_all_finite( - X, - *, - allow_nan=False, - input_name="", -): - _sklearnex_assert_all_finite( - X.data if sp.issparse(X) else X, - allow_nan=allow_nan, - input_name=input_name, - ) - - -def validate_data( - _estimator, - /, - X="no_validation", - y="no_validation", - **kwargs, -): - # force finite check to not occur in sklearn, default is True - # `ensure_all_finite` is the most up-to-date keyword name in sklearn - # _finite_keyword provides backward compatability for `force_all_finite` - ensure_all_finite = kwargs.pop("ensure_all_finite", True) - kwargs[_finite_keyword] = False - - out = _sklearn_validate_data( - _estimator, - X=X, - y=y, - **kwargs, - ) - if ensure_all_finite: - # run local finite check - allow_nan = ensure_all_finite == "allow-nan" - arg = iter(out if isinstance(out, tuple) else (out,)) - if not isinstance(X, str) or X != "no_validation": - assert_all_finite(next(arg), allow_nan=allow_nan, input_name="X") - if not (y is None or isinstance(y, str) and y == "no_validation"): - assert_all_finite(next(arg), allow_nan=allow_nan, input_name="y") - return out - - -def _check_sample_weight( - sample_weight, X, dtype=None, copy=False, only_non_negative=False -): - - n_samples = _num_samples(X) - xp, _ = get_namespace(X) - - if dtype is not None and dtype not in [xp.float32, xp.float64]: - dtype = xp.float64 - - if sample_weight is None: - sample_weight = xp.ones(n_samples, dtype=dtype) - elif isinstance(sample_weight, numbers.Number): - sample_weight = xp.full(n_samples, sample_weight, dtype=dtype) - else: - if dtype is None: - dtype = [xp.float64, xp.float32] - - # create param dict such that the variable finite_keyword can - # be added to it without direct sklearn_check_version maintenance - params = {"accept_sparse":False, - "ensure_2d":False, - "dtype":dtype, - "order":"C", - "copy":copy, - "input_name":"sample_weight", - _finite_keyword:False, - } - - sample_weight = check_array( - sample_weight, - **params - ) - assert_all_finite(sample_weight, input_name="sample_weight") - - if sample_weight.ndim != 1: - raise ValueError("Sample weights must be 1D array or scalar") - - if sample_weight.shape != (n_samples,): - raise ValueError( - "sample_weight.shape == {}, expected {}!".format( - sample_weight.shape, (n_samples,) - ) - ) - - if only_non_negative: - check_non_negative(sample_weight, "`sample_weight`") - - return sample_weight \ No newline at end of file +from daal4py.sklearn.utils.validation import _assert_all_finite From 48cafbca4de30c508b6355262a9fc2b881e668b1 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 25 Nov 2024 21:33:29 +0100 Subject: [PATCH 65/68] Update test_validation.py --- onedal/utils/tests/test_validation.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index dc1ad7f41e..a2eaac753c 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -137,9 +137,8 @@ def test_assert_finite_sparse(dtype, allow_nan, check, seed): ) if check: - locx = rand.randint(0, X.shape[0] - 1) - locy = rand.randint(0, X.shape[1] - 1) - X[locx, locy] = float(check) + locx = rand.randint(0, X.data.shape[0] - 1) + X.data[locx] = float(check) if check is None or (allow_nan and check == "NaN"): assert_all_finite(X, allow_nan=allow_nan) From 085f8a7f2296dfb4c592eea4fcfc4f5b91b13453 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Mon, 25 Nov 2024 21:34:42 +0100 Subject: [PATCH 66/68] Update validation.py --- onedal/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 836dd84a75..38dcfd3fb3 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -449,7 +449,7 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): policy = _get_policy(None, X) X_t = to_table(_convert_to_supported(policy, X)) params = { - "fptype": "float" if X_t.dtype == np.float32 else "double", + "fptype": X_t.dtype, "method": "dense", "allow_nan": allow_nan, } From b539d230f9fb509773f4fe780a32ade815fba041 Mon Sep 17 00:00:00 2001 From: "Faust, Ian" Date: Wed, 27 Nov 2024 11:58:05 +0100 Subject: [PATCH 67/68] make changes --- onedal/utils/tests/test_validation.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index a2eaac753c..d03c0cad39 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -33,9 +33,7 @@ "shape", [ [16, 2048], - [ - 65539, # 2**16 + 3, - ], + [65539], # 2**16 + 3, [1000, 1000], [ 3, @@ -58,9 +56,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue) "shape", [ [16, 2048], - [ - 65539, # 2**16 + 3, - ], + [65539], # 2**16 + 3, [1000, 1000], [ 3, From 63d95664339dc683eaf07c812988198fdd4a3b95 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Thu, 28 Nov 2024 09:21:49 +0100 Subject: [PATCH 68/68] Update test_validation.py --- onedal/utils/tests/test_validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onedal/utils/tests/test_validation.py b/onedal/utils/tests/test_validation.py index d03c0cad39..1835cea3b6 100644 --- a/onedal/utils/tests/test_validation.py +++ b/onedal/utils/tests/test_validation.py @@ -25,7 +25,7 @@ _convert_to_dataframe, get_dataframes_and_queues, ) -from onedal.utils.validation import _assert_all_finite, assert_all_finite +from onedal.utils.validation import assert_all_finite @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -48,7 +48,7 @@ def test_sum_infinite_actually_finite(dtype, shape, allow_nan, dataframe, queue) X = np.empty(shape, dtype=dtype) X.fill(np.finfo(dtype).max) X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -82,11 +82,11 @@ def test_assert_finite_random_location( X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) else: msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @@ -110,11 +110,11 @@ def test_assert_finite_random_shape_and_location( X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) if check is None or (allow_nan and check == "NaN"): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) else: msg_err = "Input contains " + ("infinity" if allow_nan else "NaN, infinity") + "." with pytest.raises(ValueError, match=msg_err): - _assert_all_finite(X, allow_nan=allow_nan) + assert_all_finite(X, allow_nan=allow_nan) @pytest.mark.parametrize("dtype", [np.float32, np.float64])