diff --git a/.gitignore b/.gitignore index b41a100b87..2da6f3f6db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,20 @@ +## common +__pycache__ +*.pyc +*.o +*.so +*.dylib +.cache +.coverage +.vscode *.swp -__pycache__/ +*.pytest_cache +htmlcov build/ cuml.egg-info/ dist/ python/cuML/cuml.cpp +log + +## eclipse +.project diff --git a/README.md b/README.md index ead974a650..4e72029b83 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,12 @@ $ make -j $ ./ml_test ``` +To test the python package: + +``` +$ py.test python/cuML/test +``` + ### Python Notebooks Demo notebooks can be found in python/notebooks folder. diff --git a/python/cuML/dbscan/dbscan_wrapper.pyx b/python/cuML/dbscan/dbscan_wrapper.pyx index 17fc24fc62..3d9cedb3a9 100644 --- a/python/cuML/dbscan/dbscan_wrapper.pyx +++ b/python/cuML/dbscan/dbscan_wrapper.pyx @@ -1,18 +1,18 @@ # - # Copyright (c) 2018, NVIDIA CORPORATION. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # +# Copyright (c) 2018, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# cimport c_dbscan import numpy as np @@ -79,17 +79,24 @@ class DBSCAN: Dense matrix (floats or doubles) of shape (n_samples, n_features) """ - x = [] - for col in X.columns: - x.append(X[col]._column.dtype) - break + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix(order='C') + self.n_rows = len(X) + self.n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(X) + self.n_rows = X.shape[0] + self.n_cols = X.shape[1] - self.gdf_datatype = np.dtype(x[0]) - self.n_rows = len(X) - self.n_cols = len(X._cols) + else: + msg = "X matrix format not supported" + raise TypeError(msg) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) + input_ptr = self._get_ctype_ptr(X_m) self.labels_ = cudf.Series(np.zeros(self.n_rows, dtype=np.int32)) cdef uintptr_t labels_ptr = self._get_column_ptr(self.labels_) @@ -100,15 +107,16 @@ class DBSCAN: self.n_cols, self.eps, self.min_samples, - labels_ptr) + labels_ptr) else: c_dbscan.dbscanFit(input_ptr, self.n_rows, self.n_cols, self.eps, self.min_samples, - labels_ptr) + labels_ptr) del(X_m) + return self def fit_predict(self, X): """ diff --git a/python/cuML/kmeans/kmeans_wrapper.pyx b/python/cuML/kmeans/kmeans_wrapper.pyx index 74647ee503..d2f7b2953b 100644 --- a/python/cuML/kmeans/kmeans_wrapper.pyx +++ b/python/cuML/kmeans/kmeans_wrapper.pyx @@ -1,3 +1,19 @@ +# +# Copyright (c) 2018, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + cimport c_kmeans import numpy as np from numba import cuda @@ -106,7 +122,6 @@ class KMeans: c = gdf.as_gpu_matrix(order='C').shape return self._get_ctype_ptr(gdf.as_gpu_matrix(order='C')) - def fit(self, X): """ Compute k-means clustering with X. @@ -119,13 +134,25 @@ class KMeans: """ self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) - self.n_rows = len(X) - self.n_cols = len(X._cols) - # cdef np.ndarray[np.float32_t, ndim=2, mode = 'c', cast=True] host_ary = input_gdf.as_gpu_matrix(order='C').copy_to_host() + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix(order='C') + self.n_rows = len(X) + self.n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(X) + self.n_rows = X.shape[0] + self.n_cols = X.shape[1] - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) self.labels_ = cudf.Series(np.zeros(self.n_rows, dtype=np.int32)) cdef uintptr_t labels_ptr = self._get_column_ptr(self.labels_) @@ -174,7 +201,6 @@ class KMeans: cluster_centers_ptr, # pred_centroids labels_ptr) # pred_labels - cluster_centers_gdf = cudf.DataFrame() for i in range(0, self.n_cols): cluster_centers_gdf[str(i)] = self.cluster_centers_[i:self.n_clusters*self.n_cols:self.n_cols] @@ -184,7 +210,6 @@ class KMeans: return self - def fit_predict(self, X): """ Compute cluster centers and predict cluster index for each sample. @@ -197,8 +222,6 @@ class KMeans: """ return self.fit(X).labels_ - - def predict(self, X): """ Predict the closest cluster each sample in X belongs to. @@ -210,11 +233,25 @@ class KMeans: """ self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) - self.n_rows = len(X) - self.n_cols = len(X._cols) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix(order='C') + self.n_rows = len(X) + self.n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(X) + self.n_rows = X.shape[0] + self.n_cols = X.shape[1] + + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) clust_mat = self.cluster_centers_.as_gpu_matrix(order='C') cdef uintptr_t cluster_centers_ptr = self._get_ctype_ptr(clust_mat) @@ -267,8 +304,6 @@ class KMeans: del(clust_mat) return self.labels_ - - def transform(self, X): """ Transform X to a cluster-distance space. @@ -280,31 +315,33 @@ class KMeans: """ - self.n_rows = len(X) - self.n_cols = len(X._cols) - - - # cdef np.ndarray[np.float32_t, ndim=2, mode = 'c', cast=True] host_ary = input_gdf.as_gpu_matrix(order='C').copy_to_host() + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix(order='C') + self.n_rows = len(X) + self.n_cols = len(X._cols) - # cdef np.ndarray[np.float32_t, ndim=2, mode = 'c', cast=True] cluster_centers_ptr = self.cluster_centers_.as_gpu_matrix(order='C').copy_to_host() + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(X) + self.n_rows = X.shape[0] + self.n_cols = X.shape[1] + else: + msg = "X matrix format not supported" + raise TypeError(msg) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) + input_ptr = self._get_ctype_ptr(X_m) clust_mat = self.cluster_centers_.as_gpu_matrix(order='C') cdef uintptr_t cluster_centers_ptr = self._get_ctype_ptr(clust_mat) preds_data = cuda.to_device(np.zeros(self.n_clusters*self.n_rows, - dtype=self.gdf_datatype.type)) + dtype=self.gdf_datatype.type)) cdef uintptr_t preds_ptr = self._get_ctype_ptr(preds_data) - - ary=np.array([1.0,1.5,3.5,2.5],dtype=np.float32) - dary=cuda.to_device(ary) - cdef uintptr_t ptr2 = dary.device_ctypes_pointer.value - if self.gdf_datatype.type == np.float32: c_kmeans.kmeans_transform( self.verbose, # verbose @@ -330,7 +367,6 @@ class KMeans: cluster_centers_ptr, # centroids preds_ptr) # preds - preds_gdf = cudf.DataFrame() for i in range(0, self.n_clusters): preds_gdf[str(i)] = preds_data[i*self.n_rows:(i+1)*self.n_rows] @@ -339,7 +375,6 @@ class KMeans: del(clust_mat) return preds_gdf - def fit_transform(self, input_gdf): """ Compute clustering and transform input_gdf to cluster-distance space. diff --git a/python/cuML/knn/knn_wrapper.py b/python/cuML/knn/knn_wrapper.py index a37cd0f159..83e149d500 100644 --- a/python/cuML/knn/knn_wrapper.py +++ b/python/cuML/knn/knn_wrapper.py @@ -1,27 +1,29 @@ - # Copyright (c) 2018, NVIDIA CORPORATION. - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # +# Copyright (c) 2018, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import faiss import numpy as np import pandas as pd import cudf + class KNNparams: - def __init__(self,n_gpus): + def __init__(self, n_gpus): self.n_gpus = n_gpus + class KNN: """ @@ -61,11 +63,11 @@ class KNN: n_samples = 3, n_dims = 3 dim_0 dim_1 dim_2 - + 0 1.0 2.0 3.0 1 1.0 2.0 4.0 2 2.0 2.0 4.0 - + # Index: index_neighbor_0 index_neighbor_1 index_neighbor_2 @@ -82,38 +84,43 @@ class KNN: For an additional example see `the KNN notebook `_. For additional docs, see `scikitlearn's KDtree `_. """ - def __init__(self, n_gpus=-1): # -1 means using all gpus + def __init__(self, n_gpus=-1): + # -1 means using all gpus self.params = KNNparams(n_gpus) - def fit(self,X): - X = self.to_nparray(X) - assert len(X.shape)==2, 'data should be two dimensional' + def fit(self, X): + if (isinstance(X, cudf.DataFrame)): + X = self.to_nparray(X) + assert len(X.shape) == 2, 'data should be two dimensional' n_dims = X.shape[1] - cpu_index = faiss.IndexFlatL2(n_dims) # build a flat (CPU) index - if self.params.n_gpus==1: - res = faiss.StandardGpuResources() # use a single GPU + cpu_index = faiss.IndexFlatL2(n_dims) + # build a flat (CPU) index + if self.params.n_gpus == 1: + res = faiss.StandardGpuResources() + # use a single GPU # make it a flat GPU index gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index) else: - gpu_index = faiss.index_cpu_to_all_gpus(cpu_index,ngpu=self.params.n_gpus) + gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, + ngpu=self.params.n_gpus) gpu_index.add(X) self.gpu_index = gpu_index - def query(self,X,k): + def query(self, X, k): X = self.to_nparray(X) - D,I = self.gpu_index.search(X, k) - D = self.to_cudf(D,col='distance') - I = self.to_cudf(I,col='index') - return D,I + D, I = self.gpu_index.search(X, k) + D = self.to_cudf(D, col='distance') + I = self.to_cudf(I, col='index') + return D, I - def to_nparray(self,x): - if isinstance(x,cudf.DataFrame): + def to_nparray(self, x): + if isinstance(x, cudf.DataFrame): x = x.to_pandas() return np.ascontiguousarray(x) - def to_cudf(self,df,col=''): + def to_cudf(self, df, col=''): # convert pandas dataframe to cudf dataframe if isinstance(df,np.ndarray): - df = pd.DataFrame({'%s_neighbor_%d'%(col,i):df[:,i] for i in range(df.shape[1])}) + df = pd.DataFrame({'%s_neighbor_%d'%(col, i): df[:, i] for i in range(df.shape[1])}) pdf = cudf.DataFrame.from_pandas(df) return pdf diff --git a/python/cuML/pca/pca_wrapper.pyx b/python/cuML/pca/pca_wrapper.pyx index 0432c66935..3358bd6fb7 100644 --- a/python/cuML/pca/pca_wrapper.pyx +++ b/python/cuML/pca/pca_wrapper.pyx @@ -152,13 +152,7 @@ class PCA: 'jacobi': COV_EIG_JACOBI }[algorithm] - def _initialize_arrays(self, input_gdf, n_components, n_rows, n_cols): - - x = [] - for col in input_gdf.columns: - x.append(input_gdf[col]._column.dtype) - break - self.gdf_datatype = np.dtype(x[0]) + def _initialize_arrays(self, n_components, n_rows, n_cols): self.trans_input_ = cuda.to_device(np.zeros(n_rows*n_components, dtype=self.gdf_datatype)) @@ -172,9 +166,9 @@ class PCA: dtype=self.gdf_datatype)) self.mean_ = cudf.Series(np.zeros(n_cols, dtype=self.gdf_datatype)) self.singular_values_ = cudf.Series(np.zeros(n_components, - dtype=self.gdf_datatype)) - self.noise_variance_ = cudf.Series(np.zeros(1, dtype=self.gdf_datatype)) + self.noise_variance_ = cudf.Series(np.zeros(1, + dtype=self.gdf_datatype)) def _get_ctype_ptr(self, obj): # The manner to access the pointers in the gdf's might change, so @@ -185,7 +179,7 @@ class PCA: def _get_column_ptr(self, obj): return self._get_ctype_ptr(obj._column._data.to_gpu_array()) - def fit(self, X, _transform=True): + def fit(self, X, _transform=False): """ Fit the model with X. @@ -200,24 +194,38 @@ class PCA: """ # c params + + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + # PCA expects transpose of the input + X_m = X.as_gpu_matrix() + self.params.n_rows = len(X) + self.params.n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + self.params.n_rows = X.shape[0] + self.params.n_cols = X.shape[1] + + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) + cpdef c_pca.paramsPCA params params.n_components = self.params.n_components - params.n_rows = len(X) - params.n_cols = len(X._cols) + params.n_rows = self.params.n_rows + params.n_cols = self.params.n_cols params.whiten = self.params.whiten params.n_iterations = self.params.iterated_power params.tol = self.params.tol params.algorithm = self.params.svd_solver - # python params - self.params.n_rows = len(X) - self.params.n_cols = len(X._cols) - - self._initialize_arrays(X, self.params.n_components, - self.params.n_rows, self.params.n_cols) - - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) + self._initialize_arrays(params.n_components, + params.n_rows, params.n_cols) cdef uintptr_t components_ptr = self._get_ctype_ptr(self.components_) @@ -286,7 +294,10 @@ class PCA: self.mean_ptr = mean_ptr self.noise_variance_ptr = noise_vars_ptr - del(X_m) + if (isinstance(X, cudf.DataFrame)): + del(X_m) + + return self def fit_transform(self, X): """ @@ -326,6 +337,18 @@ class PCA: X_original : cuDF DataFrame, shape (n_samples, n_features) """ + cdef uintptr_t trans_input_ptr + if (isinstance(X, cudf.DataFrame)): + X_m = X.as_gpu_matrix() + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + trans_input_ptr = self._get_ctype_ptr(X_m) + cpdef c_pca.paramsPCA params params.n_components = self.params.n_components params.n_rows = len(X) @@ -340,10 +363,9 @@ class PCA: input_data = cuda.to_device(np.zeros(params.n_rows*params.n_cols, dtype=gdf_datatype.type)) - #cdef bool transpose_comp = False cdef uintptr_t input_ptr = input_data.device_ctypes_pointer.value - cdef uintptr_t trans_input_ptr = X.as_gpu_matrix().device_ctypes_pointer.value + cdef uintptr_t components_ptr = self.components_ptr cdef uintptr_t singular_vals_ptr = self.singular_values_ptr cdef uintptr_t mean_ptr = self.mean_ptr @@ -367,6 +389,7 @@ class PCA: for i in range(0, params.n_cols): X_original[str(i)] = input_data[i*params.n_rows:(i+1)*params.n_rows] + del(X_m) return X_original @@ -386,25 +409,37 @@ class PCA: X_new : cuDF DataFrame, shape (n_samples, n_components) """ + + + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix() + n_rows = len(X) + n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + n_rows = X.shape[0] + n_cols = X.shape[1] + + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) + cpdef c_pca.paramsPCA params params.n_components = self.params.n_components - params.n_rows = len(X) - params.n_cols = len(X._cols) + params.n_rows = n_rows + params.n_cols = n_cols params.whiten = self.params.whiten - x = [] - for col in X.columns: - x.append(X[col]._column.dtype) - break - gdf_datatype = np.dtype(x[0]) - trans_input_data = cuda.to_device( np.zeros(params.n_rows*params.n_components, dtype=gdf_datatype.type)) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) - cdef uintptr_t trans_input_ptr = self._get_ctype_ptr(trans_input_data) cdef uintptr_t components_ptr = self.components_ptr cdef uintptr_t singular_vals_ptr = self.singular_values_ptr @@ -431,4 +466,3 @@ class PCA: del(X_m) return X_new - diff --git a/python/cuML/test/data/mortgage_pca.log b/python/cuML/test/data/mortgage_pca.log deleted file mode 100644 index 17007e8dd8..0000000000 --- a/python/cuML/test/data/mortgage_pca.log +++ /dev/null @@ -1,658 +0,0 @@ -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.99678 seconds -fit_ sklearn done in 1.08578 seconds -transform_ sklearn done in 0.21612 seconds -run_pca sklearn done in 1.30644 seconds -fit_ cuml done in 1.80075 seconds -transform_ cuml done in 0.14337 seconds -run_pca cuml done in 1.94828 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.48904 seconds -fit_ sklearn done in 1.01135 seconds -transform_ sklearn done in 0.19976 seconds -run_pca sklearn done in 1.21528 seconds -fit_ cuml done in 1.67869 seconds -transform_ cuml done in 0.13437 seconds -run_pca cuml done in 1.81695 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.66563 seconds -fit_ sklearn done in 1.09043 seconds -transform_ sklearn done in 0.20226 seconds -run_pca sklearn done in 1.29846 seconds -fit_ cuml done in 1.71418 seconds -transform_ cuml done in 0.14044 seconds -run_pca cuml done in 1.85914 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.22292 seconds -fit_ sklearn done in 2.13966 seconds -transform_ sklearn done in 0.53018 seconds -run_pca sklearn done in 2.67693 seconds -fit_ cuml done in 2.64808 seconds -transform_ cuml done in 0.19078 seconds -run_pca cuml done in 2.84388 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.87461 seconds -fit_ sklearn done in 2.06222 seconds -transform_ sklearn done in 0.35967 seconds -run_pca sklearn done in 2.42748 seconds -fit_ cuml done in 2.42728 seconds -transform_ cuml done in 0.17839 seconds -run_pca cuml done in 2.61086 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.44527 seconds -fit_ sklearn done in 2.11344 seconds -transform_ sklearn done in 0.36642 seconds -run_pca sklearn done in 2.48496 seconds -fit_ cuml done in 2.53724 seconds -transform_ cuml done in 0.17510 seconds -run_pca cuml done in 2.71633 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.29451 seconds -fit_ sklearn done in 9.27273 seconds -transform_ sklearn done in 0.90651 seconds -run_pca sklearn done in 10.18414 seconds -fit_ cuml done in 4.15159 seconds -transform_ cuml done in 0.23277 seconds -run_pca cuml done in 4.38886 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.88762 seconds -fit_ sklearn done in 9.65011 seconds -transform_ sklearn done in 0.80024 seconds -run_pca sklearn done in 10.45589 seconds -fit_ cuml done in 3.98464 seconds -transform_ cuml done in 0.22643 seconds -run_pca cuml done in 4.21631 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.68504 seconds -fit_ sklearn done in 9.08401 seconds -transform_ sklearn done in 0.80909 seconds -run_pca sklearn done in 9.89888 seconds -fit_ cuml done in 4.06853 seconds -transform_ cuml done in 0.23852 seconds -run_pca cuml done in 4.31330 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.45755 seconds -fit_ sklearn done in 2.65082 seconds -transform_ sklearn done in 0.41976 seconds -run_pca sklearn done in 3.07541 seconds -fit_ cuml done in 1.76836 seconds -transform_ cuml done in 0.17319 seconds -run_pca cuml done in 1.94622 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 26.68348 seconds -fit_ sklearn done in 2.37042 seconds -transform_ sklearn done in 0.38151 seconds -run_pca sklearn done in 2.75723 seconds -fit_ cuml done in 1.73919 seconds -transform_ cuml done in 0.17875 seconds -run_pca cuml done in 1.92247 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.86625 seconds -fit_ sklearn done in 2.69047 seconds -transform_ sklearn done in 0.47927 seconds -run_pca sklearn done in 3.17381 seconds -fit_ cuml done in 1.77953 seconds -transform_ cuml done in 0.16733 seconds -run_pca cuml done in 1.95090 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.46083 seconds -fit_ sklearn done in 4.56868 seconds -transform_ sklearn done in 0.75262 seconds -run_pca sklearn done in 5.32587 seconds -fit_ cuml done in 2.51925 seconds -transform_ cuml done in 0.21525 seconds -run_pca cuml done in 2.73922 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.32491 seconds -fit_ sklearn done in 4.60649 seconds -transform_ sklearn done in 0.82979 seconds -run_pca sklearn done in 5.44134 seconds -fit_ cuml done in 2.57637 seconds -transform_ cuml done in 0.21955 seconds -run_pca cuml done in 2.80191 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.98893 seconds -fit_ sklearn done in 4.66744 seconds -transform_ sklearn done in 0.83798 seconds -run_pca sklearn done in 5.51094 seconds -fit_ cuml done in 2.52110 seconds -transform_ cuml done in 0.22250 seconds -run_pca cuml done in 2.74837 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.61801 seconds -fit_ sklearn done in 21.10771 seconds -transform_ sklearn done in 1.57978 seconds -run_pca sklearn done in 22.69304 seconds -fit_ cuml done in 4.09200 seconds -transform_ cuml done in 0.26716 seconds -run_pca cuml done in 4.36396 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.84657 seconds -fit_ sklearn done in 21.27765 seconds -transform_ sklearn done in 1.60926 seconds -run_pca sklearn done in 22.89224 seconds -fit_ cuml done in 4.09738 seconds -transform_ cuml done in 0.26326 seconds -run_pca cuml done in 4.36517 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.18355 seconds -fit_ sklearn done in 21.49113 seconds -transform_ sklearn done in 1.67404 seconds -run_pca sklearn done in 23.17062 seconds -fit_ cuml done in 3.98407 seconds -transform_ cuml done in 0.26010 seconds -run_pca cuml done in 4.24850 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.31405 seconds -fit_ sklearn done in 5.20402 seconds -transform_ sklearn done in 0.89086 seconds -run_pca sklearn done in 6.11645 seconds -fit_ cuml done in 1.75248 seconds -transform_ cuml done in 0.15705 seconds -run_pca cuml done in 1.91526 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.23295 seconds -fit_ sklearn done in 5.21628 seconds -transform_ sklearn done in 0.81592 seconds -run_pca sklearn done in 6.03845 seconds -fit_ cuml done in 1.78776 seconds -transform_ cuml done in 0.15305 seconds -run_pca cuml done in 1.94636 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.83325 seconds -fit_ sklearn done in 5.45007 seconds -transform_ sklearn done in 0.94755 seconds -run_pca sklearn done in 6.40279 seconds -fit_ cuml done in 1.75168 seconds -transform_ cuml done in 0.16849 seconds -run_pca cuml done in 1.92515 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.10036 seconds -fit_ sklearn done in 9.46769 seconds -transform_ sklearn done in 1.56198 seconds -run_pca sklearn done in 11.03606 seconds -fit_ cuml done in 2.59048 seconds -transform_ cuml done in 0.20938 seconds -run_pca cuml done in 2.80448 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.92483 seconds -fit_ sklearn done in 11.11083 seconds -transform_ sklearn done in 1.70699 seconds -run_pca sklearn done in 12.82402 seconds -fit_ cuml done in 2.72024 seconds -transform_ cuml done in 0.20684 seconds -run_pca cuml done in 2.93166 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.52303 seconds -fit_ sklearn done in 10.09022 seconds -transform_ sklearn done in 1.56429 seconds -run_pca sklearn done in 11.66085 seconds -fit_ cuml done in 2.60759 seconds -transform_ cuml done in 0.20337 seconds -run_pca cuml done in 2.81584 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.74517 seconds -fit_ sklearn done in 45.85603 seconds -transform_ sklearn done in 3.05424 seconds -run_pca sklearn done in 48.91784 seconds -fit_ cuml done in 4.16034 seconds -transform_ cuml done in 0.31226 seconds -run_pca cuml done in 4.47812 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.88688 seconds -fit_ sklearn done in 49.27925 seconds -transform_ sklearn done in 3.33431 seconds -run_pca sklearn done in 52.61978 seconds -fit_ cuml done in 4.23572 seconds -transform_ cuml done in 0.34922 seconds -run_pca cuml done in 4.58994 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.00454 seconds -fit_ sklearn done in 45.42521 seconds -transform_ sklearn done in 3.33629 seconds -run_pca sklearn done in 48.76878 seconds -fit_ cuml done in 4.48286 seconds -transform_ cuml done in 0.32194 seconds -run_pca cuml done in 4.80997 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.99390 seconds -fit_ sklearn done in 10.71542 seconds -transform_ sklearn done in 1.45293 seconds -run_pca sklearn done in 12.17264 seconds -fit_ cuml done in 1.73653 seconds -transform_ cuml done in 0.17033 seconds -run_pca cuml done in 1.91148 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.95585 seconds -fit_ sklearn done in 11.42979 seconds -transform_ sklearn done in 1.78493 seconds -run_pca sklearn done in 13.21990 seconds -fit_ cuml done in 1.86407 seconds -transform_ cuml done in 0.18496 seconds -run_pca cuml done in 2.05380 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.63059 seconds -fit_ sklearn done in 11.11318 seconds -transform_ sklearn done in 2.22767 seconds -run_pca sklearn done in 13.34705 seconds -fit_ cuml done in 1.85647 seconds -transform_ cuml done in 0.19689 seconds -run_pca cuml done in 2.05932 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.58450 seconds -fit_ sklearn done in 21.46743 seconds -transform_ sklearn done in 3.35026 seconds -run_pca sklearn done in 24.82357 seconds -fit_ cuml done in 2.68546 seconds -transform_ cuml done in 0.23924 seconds -run_pca cuml done in 2.93003 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.61698 seconds -fit_ sklearn done in 21.60644 seconds -transform_ sklearn done in 3.42317 seconds -run_pca sklearn done in 25.03560 seconds -fit_ cuml done in 20.93842 seconds -transform_ cuml done in 0.24087 seconds -run_pca cuml done in 21.18434 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.69018 seconds -fit_ sklearn done in 21.52280 seconds -transform_ sklearn done in 3.34641 seconds -run_pca sklearn done in 24.87458 seconds -fit_ cuml done in 2.75120 seconds -transform_ cuml done in 0.25016 seconds -run_pca cuml done in 3.00573 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.71979 seconds -fit_ sklearn done in 97.43249 seconds -transform_ sklearn done in 6.79210 seconds -run_pca sklearn done in 104.23004 seconds -fit_ cuml done in 4.30847 seconds -transform_ cuml done in 0.35945 seconds -run_pca cuml done in 4.67170 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.75861 seconds -fit_ sklearn done in 103.24361 seconds -transform_ sklearn done in 6.34151 seconds -run_pca sklearn done in 109.59052 seconds -fit_ cuml done in 4.30290 seconds -transform_ cuml done in 0.37410 seconds -run_pca cuml done in 4.68217 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.94628 seconds -fit_ sklearn done in 99.95401 seconds -transform_ sklearn done in 8.84417 seconds -run_pca sklearn done in 110.54873 seconds -fit_ cuml done in 4.63357 seconds -transform_ cuml done in 0.39581 seconds -run_pca cuml done in 5.03489 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.29356 seconds -fit_ sklearn done in 24.68316 seconds -transform_ sklearn done in 3.18978 seconds -run_pca sklearn done in 27.87815 seconds -fit_ cuml done in 1.89806 seconds -transform_ cuml done in 0.21956 seconds -run_pca cuml done in 2.12285 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.51338 seconds -fit_ sklearn done in 23.66214 seconds -transform_ sklearn done in 3.49472 seconds -run_pca sklearn done in 27.16237 seconds -fit_ cuml done in 1.95876 seconds -transform_ cuml done in 0.23908 seconds -run_pca cuml done in 5.81248 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.28985 seconds -fit_ sklearn done in 23.00165 seconds -transform_ sklearn done in 3.50455 seconds -run_pca sklearn done in 26.51156 seconds -fit_ cuml done in 1.86588 seconds -transform_ cuml done in 0.21208 seconds -run_pca cuml done in 2.08268 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.28420 seconds -fit_ sklearn done in 41.50539 seconds -transform_ sklearn done in 6.50003 seconds -run_pca sklearn done in 48.01238 seconds -fit_ cuml done in 2.79746 seconds -transform_ cuml done in 0.29673 seconds -run_pca cuml done in 3.09873 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.54900 seconds -fit_ sklearn done in 43.77899 seconds -transform_ sklearn done in 6.76520 seconds -run_pca sklearn done in 50.55054 seconds -fit_ cuml done in 2.73722 seconds -transform_ cuml done in 0.31091 seconds -run_pca cuml done in 3.05358 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.37835 seconds -fit_ sklearn done in 42.70646 seconds -transform_ sklearn done in 6.78554 seconds -run_pca sklearn done in 49.49871 seconds -fit_ cuml done in 2.82226 seconds -transform_ cuml done in 0.31971 seconds -run_pca cuml done in 3.14740 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.35404 seconds -fit_ sklearn done in 203.13472 seconds -transform_ sklearn done in 18.44060 seconds -run_pca sklearn done in 221.58208 seconds -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 27.63166 seconds -fit_ sklearn done in 216.17910 seconds -transform_ sklearn done in 13.66269 seconds -run_pca sklearn done in 229.84993 seconds -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 26.29415 seconds -fit_ sklearn done in 198.19406 seconds -transform_ sklearn done in 13.72354 seconds -run_pca sklearn done in 211.92347 seconds -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.83433 seconds -fit_ sklearn done in 50.14629 seconds -transform_ sklearn done in 8.00641 seconds -run_pca sklearn done in 58.15821 seconds -fit_ cuml done in 2.19972 seconds -transform_ cuml done in 0.27906 seconds -run_pca cuml done in 2.48290 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.62851 seconds -fit_ sklearn done in 47.79905 seconds -transform_ sklearn done in 6.95017 seconds -run_pca sklearn done in 54.75555 seconds -fit_ cuml done in 2.11510 seconds -transform_ cuml done in 0.26679 seconds -run_pca cuml done in 2.38650 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.53393 seconds -fit_ sklearn done in 48.45176 seconds -transform_ sklearn done in 7.13317 seconds -run_pca sklearn done in 55.59210 seconds -fit_ cuml done in 2.02992 seconds -transform_ cuml done in 0.26904 seconds -run_pca cuml done in 2.30387 seconds -compare pca: cuml vs sklearn singular_values_ NOT equal -compare pca: cuml vs sklearn components_ NOT equal -compare pca: cuml vs sklearn explained_variance_ equal -compare pca: cuml vs sklearn explained_variance_ratio_ equal -compare pca: cuml vs sklearn noise_variance_ NOT equal -compare pca: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.18611 seconds -fit_ sklearn done in 92.38851 seconds -transform_ sklearn done in 13.20018 seconds -run_pca sklearn done in 109.84474 seconds -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.72312 seconds -fit_ sklearn done in 91.66215 seconds -transform_ sklearn done in 13.45526 seconds -run_pca sklearn done in 105.12478 seconds -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.39052 seconds -fit_ sklearn done in 88.79534 seconds diff --git a/python/cuML/test/data/mortgage_tsvd.log b/python/cuML/test/data/mortgage_tsvd.log deleted file mode 100644 index 00fe7a592a..0000000000 --- a/python/cuML/test/data/mortgage_tsvd.log +++ /dev/null @@ -1,630 +0,0 @@ -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.55005 seconds -fit_ sklearn done in 0.40303 seconds -transform_ sklearn done in 0.02568 seconds -run_tsvd sklearn done in 0.43238 seconds -fit_ cuml done in 1.87155 seconds -transform_ cuml done in 0.15691 seconds -run_tsvd cuml done in 2.03277 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.72210 seconds -fit_ sklearn done in 0.65930 seconds -transform_ sklearn done in 0.02376 seconds -run_tsvd sklearn done in 0.68730 seconds -fit_ cuml done in 1.83400 seconds -transform_ cuml done in 0.15222 seconds -run_tsvd cuml done in 1.99051 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.18018 seconds -fit_ sklearn done in 0.91388 seconds -transform_ sklearn done in 0.02952 seconds -run_tsvd sklearn done in 0.94932 seconds -fit_ cuml done in 1.91519 seconds -transform_ cuml done in 0.16490 seconds -run_tsvd cuml done in 2.08524 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 27.13058 seconds -fit_ sklearn done in 1.33425 seconds -transform_ sklearn done in 0.04892 seconds -run_tsvd sklearn done in 1.38900 seconds -fit_ cuml done in 2.69618 seconds -transform_ cuml done in 0.18905 seconds -run_tsvd cuml done in 2.89002 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.02979 seconds -fit_ sklearn done in 1.58077 seconds -transform_ sklearn done in 0.04965 seconds -run_tsvd sklearn done in 1.63606 seconds -fit_ cuml done in 2.60499 seconds -transform_ cuml done in 0.18830 seconds -run_tsvd cuml done in 2.79817 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.11743 seconds -fit_ sklearn done in 1.89121 seconds -transform_ sklearn done in 0.04806 seconds -run_tsvd sklearn done in 1.94520 seconds -fit_ cuml done in 2.62869 seconds -transform_ cuml done in 0.18561 seconds -run_tsvd cuml done in 2.82018 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.11812 seconds -fit_ sklearn done in 2.64744 seconds -transform_ sklearn done in 0.10043 seconds -run_tsvd sklearn done in 2.75299 seconds -fit_ cuml done in 4.18445 seconds -transform_ cuml done in 0.24103 seconds -run_tsvd cuml done in 4.42994 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.82749 seconds -fit_ sklearn done in 3.31912 seconds -transform_ sklearn done in 0.08148 seconds -run_tsvd sklearn done in 3.40513 seconds -fit_ cuml done in 4.14152 seconds -transform_ cuml done in 0.23929 seconds -run_tsvd cuml done in 4.38611 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=262144, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.67268 seconds -fit_ sklearn done in 2.59185 seconds -transform_ sklearn done in 0.08956 seconds -run_tsvd sklearn done in 2.68672 seconds -fit_ cuml done in 4.19680 seconds -transform_ cuml done in 0.24636 seconds -run_tsvd cuml done in 4.44891 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.33362 seconds -fit_ sklearn done in 1.09315 seconds -transform_ sklearn done in 0.04802 seconds -run_tsvd sklearn done in 1.14638 seconds -fit_ cuml done in 1.77718 seconds -transform_ cuml done in 0.17966 seconds -run_tsvd cuml done in 1.96147 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.92292 seconds -fit_ sklearn done in 1.15666 seconds -transform_ sklearn done in 0.07013 seconds -run_tsvd sklearn done in 4.79608 seconds -fit_ cuml done in 11.05323 seconds -transform_ cuml done in 0.18340 seconds -run_tsvd cuml done in 11.24216 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.71635 seconds -fit_ sklearn done in 1.07307 seconds -transform_ sklearn done in 0.05428 seconds -run_tsvd sklearn done in 1.13259 seconds -fit_ cuml done in 1.74381 seconds -transform_ cuml done in 0.17948 seconds -run_tsvd cuml done in 1.92760 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.67657 seconds -fit_ sklearn done in 3.51541 seconds -transform_ sklearn done in 0.10160 seconds -run_tsvd sklearn done in 3.62172 seconds -fit_ cuml done in 2.76466 seconds -transform_ cuml done in 0.20930 seconds -run_tsvd cuml done in 2.97821 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.86241 seconds -fit_ sklearn done in 3.52800 seconds -transform_ sklearn done in 0.09429 seconds -run_tsvd sklearn done in 3.62787 seconds -fit_ cuml done in 2.53034 seconds -transform_ cuml done in 0.20777 seconds -run_tsvd cuml done in 2.74322 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.65927 seconds -fit_ sklearn done in 3.54139 seconds -transform_ sklearn done in 0.10071 seconds -run_tsvd sklearn done in 3.64874 seconds -fit_ cuml done in 2.73603 seconds -transform_ cuml done in 0.21488 seconds -run_tsvd cuml done in 2.95620 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 26.18659 seconds -fit_ sklearn done in 6.45834 seconds -transform_ sklearn done in 0.15231 seconds -run_tsvd sklearn done in 6.61650 seconds -fit_ cuml done in 4.31314 seconds -transform_ cuml done in 0.27018 seconds -run_tsvd cuml done in 4.58811 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 25.28994 seconds -fit_ sklearn done in 5.99353 seconds -transform_ sklearn done in 0.28143 seconds -run_tsvd sklearn done in 6.28072 seconds -fit_ cuml done in 4.32215 seconds -transform_ cuml done in 0.28943 seconds -run_tsvd cuml done in 4.61748 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=524288, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 25.28452 seconds -fit_ sklearn done in 5.14635 seconds -transform_ sklearn done in 0.15735 seconds -run_tsvd sklearn done in 5.30850 seconds -fit_ cuml done in 4.34246 seconds -transform_ cuml done in 0.27660 seconds -run_tsvd cuml done in 4.62334 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.13297 seconds -fit_ sklearn done in 2.15920 seconds -transform_ sklearn done in 0.11191 seconds -run_tsvd sklearn done in 2.27592 seconds -fit_ cuml done in 1.81384 seconds -transform_ cuml done in 0.16793 seconds -run_tsvd cuml done in 1.98657 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.64206 seconds -fit_ sklearn done in 2.50929 seconds -transform_ sklearn done in 0.10805 seconds -run_tsvd sklearn done in 5.77241 seconds -fit_ cuml done in 1.89565 seconds -transform_ cuml done in 0.16720 seconds -run_tsvd cuml done in 2.06797 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.54274 seconds -fit_ sklearn done in 1.97878 seconds -transform_ sklearn done in 0.09273 seconds -run_tsvd sklearn done in 2.07734 seconds -fit_ cuml done in 1.79011 seconds -transform_ cuml done in 0.17155 seconds -run_tsvd cuml done in 1.96698 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 20.94062 seconds -fit_ sklearn done in 5.86032 seconds -transform_ sklearn done in 0.17393 seconds -run_tsvd sklearn done in 6.03966 seconds -fit_ cuml done in 2.78463 seconds -transform_ cuml done in 0.22410 seconds -run_tsvd cuml done in 3.01367 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.62735 seconds -fit_ sklearn done in 6.65248 seconds -transform_ sklearn done in 0.21042 seconds -run_tsvd sklearn done in 6.86833 seconds -fit_ cuml done in 2.63035 seconds -transform_ cuml done in 0.20632 seconds -run_tsvd cuml done in 2.84204 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.40457 seconds -fit_ sklearn done in 7.37002 seconds -transform_ sklearn done in 0.19350 seconds -run_tsvd sklearn done in 7.56861 seconds -fit_ cuml done in 2.76066 seconds -transform_ cuml done in 0.27002 seconds -run_tsvd cuml done in 3.03490 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.71616 seconds -fit_ sklearn done in 10.85280 seconds -transform_ sklearn done in 0.39829 seconds -run_tsvd sklearn done in 11.25706 seconds -fit_ cuml done in 4.44440 seconds -transform_ cuml done in 0.34380 seconds -run_tsvd cuml done in 4.79378 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.36835 seconds -fit_ sklearn done in 11.04954 seconds -transform_ sklearn done in 0.67364 seconds -run_tsvd sklearn done in 11.73035 seconds -fit_ cuml done in 4.31492 seconds -transform_ cuml done in 0.34103 seconds -run_tsvd cuml done in 4.66121 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=1048576, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.48033 seconds -fit_ sklearn done in 11.43558 seconds -transform_ sklearn done in 0.33796 seconds -run_tsvd sklearn done in 11.78008 seconds -fit_ cuml done in 4.39691 seconds -transform_ cuml done in 0.33589 seconds -run_tsvd cuml done in 4.73759 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.00728 seconds -fit_ sklearn done in 4.46405 seconds -transform_ sklearn done in 0.33537 seconds -run_tsvd sklearn done in 4.80472 seconds -fit_ cuml done in 1.82028 seconds -transform_ cuml done in 0.18989 seconds -run_tsvd cuml done in 2.01558 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.90095 seconds -fit_ sklearn done in 4.20999 seconds -transform_ sklearn done in 0.18121 seconds -run_tsvd sklearn done in 4.39555 seconds -fit_ cuml done in 1.92924 seconds -transform_ cuml done in 0.17889 seconds -run_tsvd cuml done in 2.11255 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.81565 seconds -fit_ sklearn done in 4.89965 seconds -transform_ sklearn done in 0.26137 seconds -run_tsvd sklearn done in 9.52918 seconds -fit_ cuml done in 1.87530 seconds -transform_ cuml done in 0.20076 seconds -run_tsvd cuml done in 2.08054 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.61469 seconds -fit_ sklearn done in 12.57614 seconds -transform_ sklearn done in 0.52059 seconds -run_tsvd sklearn done in 13.10302 seconds -fit_ cuml done in 2.84733 seconds -transform_ cuml done in 0.22858 seconds -run_tsvd cuml done in 3.08106 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.62886 seconds -fit_ sklearn done in 13.34044 seconds -transform_ sklearn done in 0.49794 seconds -run_tsvd sklearn done in 13.84438 seconds -fit_ cuml done in 2.81230 seconds -transform_ cuml done in 0.23499 seconds -run_tsvd cuml done in 3.05317 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.92906 seconds -fit_ sklearn done in 12.77630 seconds -transform_ sklearn done in 0.47499 seconds -run_tsvd sklearn done in 15.51163 seconds -fit_ cuml done in 2.74032 seconds -transform_ cuml done in 0.23058 seconds -run_tsvd cuml done in 2.97619 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.35436 seconds -fit_ sklearn done in 24.55343 seconds -transform_ sklearn done in 0.87324 seconds -run_tsvd sklearn done in 29.76015 seconds -fit_ cuml done in 4.46640 seconds -transform_ cuml done in 0.36021 seconds -run_tsvd cuml done in 4.83147 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.78876 seconds -fit_ sklearn done in 23.86557 seconds -transform_ sklearn done in 0.83480 seconds -run_tsvd sklearn done in 24.70593 seconds -fit_ cuml done in 4.65521 seconds -transform_ cuml done in 0.37479 seconds -run_tsvd cuml done in 5.03424 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=2097152, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.24347 seconds -fit_ sklearn done in 21.27376 seconds -transform_ sklearn done in 0.84160 seconds -run_tsvd sklearn done in 22.12122 seconds -fit_ cuml done in 4.52885 seconds -transform_ cuml done in 0.33370 seconds -run_tsvd cuml done in 4.86679 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.49000 seconds -fit_ sklearn done in 8.85736 seconds -transform_ sklearn done in 0.52042 seconds -run_tsvd sklearn done in 9.38329 seconds -fit_ cuml done in 1.95332 seconds -transform_ cuml done in 0.22183 seconds -run_tsvd cuml done in 2.17993 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.14831 seconds -fit_ sklearn done in 9.37370 seconds -transform_ sklearn done in 0.66406 seconds -run_tsvd sklearn done in 10.04277 seconds -fit_ cuml done in 2.05283 seconds -transform_ cuml done in 0.19964 seconds -run_tsvd cuml done in 2.25806 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.54803 seconds -fit_ sklearn done in 8.50345 seconds -transform_ sklearn done in 1.01319 seconds -run_tsvd sklearn done in 12.66682 seconds -fit_ cuml done in 2.04666 seconds -transform_ cuml done in 0.23219 seconds -run_tsvd cuml done in 2.28419 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.15002 seconds -fit_ sklearn done in 27.18676 seconds -transform_ sklearn done in 1.05580 seconds -run_tsvd sklearn done in 28.24868 seconds -fit_ cuml done in 3.01594 seconds -transform_ cuml done in 0.28555 seconds -run_tsvd cuml done in 3.30701 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 24.29973 seconds -fit_ sklearn done in 28.66314 seconds -transform_ sklearn done in 0.87433 seconds -run_tsvd sklearn done in 29.54278 seconds -fit_ cuml done in 3.07100 seconds -transform_ cuml done in 0.31273 seconds -run_tsvd cuml done in 3.38830 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.04109 seconds -fit_ sklearn done in 25.78241 seconds -transform_ sklearn done in 0.91856 seconds -run_tsvd sklearn done in 26.70760 seconds -fit_ cuml done in 2.90317 seconds -transform_ cuml done in 0.34085 seconds -run_tsvd cuml done in 3.24929 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.91639 seconds -fit_ sklearn done in 51.46553 seconds -transform_ sklearn done in 4.46101 seconds -run_tsvd sklearn done in 55.93342 seconds -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 23.24303 seconds -fit_ sklearn done in 46.62416 seconds -transform_ sklearn done in 1.86961 seconds -run_tsvd sklearn done in 48.49904 seconds -Namespace(data='mortgage', ncols=512, nrows=4194304, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 26.52639 seconds -fit_ sklearn done in 53.82986 seconds -transform_ sklearn done in 4.67902 seconds -run_tsvd sklearn done in 58.51549 seconds -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 25.51521 seconds -fit_ sklearn done in 18.38479 seconds -transform_ sklearn done in 1.91283 seconds -run_tsvd sklearn done in 20.30518 seconds -fit_ cuml done in 2.10775 seconds -transform_ cuml done in 0.25134 seconds -run_tsvd cuml done in 2.36421 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 30.01381 seconds -fit_ sklearn done in 19.67601 seconds -transform_ sklearn done in 1.08838 seconds -run_tsvd sklearn done in 20.77110 seconds -fit_ cuml done in 2.19272 seconds -transform_ cuml done in 0.26063 seconds -run_tsvd cuml done in 2.45800 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=128, nrows=8388608, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.44099 seconds -fit_ sklearn done in 22.77061 seconds -transform_ sklearn done in 1.71283 seconds -run_tsvd sklearn done in 24.48877 seconds -fit_ cuml done in 2.14110 seconds -transform_ cuml done in 0.26677 seconds -run_tsvd cuml done in 2.41289 seconds -compare tsvd: cuml vs sklearn singular_values_ NOT equal -compare tsvd: cuml vs sklearn components_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ NOT equal -compare tsvd: cuml vs sklearn explained_variance_ratio_ NOT equal -compare tsvd: cuml vs sklearn transformed_result NOT equal -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.69345 seconds -fit_ sklearn done in 56.32050 seconds -transform_ sklearn done in 3.04759 seconds -run_tsvd sklearn done in 59.37479 seconds -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.39285 seconds -fit_ sklearn done in 58.20473 seconds -transform_ sklearn done in 1.83322 seconds -run_tsvd sklearn done in 60.04433 seconds -Namespace(data='mortgage', ncols=256, nrows=8388608, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.69369 seconds -fit_ sklearn done in 59.26015 seconds -transform_ sklearn done in 1.81056 seconds -run_tsvd sklearn done in 65.64520 seconds -Namespace(data='mortgage', ncols=512, nrows=8388608, quarters=16, random_state=16, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.96403 seconds -fit_ sklearn done in 104.91677 seconds -transform_ sklearn done in 9.13272 seconds -run_tsvd sklearn done in 114.05595 seconds -Namespace(data='mortgage', ncols=512, nrows=8388608, quarters=16, random_state=42, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 22.12935 seconds -fit_ sklearn done in 109.13730 seconds -transform_ sklearn done in 7.93190 seconds -run_tsvd sklearn done in 117.07665 seconds -Namespace(data='mortgage', ncols=512, nrows=8388608, quarters=16, random_state=100, rows_per_quarter=100000, test_model='cuml', threshold=0.001, use_assert=0) -load_mortgage done in 21.48032 seconds -fit_ sklearn done in 110.32901 seconds -transform_ sklearn done in 9.79031 seconds -run_tsvd sklearn done in 120.12523 seconds diff --git a/python/cuML/test/test_dbscan.py b/python/cuML/test/test_dbscan.py index a8247bed0a..08e8ccee3f 100644 --- a/python/cuML/test/test_dbscan.py +++ b/python/cuML/test/test_dbscan.py @@ -16,19 +16,41 @@ import pytest from cuml import DBSCAN as cuDBSCAN from sklearn.cluster import DBSCAN as skDBSCAN -from test_utils import array_equal import cudf import numpy as np @pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_dbscan_predict(datatype, input_type): -def test_dbscan_predict(datatype): + X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], + dtype=datatype) + skdbscan = skDBSCAN(eps=3, min_samples=2) + sk_labels = skdbscan.fit_predict(X) + + cudbscan = cuDBSCAN(eps=3, min_samples=2) + + if input_type == 'dataframe': + gdf = cudf.DataFrame() + gdf['0'] = np.asarray([1, 2, 2, 8, 8, 25], dtype=datatype) + gdf['1'] = np.asarray([2, 2, 3, 7, 8, 80], dtype=datatype) + cu_labels = cudbscan.fit_predict(gdf) + else: + cu_labels = cudbscan.fit_predict(X) + + for i in range(X.shape[0]): + assert cu_labels[i] == sk_labels[i] + + +@pytest.mark.parametrize('datatype', [np.float32, np.float64]) +def test_dbscan_predict_numpy(datatype): gdf = cudf.DataFrame() - gdf['0']=np.asarray([1,2,2,8,8,25],dtype=datatype) - gdf['1']=np.asarray([2,2,3,7,8,80],dtype=datatype) + gdf['0'] = np.asarray([1, 2, 2, 8, 8, 25], dtype=datatype) + gdf['1'] = np.asarray([2, 2, 3, 7, 8, 80], dtype=datatype) - X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], dtype = datatype) + X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], + dtype=datatype) print("Calling fit_predict") cudbscan = cuDBSCAN(eps = 3, min_samples = 2) @@ -38,5 +60,3 @@ def test_dbscan_predict(datatype): print(X.shape[0]) for i in range(X.shape[0]): assert cu_labels[i] == sk_labels[i] - - diff --git a/python/cuML/test/test_dbscan_random.py b/python/cuML/test/test_dbscan_random.py deleted file mode 100644 index 0c45b43a41..0000000000 --- a/python/cuML/test/test_dbscan_random.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -from sklearn.cluster import DBSCAN as skDBSCAN -from test_utils import timer,load_mortgage,pd2cudf,array_equal,parse_args,write_log -import pytest - -def test_dbscan(nrows=1000,ncols=100, eps = 3, min_samples = 2, - threshold=1e-3,data_source = 'random',use_assert=True, - quarters=8,rows_per_quarter=100000,test_model='cuml'): - print() - #X = np.random.rand(nrows,ncols) - #X = np.array([[1, 2], [2, 2], [2, 3],[8, 7], [8, 8], [25, 80]],dtype='float64') - if data_source=='random': - X = np.random.rand(nrows,ncols) - elif data_source=='mortgage': - X = load_mortgage(quarters=quarters,rows_per_quarter=rows_per_quarter) - X = X[np.random.randint(0,X.shape[0]-1,nrows),:ncols] - else: - raise NotImplementedError - X = pd.DataFrame({'fea%d'%i:X[:,i] for i in range(X.shape[1])}) - print('%s data'%data_source,X.shape) - test_dbscan_helper(X,eps,min_samples,threshold,use_assert,test_model) - -@pytest.mark.skip(reason="helper function, don't test") -def test_dbscan_helper(X, eps, min_samples, threshold, use_assert, test_model): - dbscan_imp1 = run_dbscan(X, - eps, min_samples, model='sklearn') - print() - if test_model == 'cuml': - X = pd2cudf(X) - - dbscan_imp2 = run_dbscan(X, - eps, min_samples, model=test_model) - print() - for attr in ['labels_']: - passed = array_equal(getattr(dbscan_imp1,attr),getattr(dbscan_imp2,attr), - threshold,with_sign = True) - message = 'compare pca: %s vs sklearn %s %s'%(test_model,attr,'equal' if passed else 'NOT equal') - print(message) - write_log(message) - if use_assert: - assert passed,message - print() - del dbscan_imp1,dbscan_imp2,X - -@timer -def run_dbscan(X,eps,min_samples,model): - if model == 'sklearn': - clustering = skDBSCAN(eps = eps, min_samples = min_samples) - elif model == 'cuml': - from cuML import DBSCAN as cumlDBSCAN - clustering = cumlDBSCAN(eps = eps, min_samples = min_samples) - else: - raise NotImplementedError - - @timer - def fit_(clustering,X,model): - clustering.fit(X) - return clustering - - #@timer - #def transform_(pca,X,model): - #return pca.transform(X) - - clustering = fit_(clustering,X,model=model) - print(clustering.labels_) - #Xpca = transform_(pca,X,model=model) - #pca.transformed_result = lambda: None - #setattr(pca,'transformed_result',Xpca) - return clustering - - -if __name__ == '__main__': - args = parse_args() - write_log(args) - test_dbscan(data_source=args.data,use_assert=False,nrows=args.nrows, - ncols=args.ncols,quarters=args.quarters, - test_model=args.test_model,threshold=args.threshold - ) diff --git a/python/cuML/test/test_pca.py b/python/cuML/test/test_pca.py index b19eaf0a8a..bec0048b4d 100644 --- a/python/cuML/test/test_pca.py +++ b/python/cuML/test/test_pca.py @@ -20,56 +20,81 @@ import cudf import numpy as np + @pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_pca_fit(datatype, input_type): -def test_pca_fit(datatype): - gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + skpca = skPCA(n_components=2) + skpca.fit(X) - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype = datatype) + cupca = cuPCA(n_components=2) - print("Calling fit") - cupca = cuPCA(n_components = 2) - cupca.fit(gdf) - skpca = skPCA(n_components = 2) - skpca.fit(X) + if input_type == 'dataframe': + gdf = cudf.DataFrame() + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + cupca.fit(gdf) + + else: + cupca.fit(X) - for attr in ['singular_values_','components_','explained_variance_','explained_variance_ratio_','noise_variance_']: + for attr in ['singular_values_', 'components_', 'explained_variance_', + 'explained_variance_ratio_', 'noise_variance_']: with_sign = False if attr in ['components_'] else True - assert array_equal(getattr(cupca,attr),getattr(skpca,attr), - 1e-3,with_sign=with_sign) + print(attr) + print(getattr(cupca, attr)) + print(getattr(skpca, attr)) + cuml_res = (getattr(cupca, attr)) + if isinstance(cuml_res, cudf.Series): + cuml_res = cuml_res.to_array() + else: + cuml_res = cuml_res.as_matrix() + skl_res = getattr(skpca, attr) + assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign) + @pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_pca_fit_transform(datatype, input_type): + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + skpca = skPCA(n_components=2) + Xskpca = skpca.fit_transform(X) -def test_pca_fit_transform(datatype): - gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + cupca = cuPCA(n_components=2) - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype = datatype) + if input_type == 'dataframe': + gdf = cudf.DataFrame() + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + Xcupca = cupca.fit_transform(gdf) - print("Calling fit_transform") - cupca = cuPCA(n_components = 2) - Xcupca = cupca.fit_transform(gdf) - skpca = skPCA(n_components = 2) - Xskpca = skpca.fit_transform(X) + else: + Xcupca = cupca.fit_transform(X) - assert array_equal(Xcupca, Xskpca, - 1e-3,with_sign=False) + assert array_equal(Xcupca, Xskpca, 1e-3, with_sign=True) -@pytest.mark.parametrize('datatype', [np.float32, np.float64]) -def test_pca_inverse_transform(datatype): +@pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_pca_inverse_transform(datatype, input_type): gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + cupca = cuPCA(n_components=2) + + if input_type == 'dataframe': + Xcupca = cupca.fit_transform(gdf) - cupca = cuPCA(n_components = 2) - Xcupca = cupca.fit_transform(gdf) + else: + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + Xcupca = cupca.fit_transform(X) - print("Calling inverse_transform") input_gdf = cupca.inverse_transform(Xcupca) assert array_equal(input_gdf, gdf, - 1e-3,with_sign=True) + 1e-3, with_sign=True) diff --git a/python/cuML/test/test_pca_random.py b/python/cuML/test/test_pca_random.py deleted file mode 100644 index 8ed0eaab9b..0000000000 --- a/python/cuML/test/test_pca_random.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -from sklearn.decomposition import PCA as skPCA -from test_utils import timer,load_mortgage,pd2cudf,array_equal,parse_args,write_log -import pytest - - -def test_pca_mortgage(nrows=1000,ncols=100,n_components=10, - svd_solver='full',whiten=False,random_state=42, - threshold=1e-3,data_source = 'random',use_assert=True, - quarters=8,rows_per_quarter=100000,test_model='cuml'): - print() - if data_source=='random': - X = np.random.rand(nrows,ncols) - elif data_source=='mortgage': - X = load_mortgage(quarters=quarters,rows_per_quarter=rows_per_quarter) - X = X[np.random.randint(0,X.shape[0]-1,nrows),:ncols] - else: - raise NotImplementedError - X = pd.DataFrame({'fea%d'%i:X[:,i] for i in range(X.shape[1])}) - print('%s data'%data_source,X.shape) - test_pca_helper(X,n_components,svd_solver,whiten,random_state,threshold,use_assert,test_model) - -@pytest.mark.skip(reason="helper function, don't test") -def test_pca_helper(X,n_components,svd_solver,whiten,random_state,threshold,use_assert,test_model): - pca_imp1 = run_pca(X, - n_components,svd_solver,whiten,random_state,model='sklearn') - print() - if test_model == 'cuml': - X = pd2cudf(X) - elif test_model == 'h2o4gpu': - X = np.array(X).astype(np.float32) - - pca_imp2 = run_pca(X, - n_components,svd_solver,whiten,random_state,model=test_model) - print() - for attr in ['singular_values_','components_','explained_variance_','explained_variance_ratio_','transformed_result']: - with_sign = False if attr in ['components_','transformed_result'] else True - passed = array_equal(getattr(pca_imp1,attr),getattr(pca_imp2,attr), - threshold,with_sign=with_sign) - message = 'compare pca: %s vs sklearn %s %s'%(test_model,attr,'equal' if passed else 'NOT equal') - print(message) - write_log(message) - if use_assert: - assert passed,message - print() - del pca_imp1,pca_imp2,X - -@timer -def run_pca(X,n_components,svd_solver,whiten,random_state,model): - if model == 'sklearn': - pca = skPCA(n_components=n_components, - svd_solver=svd_solver, whiten=whiten, random_state=random_state) - elif model == 'h2o4gpu': - from h2o4gpu.solvers.pca import PCAH2O as h2oPCA - pca = h2oPCA(n_components=n_components, - whiten=whiten)#, random_state=random_state) - elif model == 'cuml': - from cuML import PCA as cumlPCA - pca = cumlPCA(n_components=n_components, - svd_solver=svd_solver, whiten=whiten, random_state=random_state) - else: - raise NotImplementedError - - @timer - def fit_(pca,X,model): - pca.fit(X) - return pca - @timer - def transform_(pca,X,model): - return pca.transform(X) - - pca = fit_(pca,X,model=model) - Xpca = transform_(pca,X,model=model) - pca.transformed_result = lambda: None - setattr(pca,'transformed_result',Xpca) - return pca - - -if __name__ == '__main__': - args = parse_args() - write_log(args) - test_pca_mortgage(data_source=args.data,use_assert=args.use_assert,nrows=args.nrows, - ncols=args.ncols,quarters=args.quarters,random_state=args.random_state, - test_model=args.test_model,threshold=args.threshold - ) diff --git a/python/cuML/test/test_tsvd.py b/python/cuML/test/test_tsvd.py index 193eb12005..d5806164ea 100644 --- a/python/cuML/test/test_tsvd.py +++ b/python/cuML/test/test_tsvd.py @@ -19,56 +19,70 @@ import cudf import numpy as np + @pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_tsvd_fit(datatype, input_type): + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + sktsvd = skTSVD(n_components=1) + sktsvd.fit(X) -def test_tsvd_fit(datatype): - gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + cutsvd = cuTSVD(n_components=1) - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype = datatype) + if input_type == 'dataframe': + gdf = cudf.DataFrame() + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + cutsvd.fit(gdf) - print("Calling fit") - cutsvd = cuTSVD(n_components = 1) - cutsvd.fit(gdf) - sktsvd = skTSVD(n_components = 1) - sktsvd.fit(X) + else: + cutsvd.fit(X) - for attr in ['singular_values_','components_','explained_variance_ratio_']: + for attr in ['singular_values_', 'components_', + 'explained_variance_ratio_']: with_sign = False if attr in ['components_'] else True - assert array_equal(getattr(cutsvd,attr),getattr(sktsvd,attr), - 0.4,with_sign=with_sign) + assert array_equal(getattr(cutsvd, attr), getattr(sktsvd, attr), + 0.4, with_sign=with_sign) + @pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_tsvd_fit_transform(datatype, input_type): + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + skpca = skTSVD(n_components=1) + Xsktsvd = skpca.fit_transform(X) -def test_pca_fit_transform(datatype): - gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + cutsvd = cuTSVD(n_components=1) - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype = datatype) + if input_type == 'dataframe': + gdf = cudf.DataFrame() + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + Xcutsvd = cutsvd.fit_transform(gdf) - print("Calling fit_transform") - cutsvd = cuTSVD(n_components = 1) - Xcutsvd = cutsvd.fit_transform(gdf) - sktsvd = skTSVD(n_components = 1) - Xsktsvd = sktsvd.fit_transform(X) + else: + Xcutsvd = cutsvd.fit_transform(X) - assert array_equal(Xcutsvd, Xsktsvd, - 1e-3,with_sign=False) + assert array_equal(Xcutsvd, Xsktsvd, 1e-3, with_sign=True) -@pytest.mark.parametrize('datatype', [np.float32, np.float64]) -def test_pca_inverse_transform(datatype): +@pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) +def test_tsvd_inverse_transform(datatype, input_type): gdf = cudf.DataFrame() - gdf['0']=np.asarray([-1,-2,-3,1,2,3],dtype=datatype) - gdf['1']=np.asarray([-1,-1,-2,1,1,2],dtype=datatype) + gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype) + gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype) + cutsvd = cuTSVD(n_components=1) + + if input_type == 'dataframe': + Xcutsvd = cutsvd.fit_transform(gdf) - cutsvd = cuTSVD(n_components = 1) - Xcutsvd = cutsvd.fit_transform(gdf) + else: + X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], + dtype=datatype) + Xcutsvd = cutsvd.fit_transform(X) - print("Calling inverse_transform") input_gdf = cutsvd.inverse_transform(Xcutsvd) - print(input_gdf) - assert array_equal(input_gdf, gdf, - 0.4,with_sign=True) + assert array_equal(input_gdf, gdf, 0.4, with_sign=True) diff --git a/python/cuML/test/test_tsvd_random.py b/python/cuML/test/test_tsvd_random.py deleted file mode 100644 index c8ea5b912e..0000000000 --- a/python/cuML/test/test_tsvd_random.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -from sklearn.decomposition import TruncatedSVD as skTSVD -from test_utils import timer,load_mortgage,pd2cudf,array_equal,parse_args,write_log -import pytest - -@pytest.mark.xfail -def test_tsvd_mortgage(nrows=1000,ncols=100,n_components=10, - algorithm='randomized',random_state=42, - threshold=1e-3,data_source = 'random',use_assert=True, - quarters=8,rows_per_quarter=100000,test_model='cuml'): - print() - if data_source=='random': - X = np.random.rand(nrows,ncols) - elif data_source=='mortgage': - X = load_mortgage(quarters=quarters,rows_per_quarter=rows_per_quarter) - X = X[np.random.randint(0,X.shape[0]-1,nrows),:ncols] - else: - raise NotImplementedError - X = pd.DataFrame({'fea%d'%i:X[:,i] for i in range(X.shape[1])}) - print('%s data'%data_source,X.shape) - test_tsvd_helper(X,n_components,algorithm,random_state,threshold,use_assert,test_model) - -@pytest.mark.skip(reason="helper function, don't test") -def test_tsvd_helper(X,n_components,algorithm,random_state,threshold,use_assert,test_model): - tsvd_imp1 = run_tsvd(X, - n_components,algorithm,random_state,model='sklearn') - print() - if test_model == 'cuml': - X = pd2cudf(X) - elif test_model == 'h2o4gpu': - X = np.array(X).astype(np.float32) - tsvd_imp2 = run_tsvd(X, - n_components,algorithm,random_state,model=test_model) - print() - for attr in ['singular_values_','components_','explained_variance_','explained_variance_ratio_','transformed_result']: - with_sign = False if attr in ['components_','transformed_result'] else True - passed = array_equal(getattr(tsvd_imp1,attr),getattr(tsvd_imp2,attr), - threshold,with_sign=with_sign) - message = 'compare tsvd: %s vs sklearn %s %s'%(test_model,attr,'equal' if passed else 'NOT equal') - print(message) - write_log(message) - if use_assert: - assert passed,message - print() - del tsvd_imp1,tsvd_imp2,X - -@timer -def run_tsvd(X,n_components,algorithm,random_state,model): - if model == 'sklearn': - tsvd = skTSVD(n_components=n_components, - algorithm=algorithm, random_state=random_state) - elif model == 'h2o4gpu': - from h2o4gpu.solvers import TruncatedSVDH2O as h2oTSVD - if algorithm == 'arpack': - algorithm = 'cusolver' - tsvd = h2oTSVD(n_components=n_components, - algorithm=algorithm, random_state=random_state) - elif model == 'cuml': - from cuML import TruncatedSVD as cumlTSVD - tsvd = cumlTSVD(n_components=n_components, - random_state=random_state) - else: - raise NotImplementedError - - @timer - def fit_(tsvd,X,model): - tsvd.fit(X) - return tsvd - @timer - def transform_(tsvd,X,model): - return tsvd.transform(X) - - tsvd = fit_(tsvd,X,model=model) - Xtsvd = transform_(tsvd,X,model=model) - tsvd.transformed_result = lambda: None - setattr(tsvd,'transformed_result',Xtsvd) - return tsvd - - -if __name__ == '__main__': - args = parse_args() - write_log(args) - test_tsvd_mortgage(data_source=args.data,use_assert=args.use_assert,nrows=args.nrows, - ncols=args.ncols,quarters=args.quarters,random_state=args.random_state, - test_model=args.test_model,threshold=args.threshold - ) diff --git a/python/cuML/test/test_utils.py b/python/cuML/test/test_utils.py index 386ffb3c72..a4037b5e8e 100644 --- a/python/cuML/test/test_utils.py +++ b/python/cuML/test/test_utils.py @@ -20,6 +20,7 @@ import multiprocessing import pandas as pd import numpy as np +import pytest try: import cudf from cuml import PCA as cumlPCA @@ -159,6 +160,8 @@ def np2cudf(df): pdf[c] = df[:,c] return pdf +@pytest.mark.skip(reason="helper function, don't test") + def test_cudf(nrows=1000,ncols=1000): x = np.random.rand(nrows,ncols).astype(np.float32) df = pd.DataFrame({'fea%d'%i:x[:,i] for i in range(x.shape[1])}) diff --git a/python/cuML/test/write_script.py b/python/cuML/test/write_script.py deleted file mode 100644 index 06b4b73ad1..0000000000 --- a/python/cuML/test/write_script.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np - -def write_mortgage_script(output='test_mortgage.sh'): - params = { - 'python':['test_pca_random.py','test_tsvd_random.py'], - 'data':['mortgage'], - 'nrows':np.power(2,np.arange(18,24)), - 'ncols':np.power(2,np.arange(7,10)), - 'random_state':[16,42,100], - 'quarters':[16], - 'use_assert':[0], - 'threshold':[1e-3], - 'test_model':['cuml'] - #'test_model':['h2o4gpu'] - } - cols = ['python','data','nrows','ncols', - 'random_state','quarters','use_assert','threshold', - 'test_model', - ] - assert len(cols)==len(params) - write_script(output,params,cols) - -def write_script(output,params,cols): - with open(output,'w') as f: - pass - permutation([],0,cols,params,output) - -def permutation(pre,step,cols,params,output): - if step == len(cols): - print(pre) - line = ' '.join(pre) - with open(output,'a') as f: - f.write('%s\n'%line) - return - if len(pre)==step: - pre.append('') - for v in params[cols[step]]: - if cols[step] == 'python': - pre[step] = '%s %s'%(cols[step],v) - else: - pre[step] = '--%s %s'%(cols[step],v) - permutation(pre,step+1,cols,params,output) - -if __name__ == '__main__': - write_mortgage_script(output='test_mortgage.sh') diff --git a/python/cuML/tsvd/tsvd_wrapper.pyx b/python/cuML/tsvd/tsvd_wrapper.pyx index 10aaf15119..251e6fcc47 100644 --- a/python/cuML/tsvd/tsvd_wrapper.pyx +++ b/python/cuML/tsvd/tsvd_wrapper.pyx @@ -119,13 +119,7 @@ class TruncatedSVD: 'jacobi': COV_EIG_JACOBI }[algorithm] - def _initialize_arrays(self, input_gdf, n_components, n_rows, n_cols): - - x = [] - for col in input_gdf.columns: - x.append(input_gdf[col]._column.dtype) - break - self.gdf_datatype = np.dtype(x[0]) + def _initialize_arrays(self, n_components, n_rows, n_cols): self.trans_input_ = cuda.to_device(np.zeros(n_rows*n_components, dtype=self.gdf_datatype)) @@ -137,8 +131,11 @@ class TruncatedSVD: self.explained_variance_ratio_ = cudf.Series( np.zeros(n_components, dtype=self.gdf_datatype)) + self.mean_ = cudf.Series(np.zeros(n_cols, dtype=self.gdf_datatype)) self.singular_values_ = cudf.Series(np.zeros(n_components, - dtype=self.gdf_datatype)) + dtype=self.gdf_datatype)) + self.noise_variance_ = cudf.Series(np.zeros(1, + dtype=self.gdf_datatype)) def _get_ctype_ptr(self, obj): # The manner to access the pointers in the gdf's might change, so @@ -165,24 +162,38 @@ class TruncatedSVD: """ # c params + + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + self.gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + # PCA expects transpose of the input + X_m = X.as_gpu_matrix() + self.params.n_rows = len(X) + self.params.n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + self.params.n_rows = X.shape[0] + self.params.n_cols = X.shape[1] + + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) + cpdef c_tsvd.paramsTSVD params params.n_components = self.params.n_components - params.n_rows = len(X) - params.n_cols = len(X._cols) + params.n_rows = self.params.n_rows + params.n_cols = self.params.n_cols params.n_iterations = self.params.iterated_power params.tol = self.params.tol params.algorithm = self.params.svd_solver - # python params - self.params.n_rows = len(X) - self.params.n_cols = len(X._cols) - - self._initialize_arrays(X, self.params.n_components, + self._initialize_arrays(self.params.n_components, self.params.n_rows, self.params.n_cols) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) - cdef uintptr_t components_ptr = self._get_ctype_ptr(self.components_) cdef uintptr_t explained_var_ptr = self._get_column_ptr( @@ -222,7 +233,6 @@ class TruncatedSVD: singular_vals_ptr, params) - components_gdf = cudf.DataFrame() for i in range(0, params.n_cols): components_gdf[str(i)] = self.components_[i*params.n_components:(i+1)*params.n_components] @@ -234,7 +244,7 @@ class TruncatedSVD: self.singular_values_ptr = singular_vals_ptr del(X_m) - + return self def fit_transform(self, X): """ @@ -279,6 +289,18 @@ class TruncatedSVD: """ + cdef uintptr_t trans_input_ptr + if (isinstance(X, cudf.DataFrame)): + X_m = X.as_gpu_matrix() + elif (isinstance(X, np.ndarray)): + self.gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + trans_input_ptr = self._get_ctype_ptr(X_m) + cpdef c_tsvd.paramsTSVD params params.n_components = self.params.n_components params.n_rows = len(X) @@ -290,10 +312,10 @@ class TruncatedSVD: break gdf_datatype = np.dtype(x[0]) - input_data = cuda.to_device(np.zeros(params.n_rows*params.n_cols,dtype=gdf_datatype.type)) + input_data = cuda.to_device(np.zeros(params.n_rows*params.n_cols, + dtype=gdf_datatype.type)) cdef uintptr_t input_ptr = input_data.device_ctypes_pointer.value - cdef uintptr_t trans_input_ptr = X.as_gpu_matrix().device_ctypes_pointer.value cdef uintptr_t components_ptr = self.components_ptr if gdf_datatype.type == np.float32: @@ -331,10 +353,29 @@ class TruncatedSVD: """ + cdef uintptr_t input_ptr + if (isinstance(X, cudf.DataFrame)): + gdf_datatype = np.dtype(X[X.columns[0]]._column.dtype) + X_m = X.as_gpu_matrix() + n_rows = len(X) + n_cols = len(X._cols) + + elif (isinstance(X, np.ndarray)): + gdf_datatype = X.dtype + X_m = cuda.to_device(np.array(X, order='F')) + n_rows = X.shape[0] + n_cols = X.shape[1] + + else: + msg = "X matrix format not supported" + raise TypeError(msg) + + input_ptr = self._get_ctype_ptr(X_m) + cpdef c_tsvd.paramsTSVD params params.n_components = self.params.n_components params.n_rows = len(X) - params.n_cols = len(X._cols) + params.n_cols = self.params.n_cols x = [] for col in X.columns: @@ -346,9 +387,6 @@ class TruncatedSVD: np.zeros(params.n_rows*params.n_components, dtype=gdf_datatype.type)) - X_m = X.as_gpu_matrix() - cdef uintptr_t input_ptr = self._get_ctype_ptr(X_m) - cdef uintptr_t trans_input_ptr = self._get_ctype_ptr(trans_input_data) cdef uintptr_t components_ptr = self.components_ptr