Skip to content

Commit

Permalink
Merge pull request #7 from Salonijain27/all_tests
Browse files Browse the repository at this point in the history
All tests
  • Loading branch information
Salonijain27 authored May 3, 2019
2 parents 18c9987 + 20d27ae commit b6b425a
Show file tree
Hide file tree
Showing 12 changed files with 130 additions and 57 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- PR #500: Added CI check for black listed CUDA Runtime API calls
- PR #475: exposing cumlHandle for dbscan from python-side
- PR #395: Edited the CONTRIBUTING.md file
- PR #407: Test files to run stress, correctness and unit tests for cuml algos
- PR #512: generic copy method for copying buffers between device/host
- PR #533: Add cudatoolkit conda dependency
- PR #524: Use cmake find blas and find lapack to pass configure options to faiss
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ nvidia-smi
logger "Activate conda env..."
source activate gdf
conda install -c rapidsai/label/cuda${CUDA_REL} -c rapidsai-nightly/label/cuda${CUDA_REL} cudf=${CUDF_VERSION} rmm=${RMM_VERSION} nvstrings=${NVSTRINGS_VERSION}
conda install -c conda-forge lapack cmake==3.14.3
conda install -c conda-forge lapack cmake==3.14.3 umap-learn

logger "Check versions..."
python --version
Expand Down
18 changes: 18 additions & 0 deletions python/cuml/test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest


def pytest_addoption(parser):
    """Register the opt-in command-line flags used by the tests.

    Adds ``--run_quality`` and ``--run_stress``. Both are ``store_true``
    switches that default to ``False`` when omitted from the command line.
    """
    flag_help = (
        ("--run_quality", "run correctness tests"),
        ("--run_stress", "run stress tests"),
    )
    for flag, description in flag_help:
        parser.addoption(flag, action="store_true",
                         default=False, help=description)


@pytest.fixture
def run_stress(request):
    """Fixture returning the value of the ``--run_stress`` option."""
    config = request.config
    return config.getoption("--run_stress")


@pytest.fixture
def run_quality(request):
    """Fixture returning the value of the ``--run_quality`` option."""
    config = request.config
    return config.getoption("--run_quality")
2 changes: 1 addition & 1 deletion python/cuml/test/test_coordinate_descent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_dbscan.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_kalman_filter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_kmeans.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_linear_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_nearest_neighbors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_pca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/test/test_tsvd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
150 changes: 102 additions & 48 deletions python/cuml/test/test_umap.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018, NVIDIA CORPORATION.
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,44 +12,58 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest

from cuml.manifold.umap import UMAP

# These tests compare cuML's UMAP against the umap-learn package, so
# umap-learn must be installed before running them, e.g.:
#   conda install -c conda-forge umap-learn

import pytest
from cuml.test.utils import array_equal

from cuml.manifold.umap import UMAP as UMAP_cuml
import umap
import cudf
import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.manifold.t_sne import trustworthiness
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets.samples_generator import make_blobs

dataset_names = ['iris', 'digits', 'wine', 'blobs']

def test_blobs_cluster():
data, labels = datasets.make_blobs(
n_samples=500, n_features=10, centers=5)
embedding = UMAP().fit_transform(data)
score = adjusted_rand_score(labels,
KMeans(5).fit_predict(embedding))
assert score == 1.0

def test_umap_fit_transform_score(run_stress, run_quality):

def test_umap_transform_on_iris():
iris = datasets.load_iris()
iris_selection = np.random.choice(
[True, False], 150, replace=True, p=[0.75, 0.25])
data = iris.data[iris_selection]
if run_stress:
n_samples = 500000
n_features = 1000

elif run_quality:
n_samples = 5000
n_features = 100

else:
n_samples = 500
n_features = 10

data, labels = make_blobs(n_samples=n_samples, n_features=n_features,
centers=10, random_state=42)

model = umap.UMAP(n_neighbors=10, min_dist=0.1)
cuml_model = UMAP_cuml(n_neighbors=10, min_dist=0.01, verbose=True)

fitter = UMAP(n_neighbors=10, min_dist=0.01, verbose=True)
fitter.fit(data)

new_data = iris.data[~iris_selection]
embedding = fitter.transform(new_data)
embedding = model.fit_transform(data)
cuml_embedding = cuml_model.fit_transform(data)

trust = trustworthiness(new_data, embedding, 10)
assert trust >= 0.90
cuml_score = adjusted_rand_score(labels,
KMeans(10).fit_predict(cuml_embedding))
score = adjusted_rand_score(labels,
KMeans(10).fit_predict(embedding))

assert array_equal(score, cuml_score, 1e-2, with_sign=True)

def test_supervised_umap_trustworthiness_on_iris():
iris = datasets.load_iris()
Expand Down Expand Up @@ -79,53 +93,93 @@ def test_umap_trustworthiness_on_iris():
embedding = UMAP(n_neighbors=10, min_dist=0.01).fit_transform(data)
trust = trustworthiness(iris.data, embedding, 10)

# We are doing a spectral embedding but not a
# multi-component layout (which is marked experimental).
# As a result, our score drops by 0.006.
assert trust >= 0.964

@pytest.mark.parametrize('name', dataset_names)
def test_umap_fit_transform_trust(name, run_stress, run_quality):

def test_umap_trustworthiness_on_iris_random_init():
iris = datasets.load_iris()
data = iris.data
embedding = UMAP(
n_neighbors=10, min_dist=0.01, init="random"
).fit_transform(data)
trust = trustworthiness(iris.data, embedding, 10)
assert trust >= 0.95
if name == 'iris':
iris = datasets.load_iris()
data = iris.data
labels = iris.target

elif name == 'digits':
digits = datasets.load_digits(n_class=5)
data = digits.data
labels = digits.target

elif name == 'wine':
wine = datasets.load_wine()
data = wine.data
labels = wine.target
else:
data, labels = make_blobs(n_samples=5000, n_features=10,
centers=10, random_state=42)

model = umap.UMAP(n_neighbors=10, min_dist=0.01)
cuml_model = UMAP_cuml(n_neighbors=10, min_dist=0.01, verbose=True)
embedding = model.fit_transform(data)
cuml_embedding = cuml_model.fit_transform(data)

trust = trustworthiness(data, embedding, 10)
cuml_trust = trustworthiness(data, cuml_embedding, 10)

assert array_equal(trust, cuml_trust, 1e-2, with_sign=True)


@pytest.mark.parametrize('should_downcast', [True, False])
@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray'])
def test_umap_data_formats(input_type, should_downcast):
def test_umap_data_formats(input_type, should_downcast,
run_stress, run_quality):

dtype = np.float32 if not should_downcast else np.float64
n_samples = 50000
n_feats = 50
if run_stress:
X, y = datasets.make_blobs(n_samples=n_samples*10,
n_features=n_feats, random_state=0)

# For now, FAISS based nearest_neighbors only supports single precision
digits = datasets.load_digits(n_class=9)
X = digits["data"].astype(dtype)
elif run_quality:
X, y = datasets.make_blobs(n_samples=int(n_samples/10),
n_features=n_feats, random_state=0)

umap = UMAP(n_neighbors=3, n_components=2,
should_downcast=should_downcast)
else:
# For now, FAISS based nearest_neighbors only supports single precision
digits = datasets.load_digits(n_class=9)
X = digits["data"].astype(dtype)

if input_type == 'dataframe':
X = cudf.DataFrame.from_pandas(pd.DataFrame(X))
embeds = umap.fit_transform(X)
umap = UMAP_cuml(n_neighbors=3, n_components=2,
should_downcast=should_downcast)

if input_type == 'dataframe':
X_pd = pd.DataFrame(
{'fea%d' % i: X[0:, i] for i in range(X.shape[1])})
X_cudf = cudf.DataFrame.from_pandas(X_pd)
embeds = umap.fit_transform(X_cudf)
assert type(embeds) == cudf.DataFrame

else:
embeds = umap.fit_transform(X)

assert type(embeds) == np.ndarray


@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray'])
def test_umap_downcast_fails(input_type):
def test_umap_downcast_fails(input_type, run_stress, run_quality):
n_samples = 50000
n_feats = 50
if run_stress:
X, y = datasets.make_blobs(n_samples=n_samples*10,
n_features=n_feats, random_state=0)

elif run_quality:
X, y = datasets.make_blobs(n_samples=int(n_samples/10),
n_features=n_feats, random_state=0)

X = np.array([[1.0, 1.0], [50.0, 1.0], [51.0, 1.0]], dtype=np.float64)
else:
X = np.array([[1.0, 1.0], [50.0, 1.0], [51.0, 1.0]],
dtype=np.float64)

# Test fit() fails with double precision when should_downcast set to False
umap = UMAP(should_downcast=False)
umap = UMAP_cuml(should_downcast=False)
if input_type == 'dataframe':
X = cudf.DataFrame.from_pandas(pd.DataFrame(X))

Expand All @@ -135,7 +189,7 @@ def test_umap_downcast_fails(input_type):
# Test fit() fails when downcast corrupted data
X = np.array([[np.finfo(np.float32).max]], dtype=np.float64)

umap = UMAP(should_downcast=True)
umap = UMAP_cuml(should_downcast=True)
if input_type == 'dataframe':
X = cudf.DataFrame.from_pandas(pd.DataFrame(X))

Expand Down

0 comments on commit b6b425a

Please sign in to comment.