Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use hypothesis #5759

Merged
merged 12 commits into from
Jun 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Jenkinsfile-win64
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def TestWin64CPU() {
"""
echo "Installing Python dependencies..."
bat """
conda activate && conda upgrade scikit-learn pandas numpy
conda activate && conda install -y hypothesis && conda upgrade scikit-learn pandas numpy hypothesis
"""
echo "Running Python tests..."
bat "conda activate && python -m pytest -v -s --fulltrace tests\\python"
Expand All @@ -138,7 +138,7 @@ def TestWin64GPU(args) {
"""
echo "Installing Python dependencies..."
bat """
conda activate && conda upgrade scikit-learn pandas numpy
conda activate && conda install -y hypothesis && conda upgrade scikit-learn pandas numpy hypothesis
"""
echo "Running Python tests..."
bat """
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
- if /i "%DO_PYTHON%" == "on" (
conda config --set always_yes true &&
conda update -q conda &&
conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz
conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz hypothesis
)
- set PATH=C:\Miniconda3-x64\Library\bin\graphviz;%PATH%
# R: based on https://github.com/krlmlr/r-appveyor
Expand Down
35 changes: 25 additions & 10 deletions src/data/data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,30 @@ void CopyInfoImpl(ArrayInterface column, HostDeviceVector<float>* out) {
});
}

// Build the cumulative group pointer from per-group sizes described by an
// array interface.
//
// Every element of `column` is converted to bst_group_t inside a device
// kernel, copied into `out`, and prefix-summed.  On return `out` holds
// `column.num_rows + 1` entries: `(*out)[0] == 0` and `(*out)[i]` is the sum
// of the first `i` input elements.
//
// \param column Input array interface.  Its type code must be a signed ('i')
//               or unsigned ('u') integer, otherwise the CHECK below fails.
// \param out    Host vector that is resized and overwritten with the result.
//
// NOTE: the current CUDA device is switched to the one owning `column.data`
// and is not switched back inside this function.
void CopyGroupInfoImpl(ArrayInterface column, std::vector<bst_group_t>* out) {
  CHECK(column.type[1] == 'i' || column.type[1] == 'u')
      << "Expected integer metainfo";

  // Look up which device owns `ptr`, make it current, and return its ordinal.
  auto SetDeviceToPtr = [](void* ptr) {
    cudaPointerAttributes attr;
    dh::safe_cuda(cudaPointerGetAttributes(&attr, ptr));
    int32_t ptr_device = attr.device;
    dh::safe_cuda(cudaSetDevice(ptr_device));
    return ptr_device;
  };
  auto ptr_device = SetDeviceToPtr(column.data);

  // Staging buffer of bst_group_t so that any integer input width ends up in
  // the element type used by the group pointer.
  dh::TemporaryArray<bst_group_t> temp(column.num_rows);
  auto d_tmp = temp.data();

  dh::LaunchN(ptr_device, column.num_rows, [=] __device__(size_t idx) {
    d_tmp[idx] = column.GetElement(idx);
  });

  auto length = column.num_rows;
  out->resize(length + 1);
  out->at(0) = 0;
  // Sizes go to positions [1, length]; the leading zero makes the inclusive
  // scan below yield group start offsets.
  thrust::copy(temp.data(), temp.data() + length, out->begin() + 1);
  std::partial_sum(out->begin(), out->end(), out->begin());
}

void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
auto const& j_arr = get<Array>(j_interface);
Expand All @@ -53,16 +77,7 @@ void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
} else if (key == "base_margin") {
CopyInfoImpl(array_interface, &base_margin_);
} else if (key == "group") {
// Ranking is not performed on device.
thrust::device_ptr<uint32_t> p_src{
reinterpret_cast<uint32_t*>(array_interface.data)};

auto length = array_interface.num_rows;
group_ptr_.resize(length + 1);
group_ptr_[0] = 0;
thrust::copy(p_src, p_src + length, group_ptr_.begin() + 1);
std::partial_sum(group_ptr_.begin(), group_ptr_.end(), group_ptr_.begin());

CopyGroupInfoImpl(array_interface, &group_ptr_);
return;
} else {
LOG(FATAL) << "Unknown metainfo: " << key;
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ENV GOSU_VERSION 1.10
# Install Python packages in default env
RUN \
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
recommonmark guzzle_sphinx_theme mock breathe graphviz \
recommonmark guzzle_sphinx_theme mock breathe graphviz hypothesis\
pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \
pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
pip install "dask[complete]"
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.cudf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \
conda create -n cudf_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudf cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

ENV GOSU_VERSION 1.10

Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \
conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 dask dask-cuda numpy pytest scipy scikit-learn pandas \
matplotlib wheel python-kubernetes urllib3 graphviz
matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

ENV GOSU_VERSION 1.10

Expand Down
30 changes: 23 additions & 7 deletions tests/cpp/data/test_metainfo.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons

std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
column["shape"] = Array(j_shape);
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(4)))});
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});
column["version"] = Integer(static_cast<Integer::Int>(1));
column["typestr"] = String(typestr);

Expand Down Expand Up @@ -78,16 +78,32 @@ TEST(MetaInfo, FromInterface) {

// Checks that MetaInfo::SetInfo("group", ...) turns per-group sizes supplied
// through a device array interface into a cumulative group pointer, for both
// unsigned 32-bit and signed 64-bit inputs, and that floating-point input is
// rejected.
TEST(MetaInfo, Group) {
  cudaSetDevice(0);
  MetaInfo info;

  thrust::device_vector<uint32_t> d_uint;
  std::string uint_str = PrepareData<uint32_t>("<u4", &d_uint);
  info.SetInfo("group", uint_str.c_str());
  // Bound by reference: it keeps tracking `info.group_ptr_` when `info` is
  // reassigned below.
  auto& h_group = info.group_ptr_;
  ASSERT_EQ(h_group.size(), d_uint.size() + 1);
  for (size_t i = 1; i < h_group.size(); ++i) {
    ASSERT_EQ(h_group[i], d_uint[i - 1] + h_group[i - 1]) << "i: " << i;
  }

  thrust::device_vector<int64_t> d_int64;
  std::string int_str = PrepareData<int64_t>("<i8", &d_int64);
  info = MetaInfo();
  info.SetInfo("group", int_str.c_str());
  // Compare against the int64 input that was actually passed in, not the
  // uint32 data from the previous section.
  ASSERT_EQ(h_group.size(), d_int64.size() + 1);
  for (size_t i = 1; i < h_group.size(); ++i) {
    int64_t const group_size = d_int64[i - 1];
    ASSERT_EQ(h_group[i], static_cast<bst_group_t>(group_size) + h_group[i - 1])
        << "i: " << i;
  }

  // Incorrect type
  thrust::device_vector<float> d_float;
  std::string float_str = PrepareData<float>("<f4", &d_float);
  info = MetaInfo();
  EXPECT_ANY_THROW(info.SetInfo("group", float_str.c_str()));
}
} // namespace xgboost
70 changes: 45 additions & 25 deletions tests/python-gpu/test_gpu_linear.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,50 @@
import sys
import pytest
import unittest
from hypothesis import strategies, given, settings, assume
import xgboost as xgb
sys.path.append("tests/python")
import testing as tm

sys.path.append('tests/python/')
import test_linear # noqa: E402
import testing as tm # noqa: E402

# Hypothesis strategy that draws one parameter dictionary for the 'gblinear'
# booster. Every key listed below is present in each drawn dictionary, with
# its value sampled from the given numeric range or list of choices.
parameter_strategy = strategies.fixed_dictionaries({
'booster': strategies.just('gblinear'),
'eta': strategies.floats(0.01, 0.25),
'tolerance': strategies.floats(1e-5, 1e-2),
'nthread': strategies.integers(1, 4),
'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',
'greedy', 'thrifty']),
'top_k': strategies.integers(1, 10),
})

class TestGPULinear(unittest.TestCase):
datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
common_param = {
'booster': ['gblinear'],
'updater': ['gpu_coord_descent'],
'eta': [0.5],
'top_k': [10],
'tolerance': [1e-5],
'alpha': [.1],
'lambda': [0.005],
'coordinate_selection': ['cyclic', 'random', 'greedy']}
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` and return the evaluation history that was recorded.

    ``dmat`` is also registered as the only evaluation set, under the name
    ``'train'``. The dictionary that ``xgb.train`` fills in through its
    ``evals_result`` argument is returned; the trained booster is discarded.

    :param param: parameter dictionary passed to ``xgb.train``.
    :param dmat: training data, also used for evaluation.
    :param num_rounds: number of boosting rounds.
    :return: the populated ``evals_result`` dictionary.
    """
    history = {}
    xgb.train(
        param,
        dmat,
        num_boost_round=num_rounds,
        evals=[(dmat, 'train')],
        evals_result=history,
        verbose_eval=False,
    )
    return history

@pytest.mark.skipif(**tm.no_sklearn())
def test_gpu_coordinate(self):
parameters = self.common_param.copy()
parameters['gpu_id'] = [0]
for param in test_linear.parameter_combinations(parameters):
results = test_linear.run_suite(
param, 100, self.datasets, scale_features=True)
test_linear.assert_regression_result(results, 1e-2)
test_linear.assert_classification_result(results)

class TestGPULinear:
    """Property-based tests for training with the ``gpu_coord_descent`` updater."""

    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy)
    @settings(deadline=None)
    def test_gpu_coordinate(self, param, num_rounds, dataset):
        """Assert ``tm.non_increasing`` on the per-round training metric."""
        # Discard generated datasets that have no labels.
        assume(len(dataset.y) > 0)
        param['updater'] = 'gpu_coord_descent'
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
        assert tm.non_increasing(result)

    # Loss is not guaranteed to always decrease because of regularisation parameters
    # We test a weaker condition that the loss has not increased between the first and last
    # iteration
    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
           strategies.floats(1e-5, 2.0))
    @settings(deadline=None)
    def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
        """Same setup with ``alpha``/``lambda`` set; only first vs. last round is compared."""
        # Discard generated datasets that have no labels.
        assume(len(dataset.y) > 0)
        param['updater'] = 'gpu_coord_descent'
        param['alpha'] = alpha
        param['lambda'] = lambd
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)['train'][dataset.metric]
        assert tm.non_increasing([result[0], result[-1]])
5 changes: 4 additions & 1 deletion tests/python-gpu/test_gpu_pickling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import numpy as np
import subprocess
import os
import sys
import json
import pytest

sys.path.append("tests/python")
import testing as tm

import xgboost as xgb
from xgboost import XGBClassifier

Expand Down Expand Up @@ -90,7 +94,6 @@ def test_wrap_gpu_id(self):
)
status = subprocess.call(args, env=env)
assert status == 0

os.remove(model_path)

def test_pickled_predictor(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/python-gpu/test_gpu_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def test_inplace_predict_cudf(self):
rows = 1000
cols = 10
rng = np.random.RandomState(1994)
cp.cuda.runtime.setDevice(0)
X = rng.randn(rows, cols)
X = pd.DataFrame(X)
y = rng.randn(rows)

X = cudf.from_pandas(X)

dtrain = xgb.DMatrix(X, y)
Expand Down
Loading