Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dask] Add shap tests. #6575

Merged
merged 9 commits into from
Jan 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,7 @@ jobs:
python -m pip install wheel setuptools mypy dask[complete] distributed
- name: Run mypy
run: |
cd python-package
# dask is required to pass, others are not
mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent
mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent
# If any of the above failed, contributor won't see the next error.
mypy . || true
make mypy

doxygen:
runs-on: ubuntu-latest
Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,15 @@ cover: check
)
endif


# Type-check the Python package with mypy.
# The dask modules are required to pass; the remaining modules are advisory.
# The steps are chained with `&&` so that a failure in a required check fails
# the target (with `;` the exit status of the trailing `|| true` masked every
# earlier failure).  As a consequence, if a dask check fails the contributor
# won't see the errors from the later steps.
mypy:
	cd python-package && \
	mypy ./xgboost/dask.py ../tests/python/test_with_dask.py --follow-imports=silent && \
	mypy ../tests/python-gpu/test_gpu_with_dask.py --follow-imports=silent && \
	{ mypy . || true; }

clean:
$(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o #xgboost
$(RM) -rf build_tests *.gcov tests/cpp/xgboost_test
Expand Down
69 changes: 68 additions & 1 deletion tests/python/test_with_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys
import numpy as np
import json
from typing import List, Tuple, Union, Dict, Optional, Callable, Type
from typing import List, Tuple, Dict, Optional, Type, Any
import asyncio
import tempfile
from sklearn.datasets import make_classification
Expand Down Expand Up @@ -953,6 +953,73 @@ def worker_fn(worker_addr: str, data_ref: Dict) -> None:
# Subtract the on disk resource from each worker
assert cnt - n_workers == n_partitions

def run_shap(self, X: Any, y: Any, params: Dict[str, Any], client: "Client") -> None:
    """Check that feature contributions add up to the predicted margin.

    ``X`` and ``y`` are wrapped as dask arrays, a booster is trained on them
    for 10 rounds with ``params``, and predictions are then requested on the
    same data twice: once with ``pred_contribs=True`` and once with
    ``output_margin=True``.  The contributions summed over their last axis
    must match the margin within ``rtol=1e-5`` / ``atol=1e-5``.
    """
    X = da.from_array(X)
    y = da.from_array(y)

    train_data = xgb.dask.DaskDMatrix(client, X, y)
    output = xgb.dask.train(client, params, train_data, num_boost_round=10)
    booster = output['booster']

    # A separate DMatrix built from the same arrays is used for prediction.
    test_Xy = xgb.dask.DaskDMatrix(client, X, y)

    contribs = xgb.dask.predict(
        client, booster, test_Xy, pred_contribs=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, test_Xy, output_margin=True
    ).compute()

    last_axis = contribs.ndim - 1
    summed = np.sum(contribs, axis=last_axis)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)

def run_shap_cls_sklearn(self, X: Any, y: Any, client: "Client") -> None:
    """Check contributions against the margin for the sklearn-style classifier.

    A ``DaskXGBClassifier`` with default parameters is fitted on the dask
    versions of ``X`` and ``y``; its underlying booster is then used to
    predict contributions and margins on the same data.  The contributions
    summed over their last axis must match the margin within
    ``rtol=1e-5`` / ``atol=1e-5``.
    """
    X = da.from_array(X)
    y = da.from_array(y)

    classifier = xgb.dask.DaskXGBClassifier()
    classifier.client = client
    classifier.fit(X, y)
    booster = classifier.get_booster()

    test_Xy = xgb.dask.DaskDMatrix(client, X, y)

    contribs = xgb.dask.predict(
        client, booster, test_Xy, pred_contribs=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, test_Xy, output_margin=True
    ).compute()

    last_axis = contribs.ndim - 1
    summed = np.sum(contribs, axis=last_axis)
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)

def test_shap(self, client: "Client") -> None:
    """Run the contribution-vs-margin checks for regression and multi-class.

    One regression dataset is checked with ``reg:squarederror``; the digits
    dataset is checked with both ``multi:softmax`` and ``multi:softprob``
    (10 classes) and finally through the sklearn-style classifier.
    """
    # ``load_boston`` was deprecated in scikit-learn 1.0 and removed in 1.2,
    # so importing it fails on current releases.  ``load_diabetes`` is a
    # bundled regression dataset that needs no download.
    from sklearn.datasets import load_diabetes, load_digits

    X, y = load_diabetes(return_X_y=True)
    self.run_shap(X, y, {'objective': 'reg:squarederror'}, client)

    X, y = load_digits(return_X_y=True)
    for objective in ('multi:softmax', 'multi:softprob'):
        self.run_shap(X, y, {'objective': objective, 'num_class': 10}, client)

    self.run_shap_cls_sklearn(X, y, client)

def run_shap_interactions(
    self,
    X: Any,
    y: Any,
    params: Dict[str, Any],
    client: "Client"
) -> None:
    """Check that interaction values add up to the predicted margin.

    ``X`` and ``y`` are wrapped as dask arrays, a booster is trained on them
    for 10 rounds with ``params``, and predictions are requested on the same
    data with ``pred_interactions=True`` and with ``output_margin=True``.
    The interaction values summed over their last two axes must match the
    margin within ``rtol=1e-5`` / ``atol=1e-5``.
    """
    X = da.from_array(X)
    y = da.from_array(y)

    train_data = xgb.dask.DaskDMatrix(client, X, y)
    output = xgb.dask.train(client, params, train_data, num_boost_round=10)
    booster = output['booster']

    test_Xy = xgb.dask.DaskDMatrix(client, X, y)

    interactions = xgb.dask.predict(
        client, booster, test_Xy, pred_interactions=True
    ).compute()
    margin = xgb.dask.predict(
        client, booster, test_Xy, output_margin=True
    ).compute()

    # Collapse the two trailing axes of the interaction values.
    last_axis = interactions.ndim - 1
    summed = np.sum(interactions, axis=(last_axis, last_axis - 1))
    assert np.allclose(summed, margin, rtol=1e-5, atol=1e-5)

def test_shap_interactions(self, client: "Client") -> None:
    """Run the interaction-vs-margin check on a regression dataset."""
    # ``load_boston`` was deprecated in scikit-learn 1.0 and removed in 1.2,
    # so importing it fails on current releases.  ``load_diabetes`` is a
    # bundled regression dataset that needs no download.
    from sklearn.datasets import load_diabetes

    X, y = load_diabetes(return_X_y=True)
    params = {'objective': 'reg:squarederror'}
    self.run_shap_interactions(X, y, params, client)

@pytest.mark.skipif(**tm.no_sklearn())
def test_sklearn_io(self, client: 'Client') -> None:
from sklearn.datasets import load_digits
Expand Down