Skip to content

Commit

Permalink
Unpin shap and scipy (#4436)
Browse files Browse the repository at this point in the history
* Unpin scipy and shap

* Change for scipy compatibility

* Changes for shap compatibility

* Deps update check fixes

* Theoretically fix nightly failure

* release notes

* Test fixes

* removed -n 2

* removed -n 2 from git-test-other

* maybe upping pytest-xdist fixes this?

* windows no parallel-cpu and reorder linux test

* python 3.10, 3.11 test

* maybe fix test

* fix dependency checker

* Clarify make command names

* Fix deps install logic

---------

Co-authored-by: MichaelFu512 <MichaelFu512@gmail.com>
Co-authored-by: Michael Fu <michael.fu@alteryx.com>
  • Loading branch information
3 people authored Jun 4, 2024
1 parent a6e14b5 commit 3d5bf45
Show file tree
Hide file tree
Showing 17 changed files with 63 additions and 55 deletions.
4 changes: 2 additions & 2 deletions .github/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ outputs:
- numpy >=1.22.0
- pandas >=1.5.0, <2.1.0
- dask >=2022.2.0, !=2022.10.1
- scipy >=1.5.0, <1.12.0
- scipy >=1.5.0
- scikit-learn >=1.3.2
- scikit-optimize >=0.9.0
- statsmodels >=0.12.2
- colorama >=0.4.4
- cloudpickle >=1.5.0
- click >=8.0.0
- shap >=0.42.0, <0.45.0
- shap >=0.45.0
- texttable >=1.6.2
- woodwork >=0.22.0
- featuretools >=1.16.0
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/latest_dependency_checker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ jobs:
delete-branch: true
base: main
assignees: machineFL
reviewers: jeremyliweishih, chukarsten, MichaelFu512, eccabay, christopherbunn
reviewers: jeremyliweishih, thehomebrewnerd, MichaelFu512, eccabay, christopherbunn
2 changes: 1 addition & 1 deletion .github/workflows/lint_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python_version: ["3.9", "3.10"]
python_version: ["3.9", "3.10", "3.11"]
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down
13 changes: 2 additions & 11 deletions .github/workflows/linux_unit_tests_with_latest_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,8 @@ jobs:
strategy:
fail-fast: false
matrix:
include:
- python_version: "3.9"
command: 'git-test-automl'
- python_version: "3.9"
command: 'git-test-prophet'
- python_version: "3.9"
command: 'git-test-modelunderstanding'
- python_version: "3.9"
command: 'git-test-other'
- python_version: "3.9"
command: 'git-test-parallel'
python_version: ['3.9', '3.10', '3.11']
command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet', 'git-test-other']
steps:
- name: Set up Python ${{ matrix.python_version }}
uses: actions/setup-python@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linux_unit_tests_with_minimum_deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
python_version: ['3.9']
command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-other', 'git-test-parallel', 'git-test-prophet']
command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet', 'git-test-other']
steps:
- name: Set up Python ${{ matrix.python_version }}
uses: actions/setup-python@v4
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/windows_unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
python_version: ['3.9']
command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-other', 'git-test-parallel', 'git-test-prophet']
command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet-no-parallel-cpu', 'git-test-other-no-parallel-cpu']
steps:
- name: Download Miniconda
shell: pwsh
Expand Down Expand Up @@ -59,7 +59,7 @@ jobs:
. $env:USERPROFILE\Miniconda3\shell\condabin\conda-hook.ps1
conda activate curr_py
conda install numba -q -y
- if: ${{ matrix.command == 'git-test-prophet' }}
- if: ${{ matrix.command == 'git-test-prophet-no-parallel-cpu' }}
name: Install EvalML with test requirements and prophet
shell: pwsh
run: |
Expand All @@ -69,7 +69,7 @@ jobs:
python -m pip install .[test]
python -m pip install .[prophet]
pip freeze
- if: ${{ matrix.command != 'git-test-prophet' }}
- if: ${{ matrix.command != 'git-test-prophet-no-parallel-cpu' }}
name: Install EvalML with test requirements
shell: pwsh
run: |
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ git-test-prophet:
git-test-integration:
pytest evalml/tests/integration_tests -n 2 --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-integration-junit.xml

# Runs the "other" test bucket serially (no pytest-xdist -n workers); added for
# Windows CI runners where parallel CPU workers are unreliable (see the
# windows_unit_tests workflow, which selects this target).
.PHONY: git-test-other-no-parallel-cpu
git-test-other-no-parallel-cpu:
	pytest evalml/tests --ignore evalml/tests/automl_tests/ --ignore evalml/tests/tuner_tests/ --ignore evalml/tests/model_understanding_tests/ --ignore evalml/tests/pipeline_tests/test_pipelines.py --ignore evalml/tests/component_tests/test_prophet_regressor.py --ignore evalml/tests/component_tests/test_components.py --ignore evalml/tests/component_tests/test_utils.py --ignore evalml/tests/integration_tests/ --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-other-junit.xml
	$(MAKE) doctests

# Runs the prophet-related component/pipeline tests serially (no pytest-xdist
# -n workers); selected by the windows_unit_tests workflow, where parallel CPU
# workers were disabled for this bucket.
.PHONY: git-test-prophet-no-parallel-cpu
git-test-prophet-no-parallel-cpu:
	pytest evalml/tests/component_tests/test_prophet_regressor.py evalml/tests/component_tests/test_components.py evalml/tests/component_tests/test_utils.py evalml/tests/pipeline_tests/test_pipelines.py --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-prophet-junit.xml

.PHONY: installdeps
installdeps:
Expand Down
4 changes: 2 additions & 2 deletions core-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
numpy>=1.21.0
pandas>=1.5.0, <2.1.0
scipy>=1.5.0, <1.12.0
scipy>=1.5.0
scikit-learn>=1.3.2
scikit-optimize>=0.9.0
pyzmq>=20.0.0
colorama>=0.4.4
cloudpickle>=1.5.0
click>=8.0.0
shap>=0.42.0
shap>=0.45.0
statsmodels>=0.12.2
texttable>=1.6.2
woodwork>= 0.21.1
Expand Down
9 changes: 5 additions & 4 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ Release Notes
* Removed vowpalwabbit :pr:`4427`
* Uncapped holidays :pr:`4428`
* Unpinned kaleido :pr:`4423`
* Unpinned shap and scipy :pr:`4436`
* Documentation Changes
* Testing Changes
* Run airflow tests in Python 3.9 :pr:`4391`
* Remove iterative test from airflow runs :pr:`4424`
* Update GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fix install test :pr:`4423`
* Added ability to run airflow tests in Python 3.9 :pr:`4391`
* Removed iterative test from airflow runs :pr:`4424`
* Updated GH actions to improve handling of potentially unsafe variables :pr:`4417`
* Fixed install test :pr:`4423`

.. warning::

Expand Down
5 changes: 3 additions & 2 deletions evalml/data_checks/target_distribution_data_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def _detect_log_distribution_helper(y):
normalization_test = shapiro if len(y) <= 5000 else jarque_bera
normalization_test_string = "shapiro" if len(y) <= 5000 else "jarque_bera"
# Check if a normal distribution is detected with p-value above 0.05
if normalization_test(y).pvalue >= 0.05:
pvalue = normalization_test(y).pvalue
if pvalue >= 0.05 or np.isnan(pvalue):
return False, normalization_test_string, None

y_new = round(y, 6)
Expand All @@ -161,6 +162,6 @@ def _detect_log_distribution_helper(y):

# If the p-value of the log transformed target is greater than or equal to the p-value of the original target
# with outliers dropped, then it would imply that the log transformed target has more of a normal distribution
if norm_test_log.pvalue >= norm_test_og.pvalue:
if round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6):
return True, normalization_test_string, norm_test_og
return False, normalization_test_string, norm_test_og
36 changes: 20 additions & 16 deletions evalml/model_understanding/prediction_explanations/_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,6 @@ def _compute_shap_values(pipeline, features, training_data=None):
if ws:
logger.debug(f"_compute_shap_values TreeExplainer: {ws[0].message}")
shap_values = explainer.shap_values(features, check_additivity=False)
# shap only outputs values for positive class for Catboost/Xgboost binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with zeros since the negative class will get dropped
# in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
} and is_binary(pipeline.problem_type):
shap_values = [np.zeros(shap_values.shape), shap_values]
else:
if training_data is None:
raise ValueError(
Expand Down Expand Up @@ -189,16 +180,29 @@ def _compute_shap_values(pipeline, features, training_data=None):
except IndexError:
expected_value = explainer.expected_value

# classification problem
if isinstance(shap_values, list):
mappings = []
for class_shap_values in shap_values:
mappings.append(_create_dictionary(class_shap_values, feature_names))
return (mappings, expected_value)
# regression problem
elif isinstance(shap_values, np.ndarray):
if is_regression(pipeline.problem_type):
dic = _create_dictionary(shap_values, feature_names)
return (dic, expected_value)
# classification problem
if len(shap_values.shape) == 3:
mappings = []
for class_shap_values in shap_values.T:
mappings.append(_create_dictionary(class_shap_values.T, feature_names))
return (mappings, expected_value)
# shap only outputs values for positive class for boosted binary estimators.
# this modifies the output to match the output format of other binary estimators.
# Ok to fill values of negative class with the positive class since the negative class
# will get dropped in the UI anyways.
if estimator.model_family in {
ModelFamily.CATBOOST,
ModelFamily.XGBOOST,
ModelFamily.LIGHTGBM,
} and is_binary(pipeline.problem_type):
mappings = []
for _ in range(2):
mappings.append(_create_dictionary(shap_values, feature_names))
return (mappings, expected_value)
else:
raise ValueError(f"Unknown shap_values datatype {str(type(shap_values))}!")

Expand Down
7 changes: 5 additions & 2 deletions evalml/tests/automl_tests/test_automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1530,9 +1530,12 @@ def test_results_getter(AutoMLTestEnv, X_y_binary):

assert automl.results["pipeline_results"][0]["mean_cv_score"] == 1.0

with pytest.raises(AttributeError, match="set attribute"):
with pytest.raises(AttributeError) as atr_error:
automl.results = 2.0

assert "set attribute" in str(atr_error.value) or "has no setter" in str(
atr_error.value,
)
automl.results["pipeline_results"][0]["mean_cv_score"] = 2.0
assert automl.results["pipeline_results"][0]["mean_cv_score"] == 1.0

Expand Down Expand Up @@ -4850,7 +4853,7 @@ def test_search_parameters_held_automl(
max_batches=batches,
)
aml.search()
estimator_args = inspect.getargspec(RandomForestClassifier)
estimator_args = inspect.getfullargspec(RandomForestClassifier)
# estimator_args[0] gives the parameter names, while [3] gives the associated values
# estimator_args[0][i + 1] to skip 'self' in the estimator
# we do len - 1 in order to skip the random seed, which isn't present in the row['parameters']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ pmdarima==2.0.4
pyzmq==26.0.3
scikit-learn==1.4.2
scikit-optimize==0.10.1
scipy==1.11.4
scipy==1.13.1
seaborn==0.13.2
shap==0.44.1
shap==0.45.1
sktime==0.28.1
statsmodels==0.14.2
texttable==1.7.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ scikit-learn==1.3.2
scikit-optimize==0.9.0
scipy==1.5.0
seaborn==0.11.1
shap==0.42.0
shap==0.45.0
sktime==0.21.0
statsmodels==0.12.2
texttable==1.6.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ scikit-learn==1.3.2
scikit-optimize==0.9.0
scipy==1.5.0
seaborn==0.11.1
shap==0.42.0
shap==0.45.0
sktime==0.21.0
statsmodels==0.12.2
texttable==1.6.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def test_compute_shap_values_catches_shap_tree_warnings(
def raise_warning_from_shap(estimator, feature_perturbation):
warnings.warn("Shap raised a warning!")
mock = MagicMock()
mock.shap_values.return_value = np.zeros(10)
mock.shap_values.return_value = np.zeros((1, 10, 2))
return mock

mock_tree_explainer.side_effect = raise_warning_from_shap
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ requires-python = ">=3.9,<4"
dependencies = [
"numpy >= 1.22.0",
"pandas >= 1.5.0, <2.1.0",
"scipy >= 1.5.0, < 1.12.0",
"scipy >= 1.5.0",
"scikit-learn >= 1.3.2",
"scikit-optimize >= 0.9.0",
"pyzmq >= 20.0.0",
"colorama >= 0.4.4",
"cloudpickle >= 1.5.0",
"click >= 8.0.0",
"shap >= 0.42.0, < 0.45.0",
"shap >= 0.45.0",
"statsmodels >= 0.12.2",
"texttable >= 1.6.2",
"woodwork[dask] >= 0.22.0",
Expand Down Expand Up @@ -78,9 +78,9 @@ dependencies = [
[project.optional-dependencies]
test = [
"pytest == 7.1.2",
"pytest-xdist == 2.1.0",
"pytest-timeout == 1.4.2",
"pytest-cov == 2.10.1",
"pytest-xdist >= 2.1.0",
"pytest-timeout >= 1.4.2",
"pytest-cov >= 2.10.1",
"nbval == 0.9.3",
"IPython >= 8.10.0, <8.12.1",
"PyYAML == 6.0.1",
Expand Down

0 comments on commit 3d5bf45

Please sign in to comment.