From 656319da94ebec3bd24df03bc7ced8bacb8a1180 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Wed, 6 Mar 2024 15:55:26 +0100 Subject: [PATCH] Take care of deprecation warnings from dependencies (#482) --- .envs/testenv-linux.yml | 1 + .envs/testenv-others.yml | 1 + .envs/testenv-pandas.yml | 31 +++++++++++++++++ .envs/update_envs.py | 10 +++++- .github/workflows/main.yml | 31 +++++++++++++++++ environment.yml | 1 + pyproject.toml | 2 ++ src/estimagic/compat.py | 33 +++++++++++++++++++ src/estimagic/config.py | 19 ++++++++--- src/estimagic/optimization/optimize_result.py | 3 +- .../optimization/scipy_optimizers.py | 2 -- src/estimagic/parameters/block_trees.py | 8 ++--- .../parameters/consolidate_constraints.py | 2 +- src/estimagic/parameters/parameter_groups.py | 4 +-- src/estimagic/visualization/deviation_plot.py | 2 +- .../visualization/estimation_table.py | 22 +++++++------ src/estimagic/visualization/profile_plot.py | 6 ++-- tests/inference/test_bootstrap.py | 4 +-- tests/visualization/test_profile_plot.py | 12 +++---- 19 files changed, 157 insertions(+), 37 deletions(-) create mode 100644 .envs/testenv-pandas.yml create mode 100644 src/estimagic/compat.py diff --git a/.envs/testenv-linux.yml b/.envs/testenv-linux.yml index a4cd42372..44957d9d5 100644 --- a/.envs/testenv-linux.yml +++ b/.envs/testenv-linux.yml @@ -23,6 +23,7 @@ dependencies: - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-others.yml b/.envs/testenv-others.yml index b2c78c5c0..4d3159408 100644 --- a/.envs/testenv-others.yml +++ b/.envs/testenv-others.yml @@ -22,6 +22,7 @@ dependencies: - scipy>=1.2.1 # run, tests - sqlalchemy # run, tests - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/.envs/testenv-pandas.yml b/.envs/testenv-pandas.yml 
new file mode 100644 index 000000000..e98692d46 --- /dev/null +++ b/.envs/testenv-pandas.yml @@ -0,0 +1,31 @@ +--- +name: estimagic +channels: + - conda-forge + - nodefaults +dependencies: + - pandas<2.0.0 + - nlopt # dev, tests + - pip # dev, tests, docs + - pytest # dev, tests + - pytest-cov # tests + - pytest-xdist # dev, tests + - statsmodels # dev, tests + - bokeh<=2.4.3 # run, tests + - click # run, tests + - cloudpickle # run, tests + - joblib # run, tests + - numpy>=1.17.0 # run, tests + - plotly # run, tests + - pybaum >= 0.1.2 # run, tests + - scipy>=1.2.1 # run, tests + - sqlalchemy # run, tests + - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests + - pip: # dev, tests, docs + - DFO-LS # dev, tests + - Py-BOBYQA # dev, tests + - fides==0.7.4 # dev, tests + - kaleido # dev, tests + - simoptlib==1.0.1 # dev, tests + - -e ../ diff --git a/.envs/update_envs.py b/.envs/update_envs.py index 2d9a3bf07..f0eff0d7a 100644 --- a/.envs/update_envs.py +++ b/.envs/update_envs.py @@ -34,13 +34,21 @@ def main(): test_env_others = deepcopy(test_env) test_env_others.insert(_insert_idx, " - cyipopt<=1.2.0") + ## test environment for pandas version 1 + test_env_pandas = deepcopy(test_env) + test_env_pandas = [line for line in test_env_pandas if "pandas" not in line] + test_env_pandas.insert(_insert_idx, " - pandas<2.0.0") + # create docs testing environment docs_env = [line for line in lines if _keep_line(line, "docs")] docs_env.append(" - -e ../") # add local installation # write environments - for name, env in zip(["linux", "others"], [test_env_linux, test_env_others]): + for name, env in zip( + ["linux", "others", "pandas"], + [test_env_linux, test_env_others, test_env_pandas], + ): # Specify newline to avoid wrong line endings on Windows. 
# See: https://stackoverflow.com/a/69869641 Path(f".envs/testenv-{name}.yml").write_text( diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cae534bd7..9ba44f4a9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,6 +24,7 @@ jobs: - '3.9' - '3.10' - '3.11' + - '3.12' steps: - uses: actions/checkout@v3 - name: create build environment @@ -72,6 +73,36 @@ jobs: run: | micromamba activate estimagic pytest -m "not slow and not jax" + run-tests-with-old-pandas: + # This job is only for testing if estimagic works with older pandas versions, as + # many pandas functions we use will be deprecated in pandas 3. estimagic's behavior + # for older versions is handled in src/estimagic/compat.py. + name: Run tests for ${{ matrix.os}} on ${{ matrix.python-version }} with pandas 1 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + python-version: + - '3.11' + steps: + - uses: actions/checkout@v3 + - name: create build environment + uses: mamba-org/provision-with-micromamba@main + with: + environment-file: ./.envs/testenv-pandas.yml + environment-name: estimagic + cache-env: true + extra-specs: | + python=${{ matrix.python-version }} + - name: run pytest + shell: bash -l {0} + run: | + micromamba activate estimagic + pytest tests/visualization + pytest tests/parameters + pytest tests/inference code-in-docs: name: Run code snippets in documentation runs-on: ubuntu-latest diff --git a/environment.yml b/environment.yml index 4506cb1dd..d908fce43 100644 --- a/environment.yml +++ b/environment.yml @@ -35,6 +35,7 @@ dependencies: - sphinx-panels # docs - sphinxcontrib-bibtex # docs - tranquilo>=0.0.4 # dev, tests + - seaborn # dev, tests - pip: # dev, tests, docs - DFO-LS # dev, tests - Py-BOBYQA # dev, tests diff --git a/pyproject.toml b/pyproject.toml index 5a0d5721f..ad7f7a157 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ filterwarnings = [ "ignore:Method .ptp is
deprecated and will be removed in a future version. Use numpy.ptp instead.", "ignore:In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only", "ignore:Please use `MemoizeJac` from the `scipy.optimize` namespace", + "ignore:`scipy.optimize.optimize.MemoizeJac` is deprecated", "ignore:Some algorithms did not converge. Their walltime has been set to a very high value instead of infinity because Timedeltas do notsupport infinite values", "ignore:In a future version, the Index constructor will not infer numeric dtypes when passed object-dtype sequences", "ignore:distutils Version classes are deprecated. Use packaging.version instead", @@ -91,6 +92,7 @@ filterwarnings = [ "ignore:Widget.widget_types is deprecated", "ignore:Widget.widgets is deprecated", "ignore:Parallelization together with", + "ignore:Conversion of an array with ndim > 0 to a scalar is deprecated", ] addopts = ["--doctest-modules"] markers = [ diff --git a/src/estimagic/compat.py b/src/estimagic/compat.py new file mode 100644 index 000000000..8bd69a71c --- /dev/null +++ b/src/estimagic/compat.py @@ -0,0 +1,33 @@ +"""Compatibility module. + +Contains wrapper functions to handle compatibility issues between different versions of +external libraries. + +""" + +from estimagic.config import IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 + + +def pd_df_map(df, func, na_action=None, **kwargs): + """Apply a function to a DataFrame elementwise. + + pandas has deprecated the .applymap() function with version 2.1.0. This function + calls either .map() (if pandas version is greater or equal to 2.1.0) or .applymap() + (if pandas version is smaller than 2.1.0). + + Args: + df (pd.DataFrame): A pandas DataFrame. + func (callable): Python function, returns a single value from a single value. + na_action (str): If 'ignore', propagate NaN values, without passing them to + func. If None, pass NaN values to func. Default is None.
+ **kwargs: Additional keyword arguments to pass as keyword arguments to func. + + Returns: + pd.DataFrame: Transformed DataFrame. + + """ + if IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0: + out = df.map(func, na_action=na_action, **kwargs) + else: + out = df.applymap(func, na_action=na_action, **kwargs) + return out diff --git a/src/estimagic/config.py b/src/estimagic/config.py index dd0fcde1c..efa004724 100644 --- a/src/estimagic/config.py +++ b/src/estimagic/config.py @@ -1,4 +1,6 @@ from pathlib import Path +import pandas as pd +from packaging import version import plotly.express as px @@ -19,9 +21,9 @@ CRITERION_PENALTY_SLOPE = 0.1 CRITERION_PENALTY_CONSTANT = 100 -# ===================================================================================== +# ====================================================================================== # Check Available Packages -# ===================================================================================== +# ====================================================================================== try: from petsc4py import PETSc # noqa: F401 @@ -103,9 +105,18 @@ IS_NUMBA_INSTALLED = True -# ================================================================================= +# ====================================================================================== +# Check if pandas version is newer or equal to version 2.1.0 +# ====================================================================================== + +IS_PANDAS_VERSION_NEWER_OR_EQUAL_TO_2_1_0 = version.parse( + pd.__version__ +) >= version.parse("2.1.0") + + +# ====================================================================================== # Dashboard Defaults -# ================================================================================= +# ====================================================================================== Y_RANGE_PADDING = 0.05 Y_RANGE_PADDING_UNITS = "absolute" diff --git a/src/estimagic/optimization/optimize_result.py
b/src/estimagic/optimization/optimize_result.py index e991f5ebb..5b120d623 100644 --- a/src/estimagic/optimization/optimize_result.py +++ b/src/estimagic/optimization/optimize_result.py @@ -5,6 +5,7 @@ import pandas as pd from estimagic.utilities import to_pickle +from estimagic.compat import pd_df_map @dataclass @@ -128,7 +129,7 @@ def _format_convergence_report(report, algorithm): report = pd.DataFrame.from_dict(report) columns = ["one_step", "five_steps"] - table = report[columns].applymap(_format_float).astype(str) + table = pd_df_map(report[columns], _format_float).astype(str) for col in "one_step", "five_steps": table[col] = table[col] + _create_stars(report[col]) diff --git a/src/estimagic/optimization/scipy_optimizers.py b/src/estimagic/optimization/scipy_optimizers.py index 661956f27..143e5d561 100644 --- a/src/estimagic/optimization/scipy_optimizers.py +++ b/src/estimagic/optimization/scipy_optimizers.py @@ -355,7 +355,6 @@ def scipy_truncated_newton( upper_bounds, *, stopping_max_criterion_evaluations=STOPPING_MAX_CRITERION_EVALUATIONS, - stopping_max_iterations=STOPPING_MAX_ITERATIONS, convergence_absolute_criterion_tolerance=CONVERGENCE_ABSOLUTE_CRITERION_TOLERANCE, convergence_absolute_params_tolerance=CONVERGENCE_ABSOLUTE_PARAMS_TOLERANCE, convergence_absolute_gradient_tolerance=CONVERGENCE_ABSOLUTE_GRADIENT_TOLERANCE, @@ -381,7 +380,6 @@ def scipy_truncated_newton( "xtol": convergence_absolute_params_tolerance, "gtol": convergence_absolute_gradient_tolerance, "maxfun": stopping_max_criterion_evaluations, - "maxiter": stopping_max_iterations, "maxCGit": max_hess_evaluations_per_iteration, "stepmx": max_step_for_line_search, "minfev": func_min_estimate, diff --git a/src/estimagic/parameters/block_trees.py b/src/estimagic/parameters/block_trees.py index 526fd276b..b75095e80 100644 --- a/src/estimagic/parameters/block_trees.py +++ b/src/estimagic/parameters/block_trees.py @@ -37,8 +37,8 @@ def matrix_to_block_tree(matrix, outer_tree, inner_tree): 
shapes_outer = [np.shape(a) for a in flat_outer_np] shapes_inner = [np.shape(a) for a in flat_inner_np] - block_bounds_outer = np.cumsum([int(np.product(s)) for s in shapes_outer[:-1]]) - block_bounds_inner = np.cumsum([int(np.product(s)) for s in shapes_inner[:-1]]) + block_bounds_outer = np.cumsum([int(np.prod(s)) for s in shapes_outer[:-1]]) + block_bounds_inner = np.cumsum([int(np.prod(s)) for s in shapes_inner[:-1]]) blocks = [] for leaf_outer, s1, submat in zip( @@ -94,8 +94,8 @@ def hessian_to_block_tree(hessian, f_tree, params_tree): shapes_f = [np.shape(a) for a in flat_f_np] shapes_p = [np.shape(a) for a in flat_p_np] - block_bounds_f = np.cumsum([int(np.product(s)) for s in shapes_f[:-1]]) - block_bounds_p = np.cumsum([int(np.product(s)) for s in shapes_p[:-1]]) + block_bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]]) + block_bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]]) sub_block_trees = [] for s0, subarr in zip(shapes_f, np.split(hessian, block_bounds_f, axis=0)): diff --git a/src/estimagic/parameters/consolidate_constraints.py b/src/estimagic/parameters/consolidate_constraints.py index 875edd2c6..f71ab1cb9 100644 --- a/src/estimagic/parameters/consolidate_constraints.py +++ b/src/estimagic/parameters/consolidate_constraints.py @@ -592,7 +592,7 @@ def _drop_redundant_linear_constraints(weights, rhs): new_rhs (pd.DataFrame) """ - weights["dupl_group"] = weights.groupby(list(weights.columns)).grouper.group_info[0] + weights["dupl_group"] = weights.groupby(list(weights.columns)).ngroup() rhs["dupl_group"] = weights["dupl_group"] weights.set_index("dupl_group", inplace=True) diff --git a/src/estimagic/parameters/parameter_groups.py b/src/estimagic/parameters/parameter_groups.py index cba3cb536..75d840965 100644 --- a/src/estimagic/parameters/parameter_groups.py +++ b/src/estimagic/parameters/parameter_groups.py @@ -35,10 +35,10 @@ def get_params_groups_and_short_names(params, free_mask, max_group_size=8): names.append(name) # 
if every parameter has its own group, they should all actually be in one group - if len(pd.unique(groups)) == len(groups): + if len(set(groups)) == len(groups): groups = ["Parameters"] * len(groups) - counts = pd.value_counts(groups) + counts = pd.Series(groups).value_counts() to_be_split = counts[counts > max_group_size] for group_name, n_occurrences in to_be_split.items(): split_group_names = _split_long_group( diff --git a/src/estimagic/visualization/deviation_plot.py b/src/estimagic/visualization/deviation_plot.py index f7ac205d7..5756ba3ef 100644 --- a/src/estimagic/visualization/deviation_plot.py +++ b/src/estimagic/visualization/deviation_plot.py @@ -68,7 +68,7 @@ def deviation_plot( names=["problem", "algorithm", runtime_measure], ) ) - .fillna(method="ffill") + .ffill() .reset_index() ) average_deviations = ( diff --git a/src/estimagic/visualization/estimation_table.py b/src/estimagic/visualization/estimation_table.py index 48d611c7e..39dcdd6a3 100644 --- a/src/estimagic/visualization/estimation_table.py +++ b/src/estimagic/visualization/estimation_table.py @@ -3,6 +3,7 @@ from functools import partial from pathlib import Path from warnings import warn +from estimagic.compat import pd_df_map import numpy as np import pandas as pd @@ -305,7 +306,7 @@ def render_latex( ci_in_body = False if ci_in_body: - body.loc[("",)] = body.loc[("",)].applymap("{{{}}}".format).values + body.loc[("",)] = pd_df_map(body.loc[("",)], "{{{}}}".format).values if body.columns.nlevels > 1: column_groups = body.columns.get_level_values(0) else: @@ -1383,22 +1384,23 @@ def _apply_number_format(df_raw, number_format, format_integers): if isinstance(processed_format, (list, tuple)): df_formatted = df_raw.copy(deep=True).astype("float") for formatter in processed_format[:-1]: - df_formatted = df_formatted.applymap(formatter.format).astype("float") - df_formatted = df_formatted.astype("float").applymap( - processed_format[-1].format + df_formatted = pd_df_map(df_formatted, 
formatter.format).astype("float") + df_formatted = pd_df_map( + df_formatted.astype("float"), processed_format[-1].format ) elif isinstance(processed_format, str): - df_formatted = df_raw.astype("str").applymap( - partial(_format_non_scientific_numbers, format_string=processed_format) + df_formatted = pd_df_map( + df_raw.astype("str"), + partial(_format_non_scientific_numbers, format_string=processed_format), ) elif callable(processed_format): - df_formatted = df_raw.applymap(processed_format) + df_formatted = pd_df_map(df_raw, processed_format) # Don't format integers: set to original value if not format_integers: - integer_locs = df_raw.applymap(_is_integer) - df_formatted[integer_locs] = ( - df_raw[integer_locs].astype(float).applymap("{:.0f}".format) + integer_locs = pd_df_map(df_raw, _is_integer) + df_formatted[integer_locs] = pd_df_map( + df_raw[integer_locs].astype(float), "{:.0f}".format ) return df_formatted diff --git a/src/estimagic/visualization/profile_plot.py b/src/estimagic/visualization/profile_plot.py index 992b16e3f..7c46a0ef8 100644 --- a/src/estimagic/visualization/profile_plot.py +++ b/src/estimagic/visualization/profile_plot.py @@ -160,13 +160,13 @@ def create_solution_times(df, runtime_measure, converged_info, return_tidy=True) problem, algorithm and runtime_measure. The values are either the number of evaluations or the walltime each algorithm needed to achieve the desired precision. If the desired precision was not achieved the value is - set to np.inf (for n_evaluations) or 7000 days (for walltime since there - no infinite value is allowed). + set to np.inf. 
""" solution_times = df.groupby(["problem", "algorithm"])[runtime_measure].max() solution_times = solution_times.unstack() - solution_times[~converged_info] = np.inf + # We convert the dtype to float to support the use of np.inf + solution_times = solution_times.astype(float).where(converged_info, other=np.inf) if not return_tidy: solution_times = solution_times.stack().reset_index() diff --git a/tests/inference/test_bootstrap.py b/tests/inference/test_bootstrap.py index ab17ef937..726a7d72a 100644 --- a/tests/inference/test_bootstrap.py +++ b/tests/inference/test_bootstrap.py @@ -65,9 +65,9 @@ def expected(): def seaborn_example(): out = {} - df = sns.load_dataset("exercise", index_col=0) + raw = sns.load_dataset("exercise", index_col=0) replacements = {"1 min": 1, "15 min": 15, "30 min": 30} - df = df.replace({"time": replacements}) + df = raw.assign(time=raw.time.cat.rename_categories(replacements).astype(int)) df["constant"] = 1 lower_ci = pd.Series([90.709236, 0.151193], index=["constant", "time"]) diff --git a/tests/visualization/test_profile_plot.py b/tests/visualization/test_profile_plot.py index 2d3a7fabb..ff1cc393a 100644 --- a/tests/visualization/test_profile_plot.py +++ b/tests/visualization/test_profile_plot.py @@ -57,8 +57,8 @@ def test_create_solution_times_n_evaluations(): ) expected = pd.DataFrame( { - "algo1": [1, 5], - "algo2": [3, np.inf], + "algo1": [1.0, 5], + "algo2": [3.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), ) @@ -95,8 +95,8 @@ def test_create_solution_times_n_batches(): ) expected = pd.DataFrame( { - "algo1": [1, 1], - "algo2": [2, np.inf], + "algo1": [1.0, 1], + "algo2": [2.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), ) @@ -131,8 +131,8 @@ def test_create_solution_times_walltime(): ) expected = pd.DataFrame( { - "algo1": [1, 5], - "algo2": [3, np.inf], + "algo1": [1.0, 5], + "algo2": [3.0, np.inf], }, index=pd.Index(["prob1", "prob2"], name="problem"), )