Assorted backports for 2.2.x (#59785)

* Backport PR #59065: ENH: Fix Python 3.13 test failures & enable CI

* Remove deprecated plot_date calls (#58484)

* Remove deprecated plot_date calls

These were deprecated in Matplotlib 3.9.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit c9bc480)

* Pick out fastparquet xfails for green CI

* pin pytz to fix test_arrays.py

* more workflow tweaks for pytz and Python 3.13

* fix typing and tune tests for copy on write

* remove WASM stuff

* more arm skips

* go for green

---------

Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Elliott Sales de Andrade <quantum.analyst@gmail.com>
pandas-dev · Sep 18, 2024 · f7b6378 · f7b6378
1 parent 2127b42
commit f7b6378
Show file tree

Hide file tree

Showing 28 changed files with 83 additions and 69 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -14,10 +14,10 @@ jobs:
     steps:
       - checkout
       - run: .circleci/setup_env.sh
-      - run: >
-          PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
-          LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
+      - run: |
           sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
+          PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
+          LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
           ci/run_tests.sh
   linux-musl:
     docker:
@@ -35,7 +35,7 @@ jobs:
           /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
           . ~/virtualenvs/pandas-dev/bin/activate
           python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
-          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
+          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
           python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
           python -m pip list --no-cache-dir
       - run: |

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -257,7 +257,7 @@ jobs:
           . ~/virtualenvs/pandas-dev/bin/activate
           python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
           python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
-          python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
+          python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
           python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
           python -m pip list --no-cache-dir
           export PANDAS_CI=1
@@ -295,7 +295,7 @@ jobs:
           /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
           . ~/virtualenvs/pandas-dev/bin/activate
           python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
-          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
+          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil "pytz<2024.2" pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
           python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
           python -m pip list --no-cache-dir
 
@@ -329,7 +329,7 @@ jobs:
     #    To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs
     #    to the corresponding posix/windows-macos/sdist etc. workflows.
     # Feel free to modify this comment as necessary.
-    if: false # Uncomment this to freeze the workflow, comment it to unfreeze
+    # if: false # Uncomment this to freeze the workflow, comment it to unfreeze
     defaults:
       run:
         shell: bash -eou pipefail {0}
@@ -361,15 +361,15 @@ jobs:
       - name: Set up Python Dev Version
         uses: actions/setup-python@v5
         with:
-          python-version: '3.12-dev'
+          python-version: '3.13-dev'
 
       - name: Build Environment
         run: |
           python --version
           python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
           python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
           python -m pip install versioneer[toml]
-          python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
+          python -m pip install python-dateutil "pytz<2024.2" tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
           python -m pip install -ve . --no-build-isolation --no-index --no-deps --config-settings=setup-args="--werror"
           python -m pip list
 

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -170,13 +170,13 @@ jobs:
         shell: pwsh
         run: |
           $TST_CMD = @"
-          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0;
+          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytz<2024.2;
           python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
           python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`';
           "@
           # add rc to the end of the image name if the Python version is unreleased
-          docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
-          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
+          docker pull python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
+          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.13' && '3.13-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
 
       - uses: actions/upload-artifact@v4
         with:

diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
@@ -20,7 +20,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml
@@ -22,7 +22,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml
@@ -21,7 +21,8 @@ dependencies:
 
   # pandas dependencies
   - python-dateutil
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
   - pip
 
   - pip:

diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml
@@ -19,7 +19,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy<2
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
   - pip
 
   - pip:

diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml
@@ -20,7 +20,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml
@@ -20,7 +20,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
@@ -20,7 +20,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz<2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml
@@ -22,6 +22,7 @@ dependencies:
   # required
   - numpy
   - python-dateutil
+  # pytz 2024.2 timezones cause wrong results
   - pytz
   - pip:
     - tzdata>=2022.7
diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml
@@ -21,7 +21,8 @@ dependencies:
   # required dependencies
   - python-dateutil
   - numpy
-  - pytz
+  # pytz 2024.2 timezones cause wrong results
+  - pytz < 2024.2
 
   # optional dependencies
   - beautifulsoup4>=4.11.2

diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -410,8 +410,8 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
   npyarr->type_num = PyArray_DESCR(obj)->type_num;
 
   if (GET_TC(tc)->transpose) {
-    npyarr->dim = PyArray_DIM(obj, npyarr->ndim);
-    npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim);
+    npyarr->dim = PyArray_DIM(obj, (int)npyarr->ndim);
+    npyarr->stride = PyArray_STRIDE(obj, (int)npyarr->ndim);
     npyarr->stridedim = npyarr->ndim;
     npyarr->index[npyarr->ndim] = 0;
     npyarr->inc = -1;
@@ -452,8 +452,8 @@ static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
     return;
   }
   const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
-  npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
-  npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
+  npyarr->dim = PyArray_DIM(arrayobj, (int)npyarr->stridedim);
+  npyarr->stride = PyArray_STRIDE(arrayobj, (int)npyarr->stridedim);
   npyarr->dataptr += npyarr->stride;
 
   NpyArr_freeItemValue(obj, tc);
@@ -524,8 +524,8 @@ static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) {
   }
   const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
 
-  npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
-  npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
+  npyarr->dim = PyArray_DIM(arrayobj, (int)npyarr->stridedim);
+  npyarr->stride = PyArray_STRIDE(arrayobj, (int)npyarr->stridedim);
   npyarr->index[npyarr->stridedim] = 0;
 
   ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -4960,7 +4960,12 @@ cpdef to_offset(freq, bint is_period=False):
     if result is None:
         raise ValueError(INVALID_FREQ_ERR_MSG.format(freq))
 
-    if is_period and not hasattr(result, "_period_dtype_code"):
+    try:
+        has_period_dtype_code = hasattr(result, "_period_dtype_code")
+    except ValueError:
+        has_period_dtype_code = False
+
+    if is_period and not has_period_dtype_code:
         if isinstance(freq, str):
             raise ValueError(f"{result.name} is not supported as period frequency")
         else:

diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
@@ -11,7 +11,7 @@
 from pandas.util._exceptions import find_stack_level
 
 if TYPE_CHECKING:
-    import google.auth
+    from google.auth.credentials import Credentials
 
     from pandas import DataFrame
 
@@ -37,7 +37,7 @@ def read_gbq(
     dialect: str | None = None,
     location: str | None = None,
     configuration: dict[str, Any] | None = None,
-    credentials: google.auth.credentials.Credentials | None = None,
+    credentials: Credentials | None = None,
     use_bqstorage_api: bool | None = None,
     max_results: int | None = None,
     progress_bar_type: str | None = None,
@@ -230,7 +230,7 @@ def to_gbq(
     table_schema: list[dict[str, str]] | None = None,
     location: str | None = None,
     progress_bar: bool = True,
-    credentials: google.auth.credentials.Credentials | None = None,
+    credentials: Credentials | None = None,
 ) -> None:
     warnings.warn(
         "to_gbq is deprecated and will be removed in a future version. "

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2816,7 +2816,9 @@ def test_rolling_wrong_param_min_period():
     test_df = DataFrame([name_l, val_l]).T
     test_df.columns = ["name", "val"]
 
-    result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'"
+    result_error_msg = (
+        r"^[a-zA-Z._]*\(\) got an unexpected keyword argument 'min_period'"
+    )
     with pytest.raises(TypeError, match=result_error_msg):
         test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()
 

diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py
@@ -190,7 +190,6 @@ def test_construction_overflow(self):
         expected = (50 + np.iinfo(np.int64).max) / 2
         assert result == expected
 
-    @pytest.mark.xfail(not IS64, reason="GH 23440")
     @pytest.mark.parametrize(
         "left, right, expected",
         [

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -452,6 +452,7 @@ def test_sort_values_invalid_na_position(index_with_missing, na_position):
         index_with_missing.sort_values(na_position=na_position)
 
 
+@pytest.mark.fails_arm_wheels
 @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
 @pytest.mark.parametrize("na_position", ["first", "last"])
 def test_sort_values_with_missing(index_with_missing, na_position, request):

diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas._libs import index as libindex
-from pandas.compat import IS64
 
 import pandas as pd
 from pandas import (
@@ -210,7 +209,6 @@ def test_mi_intervalindex_slicing_with_scalar(self):
         expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(not IS64, reason="GH 23440")
     @pytest.mark.parametrize(
         "base",
         [101, 1010],

diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import IS64
-
 from pandas import (
     Index,
     Interval,
@@ -211,7 +209,6 @@ def test_loc_getitem_missing_key_error_message(
             obj.loc[[4, 5, 6]]
 
 
-@pytest.mark.xfail(not IS64, reason="GH 23440")
 @pytest.mark.parametrize(
     "intervals",
     [

diff --git a/pandas/tests/io/parser/test_dialect.py b/pandas/tests/io/parser/test_dialect.py
@@ -26,7 +26,7 @@ def custom_dialect():
         "escapechar": "~",
         "delimiter": ":",
         "skipinitialspace": False,
-        "quotechar": "~",
+        "quotechar": "`",
         "quoting": 3,
     }
     return dialect_name, dialect_kwargs

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -485,7 +485,10 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
                 df.to_csv(path, compression=compression_, encoding=encoding)
 
             # reading should fail (otherwise we wouldn't need the warning)
-            msg = r"UTF-\d+ stream does not start with BOM"
+            msg = (
+                r"UTF-\d+ stream does not start with BOM|"
+                r"'utf-\d+' codec can't decode byte"
+            )
             with pytest.raises(UnicodeError, match=msg):
                 pd.read_csv(path, compression=compression_, encoding=encoding)
 

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -16,7 +16,6 @@
     pa_version_under11p0,
     pa_version_under13p0,
     pa_version_under15p0,
-    pa_version_under17p0,
 )
 
 import pandas as pd
@@ -449,12 +448,8 @@ def test_read_filters(self, engine, tmp_path):
             repeat=1,
         )
 
-    def test_write_index(self, engine, using_copy_on_write, request):
+    def test_write_index(self, engine):
         check_names = engine != "fastparquet"
-        if using_copy_on_write and engine == "fastparquet":
-            request.applymarker(
-                pytest.mark.xfail(reason="fastparquet write into index")
-            )
 
         df = pd.DataFrame({"A": [1, 2, 3]})
         check_round_trip(df, engine)
@@ -1064,9 +1059,6 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa):
             expected=expected,
         )
 
-    @pytest.mark.xfail(
-        pa_version_under17p0, reason="pa.pandas_compat passes 'datetime64' to .astype"
-    )
     def test_columns_dtypes_not_invalid(self, pa):
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
 
@@ -1314,7 +1306,10 @@ def test_empty_dataframe(self, fp):
         expected = df.copy()
         check_round_trip(df, fp, expected=expected)
 
-    @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
+    @pytest.mark.xfail(
+        _HAVE_FASTPARQUET and Version(fastparquet.__version__) > Version("2022.12"),
+        reason="fastparquet bug, see https://github.com/dask/fastparquet/issues/929",
+    )
     def test_timezone_aware_index(self, fp, timezone_aware_date_list):
         idx = 5 * [timezone_aware_date_list]
 

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -1044,7 +1044,7 @@ def test_utf16_encoding(xml_baby_names, parser):
         UnicodeError,
         match=(
             "UTF-16 stream does not start with BOM|"
-            "'utf-16-le' codec can't decode byte"
+            "'utf-16(-le)?' codec can't decode byte"
         ),
     ):
         read_xml(xml_baby_names, encoding="UTF-16", parser=parser)