Skip to content

Commit

Permalink
Merge branch 'main' into bug-cut
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Sep 18, 2023
2 parents 9975a42 + 1496630 commit ee3c4b2
Show file tree
Hide file tree
Showing 62 changed files with 935 additions and 283 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ jobs:
run: |
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.0.1 meson-python==0.13.1
python -m pip install --no-cache-dir versioneer[toml] "cython<3.0.1" numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip list --no-cache-dir
export PANDAS_CI=1
Expand Down Expand Up @@ -271,8 +271,8 @@ jobs:
run: |
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
. ~/virtualenvs/pandas-dev/bin/activate
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.0.1
python -m pip install --no-cache-dir versioneer[toml] "cython<3.0.1" numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
python -m pip install --no-cache-dir --no-build-isolation -e .
python -m pip list --no-cache-dir
Expand Down Expand Up @@ -342,10 +342,10 @@ jobs:
- name: Build Environment
run: |
python --version
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
python -m pip install versioneer[toml]
python -m pip install python-dateutil pytz tzdata "cython<3.0.1" hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip install -ve . --no-build-isolation --no-index
python -m pip list
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-311-numpydev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ dependencies:

# build dependencies
- versioneer[toml]
- meson[ninja]=1.0.1
- meson[ninja]=1.2.1
- meson-python=0.13.1
- cython>=0.29.33

# test dependencies
- pytest>=7.3.2
Expand All @@ -25,7 +26,6 @@ dependencies:
- pip

- pip:
- "cython<3.0.1"
- "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
- "--pre"
- "numpy"
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- meson[ninja]=1.0.1
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.2.1
- cython>=0.29.33
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=0.29.33, <3.0.1
- meson[ninja]=1.0.1
- cython>=0.29.33
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down
23 changes: 23 additions & 0 deletions doc/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,29 @@ Sparse-dtype specific methods and attributes are provided under the
Series.sparse.from_coo
Series.sparse.to_coo


.. _api.series.struct:

Struct accessor
~~~~~~~~~~~~~~~

Arrow struct-dtype specific methods and attributes are provided under the
``Series.struct`` accessor.

.. autosummary::
:toctree: api/
:template: autosummary/accessor_attribute.rst

Series.struct.dtypes

.. autosummary::
:toctree: api/
:template: autosummary/accessor_method.rst

Series.struct.field
Series.struct.explode


.. _api.series.flags:

Flags
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/gotchas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ constructors using something similar to the following:
.. ipython:: python
x = np.array(list(range(10)), ">i4") # big endian
newx = x.byteswap().newbyteorder() # force native byteorder
newx = x.byteswap().view(x.dtype.newbyteorder()) # force native byteorder
s = pd.Series(newx)
See `the NumPy documentation on byte order
Expand Down
6 changes: 3 additions & 3 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ Limit the number of NA values filled
df.ffill(limit=1)
NA values can be replaced with corresponding value from a :class:`Series`` or :class:`DataFrame``
NA values can be replaced with corresponding value from a :class:`Series` or :class:`DataFrame`
where the index and columns align between the original object and the filled object.

.. ipython:: python
Expand Down Expand Up @@ -660,7 +660,7 @@ Pass a list of regular expressions that will replace matches with a scalar.

.. ipython:: python
df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True)
df.replace([r"\s*\.\s*", r"a|b"], "placeholder", regex=True)
All of the regular expression examples can also be passed with the
``to_replace`` argument as the ``regex`` argument. In this case the ``value``
Expand All @@ -669,7 +669,7 @@ dictionary.

.. ipython:: python
df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan)
df.replace(regex=[r"\s*\.\s*", r"a|b"], value="placeholder")
.. note::

Expand Down
38 changes: 35 additions & 3 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,34 @@ There are two advantages of this engine:
For more, see :ref:`io.calamine` in the user guide on IO tools.

.. _whatsnew_220.enhancements.struct_accessor:

Series.struct accessor for PyArrow structured data
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``Series.struct`` accessor provides attributes and methods for processing
Series with ``struct[pyarrow]`` dtype. For example,
:meth:`Series.struct.explode` converts PyArrow structured data to a pandas
DataFrame. (:issue:`54938`)

.. ipython:: python
import pyarrow as pa
series = pd.Series(
[
{"project": "pandas", "version": "2.2.0"},
{"project": "numpy", "version": "1.25.2"},
{"project": "pyarrow", "version": "13.0.0"},
],
dtype=pd.ArrowDtype(
pa.struct([
("project", pa.string()),
("version", pa.string()),
])
),
)
series.struct.explode()
.. _whatsnew_220.enhancements.enhancement2:

enhancement2
Expand Down Expand Up @@ -162,13 +190,15 @@ Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`, explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
Expand All @@ -177,6 +207,7 @@ Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~
- Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
- Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
- Performance improvement in :meth:`DataFrame.groupby` when aggregating pyarrow timestamp and duration dtypes (:issue:`55031`)
- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
- Performance improvement in :meth:`Index.difference` (:issue:`55108`)
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)
Expand Down Expand Up @@ -228,13 +259,14 @@ Strings

Interval
^^^^^^^^
- Bug in :class:`Interval` ``__repr__`` not displaying UTC offsets for :class:`Timestamp` bounds. Additionally the hour, minute and second components will now be shown. (:issue:`55015`)
- Bug in :meth:`IntervalIndex.get_indexer` with datetime or timedelta intervals incorrectly matching on integer targets (:issue:`47772`)
- Bug in :meth:`IntervalIndex.get_indexer` with timezone-aware datetime intervals incorrectly matching on a sequence of timezone-naive targets (:issue:`47772`)
-

Indexing
^^^^^^^^
-
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
-

Missing
Expand All @@ -260,7 +292,7 @@ Period

Plotting
^^^^^^^^
-
- Bug in :meth:`DataFrame.plot.box` with ``vert=False`` and a matplotlib ``Axes`` created with ``sharey=True`` (:issue:`54941`)
-

Groupby/resample/rolling
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:
# build dependencies
- versioneer[toml]
- cython=0.29.33
- meson[ninja]=1.0.1
- meson[ninja]=1.2.1
- meson-python=0.13.1

# test dependencies
Expand Down Expand Up @@ -85,7 +85,7 @@ dependencies:
- google-auth
- natsort # DataFrame.sort_values doctest
- numpydoc
- pydata-sphinx-theme
- pydata-sphinx-theme=0.13
- pytest-cython # doctest
- sphinx
- sphinx-design
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ project(
'c', 'cpp', 'cython',
version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(),
license: 'BSD-3',
meson_version: '>=1.0.1',
meson_version: '>=1.2.1',
default_options: [
'buildtype=release',
'c_std=c99'
Expand Down
11 changes: 8 additions & 3 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,11 @@ cdef class BlockValuesRefs:
else:
self.referenced_blocks = []

def _clear_dead_references(self) -> None:
self.referenced_blocks = [
ref for ref in self.referenced_blocks if ref() is not None
]

def add_reference(self, blk: Block) -> None:
"""Adds a new reference to our reference collection.

Expand All @@ -905,6 +910,7 @@ cdef class BlockValuesRefs:
blk : Block
The block that the new references should point to.
"""
self._clear_dead_references()
self.referenced_blocks.append(weakref.ref(blk))

def add_index_reference(self, index: object) -> None:
Expand All @@ -915,6 +921,7 @@ cdef class BlockValuesRefs:
index : Index
The index that the new reference should point to.
"""
self._clear_dead_references()
self.referenced_blocks.append(weakref.ref(index))

def has_reference(self) -> bool:
Expand All @@ -927,8 +934,6 @@ cdef class BlockValuesRefs:
-------
bool
"""
self.referenced_blocks = [
ref for ref in self.referenced_blocks if ref() is not None
]
self._clear_dead_references()
# Checking for more references than block pointing to itself
return len(self.referenced_blocks) > 1
21 changes: 3 additions & 18 deletions pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -478,31 +478,16 @@ cdef class Interval(IntervalMixin):
args = (self.left, self.right, self.closed)
return (type(self), args)

def _repr_base(self):
left = self.left
right = self.right

# TODO: need more general formatting methodology here
if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
left = left._short_repr
right = right._short_repr

return left, right

def __repr__(self) -> str:

left, right = self._repr_base()
disp = str if isinstance(left, np.generic) else repr
disp = str if isinstance(self.left, (np.generic, _Timestamp)) else repr
name = type(self).__name__
repr_str = f"{name}({disp(left)}, {disp(right)}, closed={repr(self.closed)})"
repr_str = f"{name}({disp(self.left)}, {disp(self.right)}, closed={repr(self.closed)})" # noqa: E501
return repr_str

def __str__(self) -> str:

left, right = self._repr_base()
start_symbol = "[" if self.closed_left else "("
end_symbol = "]" if self.closed_right else ")"
return f"{start_symbol}{left}, {right}{end_symbol}"
return f"{start_symbol}{self.left}, {self.right}{end_symbol}"

def __add__(self, y):
if (
Expand Down
Loading

0 comments on commit ee3c4b2

Please sign in to comment.