Skip to content

Commit

Permalink
Merge branch 'main' into ref-format
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Oct 10, 2023
2 parents d4d91df + 1025151 commit 072db8f
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 57 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Fixed regressions
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
- Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`)
- Fixed regression in :meth:`DataFrame.resample` which was extrapolating back to ``origin`` when ``origin`` was outside its bounds (:issue:`55064`)
- Fixed regression in :meth:`DataFrame.sort_index` which was not sorting correctly when the index was a sliced :class:`MultiIndex` (:issue:`55379`)

.. ---------------------------------------------------------------------------
.. _whatsnew_212.bug_fixes:
Expand Down
43 changes: 4 additions & 39 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

import numpy as np

from pandas._config import get_option

from pandas._libs import lib
from pandas._libs.interval import (
VALID_CLOSED,
Expand Down Expand Up @@ -1233,43 +1231,10 @@ def value_counts(self, dropna: bool = True) -> Series:
# ---------------------------------------------------------------------
# Rendering Methods

def _format_data(self) -> str:
    """
    Render the elements of this array as a bracketed, comma-separated summary.

    Every element is shown when the array has at most two elements or fits
    within the display budget. The budget is one tenth of the
    ``display.max_seq_items`` option (or of ``len(self)`` when that option
    is falsy), capped at 10. Longer arrays are abbreviated to their leading
    and trailing elements joined by ``" ... "``.

    Returns
    -------
    str
        The summary, e.g. ``"[a, b, c]"`` or ``"[a, b ... y, z]"``.
    """
    # TODO: integrate with categorical and make generic
    n = len(self)
    max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)

    formatter = str

    if n <= 2 or n <= max_seq_items:
        # Short enough to show in full; joining also yields "[]" for n == 0.
        return "[" + ", ".join(formatter(x) for x in self) + "]"

    # Keep at least one element on each side. A width of 0 would make
    # ``self[:0]`` empty while ``self[-0:]`` is the *whole* array, so the
    # "abbreviated" form would print every element after a dangling " ... ".
    width = max(max_seq_items // 2, 1)
    head_str = ", ".join(formatter(x) for x in self[:width])
    tail_str = ", ".join(formatter(x) for x in self[-width:])
    return f"[{head_str} ... {tail_str}]"

def __repr__(self) -> str:
    """
    Build the three-line representation of this array.

    The lines are, in order: the class name in angle brackets, the summary
    produced by ``self._format_data()``, and a ``Length: ..., dtype: ...``
    footer.
    """
    body = self._format_data()
    header = f"<{type(self).__name__}>"
    footer = f"Length: {len(self)}, dtype: {self.dtype}"
    return "\n".join((header, body, footer))
def _formatter(self, boxed: bool = False):
    """
    Return the callable used to render a single element as text.

    ``boxed`` is accepted but not consulted; the same callable is returned
    either way.
    """
    # Plain ``str`` makes elements render as e.g. "(0, 1]" rather than
    # "Interval(0, 1, closed='right')".
    element_formatter = str
    return element_formatter

# ---------------------------------------------------------------------
# Vectorized Interval Properties/Attributes
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ def __init__(self) -> None:
# with a label, but the underlying variable is -127 to 100
# we're going to drop the label and cast to int
self.DTYPE_MAP = dict(
list(zip(range(1, 245), [np.dtype("a" + str(i)) for i in range(1, 245)]))
[(i, np.dtype(f"S{i}")) for i in range(1, 245)]
+ [
(251, np.dtype(np.int8)),
(252, np.dtype(np.int16)),
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_frame_from_records_utc(self):

def test_from_records_to_records(self):
# from numpy documentation
arr = np.zeros((2,), dtype=("i4,f4,a10"))
arr = np.zeros((2,), dtype=("i4,f4,S10"))
arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]

DataFrame.from_records(arr)
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/frame/methods/test_select_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,9 +339,7 @@ def test_select_dtypes_datetime_with_tz(self):
expected = df3.reindex(columns=[])
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"dtype", [str, "str", np.bytes_, "S1", "unicode", np.str_, "U1"]
)
@pytest.mark.parametrize("dtype", [str, "str", np.bytes_, "S1", np.str_, "U1"])
@pytest.mark.parametrize("arg", ["include", "exclude"])
def test_select_dtypes_str_raises(self, dtype, arg):
df = DataFrame(
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/frame/methods/test_sort_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,3 +955,42 @@ def test_sort_index_multiindex_sort_remaining(self, ascending):
)

tm.assert_frame_equal(result, expected)


def test_sort_index_with_sliced_multiindex():
    """Check that ``sort_index`` orders a frame whose MultiIndex was sliced (GH 55379)."""
    level_names = ["group", "str"]
    all_keys = [
        ("a", "10"),
        ("a", "18"),
        ("a", "25"),
        ("b", "16"),
        ("b", "26"),
        ("a", "45"),
        ("b", "28"),
        ("a", "5"),
        ("a", "50"),
        ("a", "51"),
        ("b", "4"),
    ]
    full_index = MultiIndex.from_tuples(all_keys, names=level_names)
    frame = DataFrame({"x": range(len(full_index))}, index=full_index)

    # Sort only the first six rows; "x" records each row's original position.
    result = frame.iloc[:6].sort_index()

    sorted_keys = [
        ("a", "10"),
        ("a", "18"),
        ("a", "25"),
        ("a", "45"),
        ("b", "16"),
        ("b", "26"),
    ]
    expected_index = MultiIndex.from_tuples(sorted_keys, names=level_names)
    expected = DataFrame({"x": [0, 1, 2, 5, 3, 4]}, index=expected_index)

    tm.assert_frame_equal(result, expected)
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def test_to_records_with_categorical(self):
),
# Pass in a dtype instance.
(
{"column_dtypes": np.dtype("unicode")},
{"column_dtypes": np.dtype(np.str_)},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/base_class/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,23 +182,23 @@ def test_symmetric_difference(self):
"intersection",
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
dtype=[("num", int), ("let", "S1")],
),
False,
),
(
"intersection",
np.array(
[(1, "A"), (1, "B"), (2, "A"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
dtype=[("num", int), ("let", "S1")],
),
None,
),
(
"union",
np.array(
[(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
dtype=[("num", int), ("let", "S1")],
),
None,
),
Expand All @@ -208,13 +208,13 @@ def test_tuple_union_bug(self, method, expected, sort):
index1 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
dtype=[("num", int), ("let", "S1")],
)
)
index2 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
dtype=[("num", int), ("let", "S1")],
)
)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def test_blocks_compat_GH9037(self):
)

# JSON deserialisation always creates unicode strings
df_mixed.columns = df_mixed.columns.astype("unicode")
df_mixed.columns = df_mixed.columns.astype(np.str_)
data = StringIO(df_mixed.to_json(orient="split"))
df_roundtrip = read_json(data, orient="split")
tm.assert_frame_equal(
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,16 +303,18 @@ def test_store_dropna(tmp_path, setup_path):
tm.assert_frame_equal(df_without_missing, reloaded)


def test_keyword_deprecation(tmp_path, setup_path):
    """Check that passing ``key`` to ``to_hdf`` positionally warns (GH 54229)."""
    # GH 54229
    # Write beneath the per-test temporary directory so the test does not
    # leave an "example" file behind in the current working directory.
    # NOTE(review): ``setup_path`` is presumably a fixture supplying the HDF
    # file name -- confirm against the test suite's conftest.
    path = tmp_path / setup_path

    msg = (
        "Starting with pandas version 3.0 all arguments of to_hdf except for the "
        "argument 'path_or_buf' will be keyword-only."
    )
    df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}])

    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.to_hdf(path, "key")


def test_to_hdf_with_min_itemsize(tmp_path, setup_path):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ def test_append_length0_frame(self, sort):
tm.assert_frame_equal(df5, expected)

def test_append_records(self):
arr1 = np.zeros((2,), dtype=("i4,f4,a10"))
arr1 = np.zeros((2,), dtype=("i4,f4,S10"))
arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]

arr2 = np.zeros((3,), dtype=("i4,f4,a10"))
arr2 = np.zeros((3,), dtype=("i4,f4,S10"))
arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]

df1 = DataFrame(arr1)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,12 @@ def test_astype_unicode(self):
# bytes with obj.decode() instead of str(obj)
item = "野菜食べないとやばい"
ser = Series([item.encode()])
result = ser.astype("unicode")
result = ser.astype(np.str_)
expected = Series([item])
tm.assert_series_equal(result, expected)

for ser in test_series:
res = ser.astype("unicode")
res = ser.astype(np.str_)
expec = ser.map(str)
tm.assert_series_equal(res, expec)

Expand Down

0 comments on commit 072db8f

Please sign in to comment.