Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST (string dtype): fix IO dtype_backend tests for storage of str dtype of columns' Index #59509

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,9 @@ def test_dtype_backend_string(self, read_ext, string_storage, tmp_excel):
"b": Series(["x", None], dtype=pd.StringDtype(string_storage)),
}
)
tm.assert_frame_equal(result, expected)
# the storage of the str columns' Index is also affected by the
# string_storage setting -> ignore that for checking the result
tm.assert_frame_equal(result, expected, check_column_type=False)
Comment on lines -715 to +717
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes the tests to essentially just ignore the issue.

We could also verify it explicitly (although it's not the main thing that this test is meant to test) by doing something like:

if using_infer_string:
    expected.columns = expected.columns.astype(pd.StringDtype(string_storage, na_value=np.nan))

Which, now that I write it out here, is maybe also perfectly fine (it is not too verbose compared to the comment I had to add :))


@pytest.mark.parametrize("dtypes, exp_value", [({}, 1), ({"a.1": "int64"}, 1)])
def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2194,7 +2194,9 @@ def test_read_json_dtype_backend(
if orient == "values":
expected.columns = list(range(8))

tm.assert_frame_equal(result, expected)
# the storage of the str columns' Index is also affected by the
# string_storage setting -> ignore that for checking the result
tm.assert_frame_equal(result, expected, check_column_type=False)

@pytest.mark.parametrize("orient", ["split", "records", "index"])
def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,12 +470,12 @@ def test_dtype_backend_string(all_parsers, string_storage):
"""
result = parser.read_csv(StringIO(data), dtype_backend="numpy_nullable")

expected = DataFrame(
{
"a": pd.array(["a", "b"], dtype=pd.StringDtype(string_storage)),
"b": pd.array(["x", pd.NA], dtype=pd.StringDtype(string_storage)),
}
)
expected = DataFrame(
{
"a": pd.array(["a", "b"], dtype=pd.StringDtype(string_storage)),
"b": pd.array(["x", pd.NA], dtype=pd.StringDtype(string_storage)),
},
)
Comment on lines +473 to +478
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change moves the creation of the expected result inside the `with pd.option_context("mode.string_storage", string_storage):` context as well. The practical effect is that, when `future.infer_string` is enabled, the option also determines the storage used by the str-dtype Index that holds the columns of the resulting DataFrame.

tm.assert_frame_equal(result, expected)


Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/parser/test_read_fwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,9 @@ def test_dtype_backend(string_storage, dtype_backend):
)
expected["i"] = ArrowExtensionArray(pa.array([None, None]))

tm.assert_frame_equal(result, expected)
# the storage of the str columns' Index is also affected by the
# string_storage setting -> ignore that for checking the result
tm.assert_frame_equal(result, expected, check_column_type=False)


def test_invalid_dtype_backend():
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ def test_dtype_backend(self, string_storage, dtype_backend, flavor_read_html):
}
)

tm.assert_frame_equal(result, expected)
# the storage of the str columns' Index is also affected by the
# string_storage setting -> ignore that for checking the result
tm.assert_frame_equal(result, expected, check_column_type=False)

@pytest.mark.network
@pytest.mark.single_cpu
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/xml/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2050,7 +2050,9 @@ def test_read_xml_nullable_dtypes(
)
expected["g"] = ArrowExtensionArray(pa.array([None, None]))

tm.assert_frame_equal(result, expected)
# the storage of the str columns' Index is also affected by the
# string_storage setting -> ignore that for checking the result
tm.assert_frame_equal(result, expected, check_column_type=False)


def test_invalid_dtype_backend():
Expand Down