From 0a18ad7eb63ba055e63cd566084b0d970d300553 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Wed, 10 May 2023 18:01:48 -0700 Subject: [PATCH] API/BUG: Make `to_json` `index=` arg consistent with `orient` arg (#52143) * API/BUG: Make to_json index= consistent with orient - split and table allow index=True/False - records and values only allow index=False - index and columns only allow index=True - raise for contradictions in the latter two - see #25513 * style: lint * style: make mypy happy * review: simplify * review: clarify and consolidate branches * style: add explainer comment * doc: change error message in _json * docs: update whatsnew 2.1.0 * docs: sort whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 11 ++++++----- pandas/io/json/_json.py | 19 ++++++++++++++----- pandas/tests/io/json/test_pandas.py | 23 ++++++++++++++++++++--- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e3bfd0cf4150f..5b62883c2741e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -92,13 +92,13 @@ Other enhancements - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`) - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`) - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) +- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. 
(:issue:`52084`) - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 017fb44413c8f..93fecc4a7b096 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2306,7 +2306,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool_t = False, compression: CompressionOptions = "infer", - index: bool_t = True, + index: bool_t | None = None, indent: int | None = None, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", @@ -2375,10 +2375,11 @@ def to_json( .. versionchanged:: 1.4.0 Zstandard support. - index : bool, default True - Whether to include the index values in the JSON string. Not - including the index (``index=False``) is only supported when - orient is 'split' or 'table'. + index : bool or None, default None + The index is only used when 'orient' is 'split', 'index', 'columns', + or 'table'. Of these, 'index' and 'columns' do not support + `index=False`. + indent : int, optional Length of whitespace used to indent each record. 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 8775c65f140a8..5c2fba814375f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -100,7 +100,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., @@ -120,7 +120,7 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., compression: CompressionOptions = ..., - index: bool = ..., + index: bool | None = ..., indent: int = ..., storage_options: StorageOptions = ..., mode: Literal["a", "w"] = ..., @@ -139,15 +139,24 @@ def to_json( default_handler: Callable[[Any], JSONSerializable] | None = None, lines: bool = False, compression: CompressionOptions = "infer", - index: bool = True, + index: bool | None = None, indent: int = 0, storage_options: StorageOptions = None, mode: Literal["a", "w"] = "w", ) -> str | None: - if not index and orient not in ["split", "table"]: + if orient in ["records", "values"] and index is True: raise ValueError( - "'index=False' is only valid when 'orient' is 'split' or 'table'" + "'index=True' is only valid when 'orient' is 'split', 'table', " + "'index', or 'columns'." ) + elif orient in ["index", "columns"] and index is False: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split', 'table', " + "'records', or 'values'." 
+ ) + elif index is None: + # will be ignored for orient='records' and 'values' + index = True if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 788a6e97e3d0f..e93cd836fa307 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1472,17 +1472,34 @@ def test_index_false_to_json_table(self, data): assert result == expected - @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"]) + @pytest.mark.parametrize("orient", ["index", "columns"]) def test_index_false_error_to_json(self, orient): - # GH 17394 + # GH 17394, 25513 # Testing error message from to_json with index=False df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) - msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" + msg = ( + "'index=False' is only valid when 'orient' is 'split', " + "'table', 'records', or 'values'" + ) with pytest.raises(ValueError, match=msg): df.to_json(orient=orient, index=False) + @pytest.mark.parametrize("orient", ["records", "values"]) + def test_index_true_error_to_json(self, orient): + # GH 25513 + # Testing error message from to_json with index=True + + df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + + msg = ( + "'index=True' is only valid when 'orient' is 'split', " + "'table', 'index', or 'columns'" + ) + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient, index=True) + @pytest.mark.parametrize("orient", ["split", "table"]) @pytest.mark.parametrize("index", [True, False]) def test_index_false_from_json_to_json(self, orient, index):