From 0a18ad7eb63ba055e63cd566084b0d970d300553 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dmtryshmtv@gmail.com>
Date: Wed, 10 May 2023 18:01:48 -0700
Subject: [PATCH] API/BUG: Make `to_json` `index=` arg consistent with `orient`
 arg (#52143)

* API/BUG: Make to_json index= consistent with orient
- split and table allow index=True/False
- records and values only allow index=False
- index and columns only allow index=True
- raise for contradictions in the latter two
- see #25513

* style: lint

* style: make mypy happy

* review: simplify

* review: clarify and consolidate branches

* style: add explainer comment

* doc: change error message in _json

* docs: update whatsnew 2.1.0

* docs: sort whatsnew
---
 doc/source/whatsnew/v2.1.0.rst      |  2 +-
 pandas/core/generic.py              | 11 ++++++-----
 pandas/io/json/_json.py             | 19 ++++++++++++++-----
 pandas/tests/io/json/test_pandas.py | 23 ++++++++++++++++++++---
 4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index e3bfd0cf4150f..5b62883c2741e 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -92,13 +92,13 @@ Other enhancements
 - Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
 - Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
 - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`)
+- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`)
 - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
 - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
 - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
 - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
 - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
 - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 017fb44413c8f..93fecc4a7b096 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2306,7 +2306,7 @@ def to_json(
         default_handler: Callable[[Any], JSONSerializable] | None = None,
         lines: bool_t = False,
         compression: CompressionOptions = "infer",
-        index: bool_t = True,
+        index: bool_t | None = None,
         indent: int | None = None,
         storage_options: StorageOptions = None,
         mode: Literal["a", "w"] = "w",
@@ -2375,10 +2375,11 @@ def to_json(
 
             .. versionchanged:: 1.4.0 Zstandard support.
 
-        index : bool, default True
-            Whether to include the index values in the JSON string. Not
-            including the index (``index=False``) is only supported when
-            orient is 'split' or 'table'.
+        index : bool or None, default None
+            The index is only used when 'orient' is 'split', 'index', 'column',
+            or 'table'. Of these, 'index' and 'column' do not support
+            `index=False`.
+
         indent : int, optional
            Length of whitespace used to indent each record.
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 8775c65f140a8..5c2fba814375f 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -100,7 +100,7 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = ...,
     lines: bool = ...,
     compression: CompressionOptions = ...,
-    index: bool = ...,
+    index: bool | None = ...,
     indent: int = ...,
     storage_options: StorageOptions = ...,
     mode: Literal["a", "w"] = ...,
@@ -120,7 +120,7 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = ...,
     lines: bool = ...,
     compression: CompressionOptions = ...,
-    index: bool = ...,
+    index: bool | None = ...,
     indent: int = ...,
     storage_options: StorageOptions = ...,
     mode: Literal["a", "w"] = ...,
@@ -139,15 +139,24 @@ def to_json(
     default_handler: Callable[[Any], JSONSerializable] | None = None,
     lines: bool = False,
     compression: CompressionOptions = "infer",
-    index: bool = True,
+    index: bool | None = None,
     indent: int = 0,
     storage_options: StorageOptions = None,
     mode: Literal["a", "w"] = "w",
 ) -> str | None:
-    if not index and orient not in ["split", "table"]:
+    if orient in ["records", "values"] and index is True:
         raise ValueError(
-            "'index=False' is only valid when 'orient' is 'split' or 'table'"
+            "'index=True' is only valid when 'orient' is 'split', 'table', "
+            "'index', or 'columns'."
         )
+    elif orient in ["index", "columns"] and index is False:
+        raise ValueError(
+            "'index=False' is only valid when 'orient' is 'split', 'table', "
+            "'records', or 'values'."
+        )
+    elif index is None:
+        # will be ignored for orient='records' and 'values'
+        index = True
 
     if lines and orient != "records":
         raise ValueError("'lines' keyword only valid when 'orient' is records")
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 788a6e97e3d0f..e93cd836fa307 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1472,17 +1472,34 @@ def test_index_false_to_json_table(self, data):
 
         assert result == expected
 
-    @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"])
+    @pytest.mark.parametrize("orient", ["index", "columns"])
     def test_index_false_error_to_json(self, orient):
-        # GH 17394
+        # GH 17394, 25513
         # Testing error message from to_json with index=False
 
         df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
 
-        msg = "'index=False' is only valid when 'orient' is 'split' or 'table'"
+        msg = (
+            "'index=False' is only valid when 'orient' is 'split', "
+            "'table', 'records', or 'values'"
+        )
         with pytest.raises(ValueError, match=msg):
             df.to_json(orient=orient, index=False)
 
+    @pytest.mark.parametrize("orient", ["records", "values"])
+    def test_index_true_error_to_json(self, orient):
+        # GH 25513
+        # Testing error message from to_json with index=True
+
+        df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
+
+        msg = (
+            "'index=True' is only valid when 'orient' is 'split', "
+            "'table', 'index', or 'columns'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            df.to_json(orient=orient, index=True)
+
     @pytest.mark.parametrize("orient", ["split", "table"])
     @pytest.mark.parametrize("index", [True, False])
     def test_index_false_from_json_to_json(self, orient, index):