From e644273bad5c4b59edd16b4cb30f370f71e03b12 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 14 Apr 2023 21:29:35 -0400
Subject: [PATCH 1/3] BUG: pd.NA showing up as NaN in Categorical repr

---
 doc/source/whatsnew/v2.1.0.rst               |  1 +
 pandas/core/arrays/categorical.py            | 30 +++++++++++++++-----
 pandas/tests/arrays/categorical/test_repr.py | 17 +++++++++++
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index e860d59f2e5bd..51be96542ee38 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -272,6 +272,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
+- Bug in :meth:`Categorical.__repr__` and :meth:`Series.__repr__`, where a :class:`Categorical` with categories backed by a :class:`pandas.api.extensions.ExtensionDtype` showed null values as "NaN" instead of ``ExtensionDtype.na_value`` (:issue:`52681`)
 - Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`).
 -
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index adb083c16a838..cbbcb2e43fb53 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1599,7 +1599,14 @@ def _internal_get_values(self):
         if needs_i8_conversion(self.categories.dtype):
             return self.categories.take(self._codes, fill_value=NaT)
         elif is_integer_dtype(self.categories) and -1 in self._codes:
-            return self.categories.astype("object").take(self._codes, fill_value=np.nan)
+            fill_value = self.categories.dtype.na_value
+            if is_extension_array_dtype(self.categories.dtype):
+                # Nullable integer dtype
+                # Don't astype to object
+                return self.categories.take(self._codes, fill_value=fill_value)
+            return self.categories.astype("object").take(
+                self._codes, fill_value=fill_value
+            )
         return np.array(self)
 
     def check_for_ordered(self, op) -> None:
@@ -1911,14 +1918,18 @@ def _formatter(self, boxed: bool = False):
         # Defer to CategoricalFormatter's formatter.
         return None
 
-    def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str:
+    def _tidy_repr(
+        self, max_vals: int = 10, footer: bool = True, na_rep: str = "NaN"
+    ) -> str:
         """
         a short repr displaying only max_vals and an optional (but default
         footer)
         """
         num = max_vals // 2
-        head = self[:num]._get_repr(length=False, footer=False)
-        tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False)
+        head = self[:num]._get_repr(length=False, footer=False, na_rep=na_rep)
+        tail = self[-(max_vals - num) :]._get_repr(
+            length=False, footer=False, na_rep=na_rep
+        )
 
         result = f"{head[:-1]}, ..., {tail[1:]}"
         if footer:
@@ -2001,12 +2012,17 @@ def __repr__(self) -> str:
         String representation.
         """
         _maxlen = 10
+        na_repr = "NaN"
+        if is_extension_array_dtype(self.categories.dtype):
+            na_repr = repr(self.categories.dtype.na_value)
         if len(self._codes) > _maxlen:
-            result = self._tidy_repr(_maxlen)
+            result = self._tidy_repr(_maxlen, na_rep=na_repr)
         elif len(self._codes) > 0:
-            result = self._get_repr(length=len(self) > _maxlen)
+            result = self._get_repr(length=len(self) > _maxlen, na_rep=na_repr)
         else:
-            msg = self._get_repr(length=False, footer=True).replace("\n", ", ")
+            msg = self._get_repr(length=False, footer=True, na_rep=na_repr).replace(
+                "\n", ", "
+            )
             result = f"[], {msg}"
 
         return result
diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
index ffc44b30a3870..1ee90d53f9509 100644
--- a/pandas/tests/arrays/categorical/test_repr.py
+++ b/pandas/tests/arrays/categorical/test_repr.py
@@ -1,15 +1,19 @@
 import numpy as np
+import pytest
 
 from pandas import (
+    NA,
     Categorical,
     CategoricalDtype,
     CategoricalIndex,
     Series,
+    array,
     date_range,
     option_context,
     period_range,
     timedelta_range,
 )
+import pandas._testing as tm
 
 
 class TestCategoricalReprWithFactor:
@@ -253,6 +257,19 @@ def test_categorical_repr_int_with_nan(self):
 Categories (2, int64): [1, 2]"""
         assert repr(s) == s_exp
 
+    @pytest.mark.parametrize("values_dtype", tm.ALL_INT_EA_DTYPES)
+    def test_categorical_repr_nullable_int_NA(self, values_dtype):
+        arr = array([1, 2, np.nan], dtype=values_dtype)
+        c = Categorical(arr)
+        c_exp = f"""[1, 2, {NA}]\nCategories (2, {values_dtype}): [1, 2]"""
+        assert repr(c) == c_exp
+
+        s = Series([1, 2, np.nan], dtype=values_dtype).astype("category")
+        s_exp = """0       1\n1       2\n2    <NA>
+dtype: category
+Categories (2, Int64): [1, 2]"""
+        assert repr(s) == s_exp
+
     def test_categorical_repr_period(self):
         idx = period_range("2011-01-01 09:00", freq="H", periods=5)
         c = Categorical(idx)

From 70d9ddffaff10761cc7db93c4641ddb4247e23db Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sat, 15 Apr 2023 07:05:47 -0400
Subject: [PATCH 2/3] fix tests and address code review

---
 pandas/core/arrays/categorical.py            | 8 +++-----
 pandas/tests/arrays/categorical/test_repr.py | 4 ++--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index cbbcb2e43fb53..e5628930cd2e5 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1599,14 +1599,12 @@ def _internal_get_values(self):
         if needs_i8_conversion(self.categories.dtype):
             return self.categories.take(self._codes, fill_value=NaT)
         elif is_integer_dtype(self.categories) and -1 in self._codes:
-            fill_value = self.categories.dtype.na_value
-            if is_extension_array_dtype(self.categories.dtype):
+            if isinstance(self.categories.dtype, ExtensionDtype):
                 # Nullable integer dtype
                 # Don't astype to object
+                fill_value = self.categories.dtype.na_value
                 return self.categories.take(self._codes, fill_value=fill_value)
-            return self.categories.astype("object").take(
-                self._codes, fill_value=fill_value
-            )
+            return self.categories.astype("object").take(self._codes, fill_value=np.nan)
         return np.array(self)
 
     def check_for_ordered(self, op) -> None:
diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
index 1ee90d53f9509..ffb8162b7964b 100644
--- a/pandas/tests/arrays/categorical/test_repr.py
+++ b/pandas/tests/arrays/categorical/test_repr.py
@@ -265,9 +265,9 @@ def test_categorical_repr_nullable_int_NA(self, values_dtype):
         assert repr(c) == c_exp
 
         s = Series([1, 2, np.nan], dtype=values_dtype).astype("category")
-        s_exp = """0       1\n1       2\n2    <NA>
+        s_exp = f"""0       1\n1       2\n2    <NA>
 dtype: category
-Categories (2, Int64): [1, 2]"""
+Categories (2, {values_dtype}): [1, 2]"""
         assert repr(s) == s_exp
 
     def test_categorical_repr_period(self):

From 2d8c80b81b792c1c8cd0fce7e41b5d2b83ccc2cb Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Sat, 15 Apr 2023 08:20:06 -0400
Subject: [PATCH 3/3] fix np.nan showing up as nan

---
 pandas/core/arrays/categorical.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index e5628930cd2e5..606b247442a43 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2011,8 +2011,10 @@ def __repr__(self) -> str:
         """
         _maxlen = 10
         na_repr = "NaN"
-        if is_extension_array_dtype(self.categories.dtype):
-            na_repr = repr(self.categories.dtype.na_value)
+        if isinstance(self.categories.dtype, ExtensionDtype):
+            # np.nan should show up as NaN, not as nan
+            if self.categories.dtype.na_value is not np.nan:
+                na_repr = repr(self.categories.dtype.na_value)
         if len(self._codes) > _maxlen:
             result = self._tidy_repr(_maxlen, na_rep=na_repr)
         elif len(self._codes) > 0: