pandas-dev · lithomas1 · Apr 15, 2023 · Apr 15, 2023 · Apr 15, 2023 · jbrockmendel
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -272,6 +272,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
+- Bug in :meth:`Categorical.__repr__` and :meth:`Series.__repr__`, where a :class:`Categorical` whose categories are backed by a :class:`pandas.api.extensions.ExtensionDtype` had null values show up as "NaN" instead of ``ExtensionDtype.na_value`` (:issue:`52681`)
 - Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`).
 -
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1599,6 +1599,11 @@ def _internal_get_values(self):
         if needs_i8_conversion(self.categories.dtype):
             return self.categories.take(self._codes, fill_value=NaT)
         elif is_integer_dtype(self.categories) and -1 in self._codes:
+            if isinstance(self.categories.dtype, ExtensionDtype):
+                # Nullable integer dtype
+                # Don't astype to object
+                fill_value = self.categories.dtype.na_value
+                return self.categories.take(self._codes, fill_value=fill_value)
             return self.categories.astype("object").take(self._codes, fill_value=np.nan)
         return np.array(self)
 
@@ -1911,14 +1916,18 @@ def _formatter(self, boxed: bool = False):
         # Defer to CategoricalFormatter's formatter.
         return None
 
-    def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str:
+    def _tidy_repr(
+        self, max_vals: int = 10, footer: bool = True, na_rep: str = "NaN"
+    ) -> str:
         """
         a short repr displaying only max_vals and an optional (but default
         footer)
         """
         num = max_vals // 2
-        head = self[:num]._get_repr(length=False, footer=False)
-        tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False)
+        head = self[:num]._get_repr(length=False, footer=False, na_rep=na_rep)
+        tail = self[-(max_vals - num) :]._get_repr(
+            length=False, footer=False, na_rep=na_rep
+        )
 
         result = f"{head[:-1]}, ..., {tail[1:]}"
         if footer:
@@ -2001,12 +2010,19 @@ def __repr__(self) -> str:
         String representation.
         """
         _maxlen = 10
+        na_repr = "NaN"
+        if isinstance(self.categories.dtype, ExtensionDtype):
+            # np.nan should show up as NaN, not as nan
+            if self.categories.dtype.na_value is not np.nan:
+                na_repr = repr(self.categories.dtype.na_value)
         if len(self._codes) > _maxlen:
-            result = self._tidy_repr(_maxlen)
+            result = self._tidy_repr(_maxlen, na_rep=na_repr)
         elif len(self._codes) > 0:
-            result = self._get_repr(length=len(self) > _maxlen)
+            result = self._get_repr(length=len(self) > _maxlen, na_rep=na_repr)
         else:
-            msg = self._get_repr(length=False, footer=True).replace("\n", ", ")
+            msg = self._get_repr(length=False, footer=True, na_rep=na_repr).replace(
+                "\n", ", "
+            )
             result = f"[], {msg}"
 
         return result

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -1,15 +1,19 @@
 import numpy as np
+import pytest
 
 from pandas import (
+    NA,
     Categorical,
     CategoricalDtype,
     CategoricalIndex,
     Series,
+    array,
     date_range,
     option_context,
     period_range,
     timedelta_range,
 )
+import pandas._testing as tm
 
 
 class TestCategoricalReprWithFactor:
@@ -253,6 +257,19 @@ def test_categorical_repr_int_with_nan(self):
 Categories (2, int64): [1, 2]"""
         assert repr(s) == s_exp
 
+    @pytest.mark.parametrize("values_dtype", tm.ALL_INT_EA_DTYPES)
+    def test_categorical_repr_nullable_int_NA(self, values_dtype):
+        arr = array([1, 2, np.nan], dtype=values_dtype)
+        c = Categorical(arr)
+        c_exp = f"""[1, 2, {NA}]\nCategories (2, {values_dtype}): [1, 2]"""
+        assert repr(c) == c_exp
+
+        s = Series([1, 2, np.nan], dtype=values_dtype).astype("category")
+        s_exp = f"""0       1\n1       2\n2    <NA>
+dtype: category
+Categories (2, {values_dtype}): [1, 2]"""
+        assert repr(s) == s_exp
+
     def test_categorical_repr_period(self):
         idx = period_range("2011-01-01 09:00", freq="H", periods=5)
         c = Categorical(idx)