diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 24dcb8c9687..2332f7f236b 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -24,7 +24,11 @@ New Features
 ~~~~~~~~~~~~
 - New "random" method for converting to and from 360_day calendars (:pull:`8603`).
   By `Pascal Bourgault <https://github.com/aulemahal>`_.
-
+- Xarray now makes a best attempt not to coerce :py:class:`pandas.api.extensions.ExtensionArray` to a numpy array
+  by supporting 1D `ExtensionArray` objects internally where possible.  Thus, ``Dataset`` objects initialized with a `pd.Categorical`,
+  for example, will retain the object.  However, one cannot do operations that are not possible on the `ExtensionArray`
+  then, such as broadcasting.
+  By `Ilan Gold <https://github.com/ilan-gold>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
@@ -36,6 +40,12 @@ Bug fixes
 
 Internal Changes
 ~~~~~~~~~~~~~~~~
+- Migrates ``formatting_html`` functionality for `DataTree` into ``xarray/core`` (:pull:`8930`)
+  By `Eni Awowale <https://github.com/eni-awowale>`_, `Julia Signell <https://github.com/jsignell>`_
+  and `Tom Nicholas <https://github.com/TomNicholas>`_.
+- Migrates ``datatree_mapping`` functionality into ``xarray/core`` (:pull:`8948`)
+  By `Matt Savoie <https://github.com/flamingbear>`_, `Owen Littlejohns
+  <https://github.com/owenlittlejohns>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 
 .. _whats-new.2024.03.0:
diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py
index 5c0f14976e6..3d87fcce1d9 100644
--- a/properties/test_pandas_roundtrip.py
+++ b/properties/test_pandas_roundtrip.py
@@ -17,7 +17,9 @@
 from hypothesis import given  # isort:skip
 
 numeric_dtypes = st.one_of(
-    npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes()
+    npst.unsigned_integer_dtypes(endianness="="),
+    npst.integer_dtypes(endianness="="),
+    npst.floating_dtypes(endianness="="),
 )
 
 numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt))
diff --git a/pyproject.toml b/pyproject.toml
index bdbfd9b52ab..8cbd395b2a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ accel = ["scipy", "bottleneck", "numbagg", "flox", "opt_einsum"]
 complete = ["xarray[accel,io,parallel,viz,dev]"]
 dev = [
   "hypothesis",
+  "mypy",
   "pre-commit",
   "pytest",
   "pytest-cov",
@@ -86,8 +87,8 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"]
 [tool.mypy]
 enable_error_code = "redundant-self"
 exclude = [
-	'xarray/util/generate_.*\.py',
-	'xarray/datatree_/.*\.py',
+  'xarray/util/generate_.*\.py',
+  'xarray/datatree_/.*\.py',
 ]
 files = "xarray"
 show_error_codes = true
@@ -98,8 +99,8 @@ warn_unused_ignores = true
 
 # Ignore mypy errors for modules imported from datatree_.
 [[tool.mypy.overrides]]
-module = "xarray.datatree_.*"
 ignore_errors = true
+module = "xarray.datatree_.*"
 
 # Much of the numerical computing stack doesn't have type annotations yet.
 [[tool.mypy.overrides]]
@@ -129,6 +130,7 @@ module = [
   "opt_einsum.*",
   "pandas.*",
   "pooch.*",
+  "pyarrow.*",
   "pydap.*",
   "pytest.*",
   "scipy.*",
@@ -255,6 +257,9 @@ target-version = "py39"
 # E402: module level import not at top of file
 # E501: line too long - let black worry about that
 # E731: do not assign a lambda expression, use a def
+extend-safe-fixes = [
+  "TID252", # absolute imports
+]
 ignore = [
   "E402",
   "E501",
@@ -268,9 +273,6 @@ select = [
   "I", # isort
   "UP", # Pyupgrade
 ]
-extend-safe-fixes = [
-  "TID252", # absolute imports
-]
 
 [tool.ruff.lint.per-file-ignores]
 # don't enforce absolute imports
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 900c10026fb..044273afc35 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -24,6 +24,7 @@
 from typing import IO, TYPE_CHECKING, Any, Callable, Generic, Literal, cast, overload
 
 import numpy as np
+from pandas.api.types import is_extension_array_dtype
 
 # remove once numpy 2.0 is the oldest supported version
 try:
@@ -6853,10 +6854,13 @@ def reduce(
                 if (
                     # Some reduction functions (e.g. std, var) need to run on variables
                     # that don't have the reduce dims: PR5393
-                    not reduce_dims
-                    or not numeric_only
-                    or np.issubdtype(var.dtype, np.number)
-                    or (var.dtype == np.bool_)
+                    not is_extension_array_dtype(var.dtype)
+                    and (
+                        not reduce_dims
+                        or not numeric_only
+                        or np.issubdtype(var.dtype, np.number)
+                        or (var.dtype == np.bool_)
+                    )
                 ):
                     # prefer to aggregate over axis=None rather than
                     # axis=(0, 1) if they will be equivalent, because
@@ -7169,13 +7173,37 @@ def to_pandas(self) -> pd.Series | pd.DataFrame:
         )
 
     def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
-        columns = [k for k in self.variables if k not in self.dims]
+        columns_in_order = [k for k in self.variables if k not in self.dims]
+        non_extension_array_columns = [
+            k
+            for k in columns_in_order
+            if not is_extension_array_dtype(self.variables[k].data)
+        ]
+        extension_array_columns = [
+            k
+            for k in columns_in_order
+            if is_extension_array_dtype(self.variables[k].data)
+        ]
         data = [
             self._variables[k].set_dims(ordered_dims).values.reshape(-1)
-            for k in columns
+            for k in non_extension_array_columns
         ]
         index = self.coords.to_index([*ordered_dims])
-        return pd.DataFrame(dict(zip(columns, data)), index=index)
+        broadcasted_df = pd.DataFrame(
+            dict(zip(non_extension_array_columns, data)), index=index
+        )
+        for extension_array_column in extension_array_columns:
+            extension_array = self.variables[extension_array_column].data.array
+            index = self[self.variables[extension_array_column].dims[0]].data
+            extension_array_df = pd.DataFrame(
+                {extension_array_column: extension_array},
+                index=self[self.variables[extension_array_column].dims[0]].data,
+            )
+            extension_array_df.index.name = self.variables[extension_array_column].dims[
+                0
+            ]
+            broadcasted_df = broadcasted_df.join(extension_array_df)
+        return broadcasted_df[columns_in_order]
 
     def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFrame:
         """Convert this dataset into a pandas.DataFrame.
@@ -7322,11 +7350,13 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
                 "cannot convert a DataFrame with a non-unique MultiIndex into xarray"
             )
 
-        # Cast to a NumPy array first, in case the Series is a pandas Extension
-        # array (which doesn't have a valid NumPy dtype)
-        # TODO: allow users to control how this casting happens, e.g., by
-        # forwarding arguments to pandas.Series.to_numpy?
-        arrays = [(k, np.asarray(v)) for k, v in dataframe.items()]
+        arrays = []
+        extension_arrays = []
+        for k, v in dataframe.items():
+            if not is_extension_array_dtype(v):
+                arrays.append((k, np.asarray(v)))
+            else:
+                extension_arrays.append((k, v))
 
         indexes: dict[Hashable, Index] = {}
         index_vars: dict[Hashable, Variable] = {}
@@ -7340,6 +7370,8 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
                 xr_idx = PandasIndex(lev, dim)
                 indexes[dim] = xr_idx
                 index_vars.update(xr_idx.create_variables())
+            arrays += [(k, np.asarray(v)) for k, v in extension_arrays]
+            extension_arrays = []
         else:
             index_name = idx.name if idx.name is not None else "index"
             dims = (index_name,)
@@ -7353,7 +7385,9 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
             obj._set_sparse_data_from_dataframe(idx, arrays, dims)
         else:
             obj._set_numpy_data_from_dataframe(idx, arrays, dims)
-        return obj
+        for name, extension_array in extension_arrays:
+            obj[name] = (dims, extension_array)
+        return obj[dataframe.columns] if len(dataframe.columns) else obj
 
     def to_dask_dataframe(
         self, dim_order: Sequence[Hashable] | None = None, set_index: bool = False
diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py
index 1b06d87c9fb..57fd7222898 100644
--- a/xarray/core/datatree.py
+++ b/xarray/core/datatree.py
@@ -18,6 +18,14 @@
 from xarray.core.coordinates import DatasetCoordinates
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset, DataVariables
+from xarray.core.datatree_mapping import (
+    TreeIsomorphismError,
+    check_isomorphic,
+    map_over_subtree,
+)
+from xarray.core.formatting_html import (
+    datatree_repr as datatree_repr_html,
+)
 from xarray.core.indexes import Index, Indexes
 from xarray.core.merge import dataset_update_method
 from xarray.core.options import OPTIONS as XR_OPTS
@@ -33,14 +41,6 @@
 from xarray.core.variable import Variable
 from xarray.datatree_.datatree.common import TreeAttrAccessMixin
 from xarray.datatree_.datatree.formatting import datatree_repr
-from xarray.datatree_.datatree.formatting_html import (
-    datatree_repr as datatree_repr_html,
-)
-from xarray.datatree_.datatree.mapping import (
-    TreeIsomorphismError,
-    check_isomorphic,
-    map_over_subtree,
-)
 from xarray.datatree_.datatree.ops import (
     DataTreeArithmeticMixin,
     MappedDatasetMethodsMixin,
diff --git a/xarray/datatree_/datatree/mapping.py b/xarray/core/datatree_mapping.py
similarity index 94%
rename from xarray/datatree_/datatree/mapping.py
rename to xarray/core/datatree_mapping.py
index 9546905e1ac..714921d2a90 100644
--- a/xarray/datatree_/datatree/mapping.py
+++ b/xarray/core/datatree_mapping.py
@@ -4,10 +4,9 @@
 import sys
 from itertools import repeat
 from textwrap import dedent
-from typing import TYPE_CHECKING, Callable, Tuple
+from typing import TYPE_CHECKING, Callable
 
 from xarray import DataArray, Dataset
-
 from xarray.core.iterators import LevelOrderIter
 from xarray.core.treenode import NodePath, TreeNode
 
@@ -84,14 +83,13 @@ def diff_treestructure(a: DataTree, b: DataTree, require_names_equal: bool) -> s
     for node_a, node_b in zip(LevelOrderIter(a), LevelOrderIter(b)):
         path_a, path_b = node_a.path, node_b.path
 
-        if require_names_equal:
-            if node_a.name != node_b.name:
-                diff = dedent(
-                    f"""\
+        if require_names_equal and node_a.name != node_b.name:
+            diff = dedent(
+                f"""\
                 Node '{path_a}' in the left object has name '{node_a.name}'
                 Node '{path_b}' in the right object has name '{node_b.name}'"""
-                )
-                return diff
+            )
+            return diff
 
         if len(node_a.children) != len(node_b.children):
             diff = dedent(
@@ -125,7 +123,7 @@ def map_over_subtree(func: Callable) -> Callable:
     func : callable
         Function to apply to datasets with signature:
 
-        `func(*args, **kwargs) -> Union[Dataset, Iterable[Dataset]]`.
+        `func(*args, **kwargs) -> Union[DataTree, Iterable[DataTree]]`.
 
         (i.e. func must accept at least one Dataset and return at least one Dataset.)
         Function will not be applied to any nodes without datasets.
@@ -154,7 +152,7 @@ def map_over_subtree(func: Callable) -> Callable:
     # TODO inspect function to work out immediately if the wrong number of arguments were passed for it?
 
     @functools.wraps(func)
-    def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
+    def _map_over_subtree(*args, **kwargs) -> DataTree | tuple[DataTree, ...]:
         """Internal function which maps func over every node in tree, returning a tree of the results."""
         from xarray.core.datatree import DataTree
 
@@ -259,7 +257,7 @@ def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
     return _map_over_subtree
 
 
-def _handle_errors_with_path_context(path):
+def _handle_errors_with_path_context(path: str):
     """Wraps given function so that if it fails it also raises path to node on which it failed."""
 
     def decorator(func):
@@ -267,11 +265,10 @@ def wrapper(*args, **kwargs):
             try:
                 return func(*args, **kwargs)
             except Exception as e:
-                if sys.version_info >= (3, 11):
-                    # Add the context information to the error message
-                    e.add_note(
-                        f"Raised whilst mapping function over node with path {path}"
-                    )
+                # Add the context information to the error message
+                add_note(
+                    e, f"Raised whilst mapping function over node with path {path}"
+                )
                 raise
 
         return wrapper
@@ -287,7 +284,9 @@ def add_note(err: BaseException, msg: str) -> None:
         err.add_note(msg)
 
 
-def _check_single_set_return_values(path_to_node, obj):
+def _check_single_set_return_values(
+    path_to_node: str, obj: Dataset | DataArray | tuple[Dataset | DataArray]
+):
     """Check types returned from single evaluation of func, and return number of return values received from func."""
     if isinstance(obj, (Dataset, DataArray)):
         return 1
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index ef497e78ebf..d95dfa566cc 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -32,6 +32,7 @@
 from numpy import concatenate as _concatenate
 from numpy.lib.stride_tricks import sliding_window_view  # noqa
 from packaging.version import Version
+from pandas.api.types import is_extension_array_dtype
 
 from xarray.core import dask_array_ops, dtypes, nputils
 from xarray.core.options import OPTIONS
@@ -156,7 +157,7 @@ def isnull(data):
         return full_like(data, dtype=bool, fill_value=False)
     else:
         # at this point, array should have dtype=object
-        if isinstance(data, np.ndarray):
+        if isinstance(data, np.ndarray) or is_extension_array_dtype(data):
             return pandas_isnull(data)
         else:
             # Not reachable yet, but intended for use with other duck array
@@ -221,9 +222,19 @@ def asarray(data, xp=np):
 
 def as_shared_dtype(scalars_or_arrays, xp=np):
     """Cast a arrays to a shared dtype using xarray's type promotion rules."""
-    array_type_cupy = array_type("cupy")
-    if array_type_cupy and any(
-        isinstance(x, array_type_cupy) for x in scalars_or_arrays
+    if any(is_extension_array_dtype(x) for x in scalars_or_arrays):
+        extension_array_types = [
+            x.dtype for x in scalars_or_arrays if is_extension_array_dtype(x)
+        ]
+        if len(extension_array_types) == len(scalars_or_arrays) and all(
+            isinstance(x, type(extension_array_types[0])) for x in extension_array_types
+        ):
+            return scalars_or_arrays
+        raise ValueError(
+            f"Cannot cast arrays to shared type, found array types {[x.dtype for x in scalars_or_arrays]}"
+        )
+    elif array_type_cupy := array_type("cupy") and any(  # noqa: F841
+        isinstance(x, array_type_cupy) for x in scalars_or_arrays  # noqa: F821
     ):
         import cupy as cp
 
diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py
new file mode 100644
index 00000000000..6521e425615
--- /dev/null
+++ b/xarray/core/extension_array.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Callable, Generic
+
+import numpy as np
+import pandas as pd
+from pandas.api.types import is_extension_array_dtype
+
+from xarray.core.types import DTypeLikeSave, T_ExtensionArray
+
+HANDLED_EXTENSION_ARRAY_FUNCTIONS: dict[Callable, Callable] = {}
+
+
+def implements(numpy_function):
+    """Register an __array_function__ implementation that PandasExtensionArray dispatches to."""
+
+    def decorator(func):
+        HANDLED_EXTENSION_ARRAY_FUNCTIONS[numpy_function] = func  # a later registration for the same function overwrites this one
+        return func  # returned unchanged so the decorated name stays usable
+
+    return decorator
+
+
+@implements(np.issubdtype)
+def __extension_duck_array__issubdtype(
+    extension_array_dtype: T_ExtensionArray, other_dtype: DTypeLikeSave
+) -> bool:
+    return False  # both arguments are ignored: a pandas extension dtype is never reported as a subtype of a numpy dtype
+
+
+@implements(np.broadcast_to)
+def __extension_duck_array__broadcast(arr: T_ExtensionArray, shape: tuple):
+    if len(shape) == 1 and shape[0] == len(arr):  # rank is checked first so an empty shape ``()`` falls through to the error below
+        return arr  # already has the requested 1D shape, so it is returned as-is
+    raise NotImplementedError("Cannot broadcast 1d-only pandas categorical array.")
+
+
+@implements(np.stack)
+def __extension_duck_array__stack(arr: T_ExtensionArray, axis: int = 0):  # default mirrors np.stack, so ``np.stack(arrays)`` reaches the error below
+    raise NotImplementedError("Cannot stack 1d-only pandas categorical array.")
+
+
+@implements(np.concatenate)
+def __extension_duck_array__concatenate(
+    arrays: Sequence[T_ExtensionArray], axis: int = 0, out=None
+) -> T_ExtensionArray:
+    return type(arrays[0])._concat_same_type(arrays)  # ``axis`` and ``out`` are accepted but never used here
+
+
+@implements(np.where)
+def __extension_duck_array__where(
+    condition: np.ndarray, x: T_ExtensionArray, y: T_ExtensionArray
+) -> T_ExtensionArray:
+    if (
+        isinstance(x, pd.Categorical)
+        and isinstance(y, pd.Categorical)
+        and x.dtype != y.dtype
+    ):
+        x = x.add_categories(set(y.categories).difference(set(x.categories)))  # extend x with the categories only y has
+        y = y.add_categories(set(x.categories).difference(set(y.categories)))  # x now holds the union, so this adds the categories y lacks
+    return pd.Series(x).where(condition, pd.Series(y)).array  # Series.where keeps x where condition is True and takes y elsewhere
+
+
+class PandasExtensionArray(Generic[T_ExtensionArray]):
+    array: T_ExtensionArray
+
+    def __init__(self, array: T_ExtensionArray):
+        """NEP-18 compliant wrapper for pandas extension arrays.
+
+        Parameters
+        ----------
+        array : T_ExtensionArray
+            The array to be wrapped upon e.g., :py:class:`xarray.Variable` creation.
+        """
+        if not isinstance(array, pd.api.extensions.ExtensionArray):
+            raise TypeError(f"{array} is not an pandas ExtensionArray.")
+        self.array = array
+
+    def __array_function__(self, func, types, args, kwargs):
+        # Run the override registered for ``func`` (or ``func`` itself) on the unwrapped arrays.
+        def unwrap(values) -> list:
+            # Swap each wrapper for its raw pandas array, recursing into tuples and lists.
+            unwrapped = list(values)
+            for index, value in enumerate(unwrapped):
+                if isinstance(value, PandasExtensionArray):
+                    unwrapped[index] = value.array
+                elif isinstance(value, tuple):  # should handle more than just tuple? iterable?
+                    unwrapped[index] = tuple(unwrap(value))
+                elif isinstance(value, list):
+                    unwrapped[index] = unwrap(value)
+            return unwrapped
+
+        args = tuple(unwrap(args))
+        if func not in HANDLED_EXTENSION_ARRAY_FUNCTIONS:
+            return func(*args, **kwargs)  # no override registered: call func on the raw arrays
+        res = HANDLED_EXTENSION_ARRAY_FUNCTIONS[func](*args, **kwargs)
+        if is_extension_array_dtype(res):
+            return type(self)[type(res)](res)  # wrap extension-array results again
+        return res
+
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        # Unwrap the inputs first; passing wrappers on would dispatch straight back to this method.
+        raw = [x.array if isinstance(x, PandasExtensionArray) else x for x in inputs]
+        return getattr(ufunc, method)(*raw, **kwargs)
+
+    def __repr__(self):
+        return f"{type(self)}(array={repr(self.array)})"
+
+    def __getattr__(self, attr: str) -> object:
+        # Fetch ``array`` without re-entering __getattr__: on an instance created without __init__, ``self.array`` would recurse forever.
+        return getattr(object.__getattribute__(self, "array"), attr)
+
+    def __getitem__(self, key) -> PandasExtensionArray[T_ExtensionArray]:
+        item = self.array[key]
+        if is_extension_array_dtype(item):
+            return type(self)(item)
+        if np.isscalar(item):
+            return type(self)(type(self.array)([item]))  # scalars are re-wrapped as a length-1 array
+        return item
+
+    def __setitem__(self, key, val):
+        self.array[key] = val
+
+    def __eq__(self, other):
+        if np.isscalar(other):
+            other = type(self)(type(self.array)([other]))  # lift the scalar into a length-1 wrapper
+        if isinstance(other, PandasExtensionArray):
+            return self.array == other.array
+        return self.array == other
+
+    def __ne__(self, other):
+        return ~(self == other)
+
+    def __len__(self):
+        return len(self.array)
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
index 2c76b182207..9bf5befbe3f 100644
--- a/xarray/core/formatting_html.py
+++ b/xarray/core/formatting_html.py
@@ -2,9 +2,11 @@
 
 import uuid
 from collections import OrderedDict
+from collections.abc import Mapping
 from functools import lru_cache, partial
 from html import escape
 from importlib.resources import files
+from typing import TYPE_CHECKING
 
 from xarray.core.formatting import (
     inline_index_repr,
@@ -18,6 +20,9 @@
     ("xarray.static.css", "style.css"),
 )
 
+if TYPE_CHECKING:
+    from xarray.core.datatree import DataTree
+
 
 @lru_cache(None)
 def _load_static_files():
@@ -341,3 +346,129 @@ def dataset_repr(ds) -> str:
     ]
 
     return _obj_repr(ds, header_components, sections)
+
+
+def summarize_datatree_children(children: Mapping[str, DataTree]) -> str:
+    """Build the HTML that lists every child node of a DataTree node.
+
+    Each child is rendered with ``datatree_node_repr`` and wrapped by
+    ``_wrap_datatree_repr``; only the last child is wrapped with ``end=True``.
+    The wrapped children are concatenated inside a single ``<div>``.
+    """
+    last_position = len(children) - 1
+
+    wrapped_children = []
+    for position, (name, child) in enumerate(children.items()):
+        # every child but the last keeps the connecting line going
+        is_last = not position < last_position
+        node_html = datatree_node_repr(name, child)
+        wrapped_children.append(_wrap_datatree_repr(node_html, end=is_last))
+    children_html = "".join(wrapped_children)
+
+    return "".join(
+        [
+            "<div style='display: inline-grid; grid-template-columns: 100%; grid-column: 1 / -1'>",
+            children_html,
+            "</div>",
+        ]
+    )
+
+
+children_section = partial(  # builder for the "Groups" section; datatree_node_repr calls it with dt.children
+    _mapping_section,
+    name="Groups",
+    details_func=summarize_datatree_children,
+    max_items_collapse=1,
+    expand_option_name="display_expand_groups",
+)
+
+
+def datatree_node_repr(group_title: str, dt: DataTree) -> str:
+    """Return the HTML repr of one node: an escaped title plus its sections."""
+    title_html = f"<div class='xr-obj-type'>{escape(group_title)}</div>"
+    dataset = dt.ds
+
+    # Child groups come first, followed by the contents of the node's dataset.
+    body_sections = [
+        children_section(dt.children),
+        dim_section(dataset),
+        coord_section(dataset.coords),
+        datavar_section(dataset.data_vars),
+        attr_section(dataset.attrs),
+    ]
+    return _obj_repr(dataset, [title_html], body_sections)
+
+
+def _wrap_datatree_repr(r: str, end: bool = False) -> str:
+    """
+    Wrap HTML representation with a tee to the left of it.
+
+    Enclosing HTML tag is a <div> with :code:`display: inline-grid` style.
+
+    Turns:
+    [    title    ]
+    |   details   |
+    |_____________|
+
+    into (A):
+    |─ [    title    ]
+    |  |   details   |
+    |  |_____________|
+
+    or (B):
+    └─ [    title    ]
+       |   details   |
+       |_____________|
+
+    Parameters
+    ----------
+    r: str
+        HTML representation to wrap.
+    end: bool
+        Whether the line on the left ends at this entry (B) instead of continuing (A).
+
+        Default is False.
+
+    Returns
+    -------
+    str
+        Wrapped HTML representation.
+
+        Tee color is set to the variable :code:`--xr-border-color`.
+    """
+    # height of line
+    end = bool(end)  # coerce to a real bool so the identity check on the next line is reliable
+    height = "100%" if end is False else "1.2em"
+    return "".join(
+        [
+            "<div style='display: inline-grid; grid-template-columns: 0px 20px auto; width: 100%;'>",
+            "<div style='",
+            "grid-column-start: 1;",
+            "border-right: 0.2em solid;",
+            "border-color: var(--xr-border-color);",
+            f"height: {height};",
+            "width: 0px;",
+            "'>",
+            "</div>",
+            "<div style='",
+            "grid-column-start: 2;",
+            "grid-row-start: 1;",
+            "height: 1em;",
+            "width: 20px;",
+            "border-bottom: 0.2em solid;",
+            "border-color: var(--xr-border-color);",
+            "'>",
+            "</div>",
+            "<div style='",
+            "grid-column-start: 3;",
+            "'>",
+            r,
+            "</div>",
+            "</div>",
+        ]
+    )
+
+
+def datatree_repr(dt: DataTree) -> str:
+    # The title is the class name of ``dt`` prefixed with "datatree.".
+    return datatree_node_repr(f"datatree.{type(dt).__name__}", dt)
diff --git a/xarray/core/iterators.py b/xarray/core/iterators.py
index 5c0c0f652e8..dd5fa7ee97a 100644
--- a/xarray/core/iterators.py
+++ b/xarray/core/iterators.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from collections import abc
 from collections.abc import Iterator
 from typing import Callable
 
@@ -9,7 +8,7 @@
 """These iterators are copied from anytree.iterators, with minor modifications."""
 
 
-class LevelOrderIter(abc.Iterator):
+class LevelOrderIter(Iterator):
     """Iterate over tree applying level-order strategy starting at `node`.
     This is the iterator used by `DataTree` to traverse nodes.
 
diff --git a/xarray/core/types.py b/xarray/core/types.py
index 242aed240fb..17b06eb0805 100644
--- a/xarray/core/types.py
+++ b/xarray/core/types.py
@@ -173,6 +173,9 @@ def copy(
 # hopefully in the future we can narrow this down more:
 T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True)
 
+# For typing pandas extension arrays.
+T_ExtensionArray = TypeVar("T_ExtensionArray", bound=pd.api.extensions.ExtensionArray)
+
 
 ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"]
 VarCompatible = Union["Variable", "ScalarOrArray"]
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index e89cf95411c..2229eaa2d24 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -13,11 +13,13 @@
 import numpy as np
 import pandas as pd
 from numpy.typing import ArrayLike
+from pandas.api.types import is_extension_array_dtype
 
 import xarray as xr  # only for Dataset and DataArray
 from xarray.core import common, dtypes, duck_array_ops, indexing, nputils, ops, utils
 from xarray.core.arithmetic import VariableArithmetic
 from xarray.core.common import AbstractArray
+from xarray.core.extension_array import PandasExtensionArray
 from xarray.core.indexing import (
     BasicIndexer,
     OuterIndexer,
@@ -47,6 +49,7 @@
 NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
     indexing.ExplicitlyIndexed,
     pd.Index,
+    pd.api.extensions.ExtensionArray,
 )
 # https://github.com/python/mypy/issues/224
 BASIC_INDEXING_TYPES = integer_types + (slice,)
@@ -184,6 +187,8 @@ def _maybe_wrap_data(data):
     """
     if isinstance(data, pd.Index):
         return PandasIndexingAdapter(data)
+    if isinstance(data, pd.api.extensions.ExtensionArray):
+        return PandasExtensionArray[type(data)](data)
     return data
 
 
@@ -2570,6 +2575,11 @@ def chunk(  # type: ignore[override]
         dask.array.from_array
         """
 
+        if is_extension_array_dtype(self):
+            raise ValueError(
+                f"{self} was found to be a Pandas ExtensionArray.  Please convert to numpy first."
+            )
+
         if from_array_kwargs is None:
             from_array_kwargs = {}
 
diff --git a/xarray/datatree_/datatree/__init__.py b/xarray/datatree_/datatree/__init__.py
index f2603b64641..3159d612913 100644
--- a/xarray/datatree_/datatree/__init__.py
+++ b/xarray/datatree_/datatree/__init__.py
@@ -1,11 +1,8 @@
 # import public API
-from .mapping import TreeIsomorphismError, map_over_subtree
 from xarray.core.treenode import InvalidTreeError, NotFoundInTreeError
 
 
 __all__ = (
-    "TreeIsomorphismError",
     "InvalidTreeError",
     "NotFoundInTreeError",
-    "map_over_subtree",
 )
diff --git a/xarray/datatree_/datatree/formatting.py b/xarray/datatree_/datatree/formatting.py
index 9ebee72d4ef..fdd23933ae6 100644
--- a/xarray/datatree_/datatree/formatting.py
+++ b/xarray/datatree_/datatree/formatting.py
@@ -2,7 +2,7 @@
 
 from xarray.core.formatting import _compat_to_str, diff_dataset_repr
 
-from xarray.datatree_.datatree.mapping import diff_treestructure
+from xarray.core.datatree_mapping import diff_treestructure
 from xarray.datatree_.datatree.render import RenderTree
 
 if TYPE_CHECKING:
diff --git a/xarray/datatree_/datatree/formatting_html.py b/xarray/datatree_/datatree/formatting_html.py
deleted file mode 100644
index 547b567a396..00000000000
--- a/xarray/datatree_/datatree/formatting_html.py
+++ /dev/null
@@ -1,135 +0,0 @@
-from functools import partial
-from html import escape
-from typing import Any, Mapping
-
-from xarray.core.formatting_html import (
-    _mapping_section,
-    _obj_repr,
-    attr_section,
-    coord_section,
-    datavar_section,
-    dim_section,
-)
-
-
-def summarize_children(children: Mapping[str, Any]) -> str:
-    N_CHILDREN = len(children) - 1
-
-    # Get result from node_repr and wrap it
-    lines_callback = lambda n, c, end: _wrap_repr(node_repr(n, c), end=end)
-
-    children_html = "".join(
-        lines_callback(n, c, end=False)  # Long lines
-        if i < N_CHILDREN
-        else lines_callback(n, c, end=True)  # Short lines
-        for i, (n, c) in enumerate(children.items())
-    )
-
-    return "".join(
-        [
-            "<div style='display: inline-grid; grid-template-columns: 100%'>",
-            children_html,
-            "</div>",
-        ]
-    )
-
-
-children_section = partial(
-    _mapping_section,
-    name="Groups",
-    details_func=summarize_children,
-    max_items_collapse=1,
-    expand_option_name="display_expand_groups",
-)
-
-
-def node_repr(group_title: str, dt: Any) -> str:
-    header_components = [f"<div class='xr-obj-type'>{escape(group_title)}</div>"]
-
-    ds = dt.ds
-
-    sections = [
-        children_section(dt.children),
-        dim_section(ds),
-        coord_section(ds.coords),
-        datavar_section(ds.data_vars),
-        attr_section(ds.attrs),
-    ]
-
-    return _obj_repr(ds, header_components, sections)
-
-
-def _wrap_repr(r: str, end: bool = False) -> str:
-    """
-    Wrap HTML representation with a tee to the left of it.
-
-    Enclosing HTML tag is a <div> with :code:`display: inline-grid` style.
-
-    Turns:
-    [    title    ]
-    |   details   |
-    |_____________|
-
-    into (A):
-    |─ [    title    ]
-    |  |   details   |
-    |  |_____________|
-
-    or (B):
-    └─ [    title    ]
-       |   details   |
-       |_____________|
-
-    Parameters
-    ----------
-    r: str
-        HTML representation to wrap.
-    end: bool
-        Specify if the line on the left should continue or end.
-
-        Default is True.
-
-    Returns
-    -------
-    str
-        Wrapped HTML representation.
-
-        Tee color is set to the variable :code:`--xr-border-color`.
-    """
-    # height of line
-    end = bool(end)
-    height = "100%" if end is False else "1.2em"
-    return "".join(
-        [
-            "<div style='display: inline-grid;'>",
-            "<div style='",
-            "grid-column-start: 1;",
-            "border-right: 0.2em solid;",
-            "border-color: var(--xr-border-color);",
-            f"height: {height};",
-            "width: 0px;",
-            "'>",
-            "</div>",
-            "<div style='",
-            "grid-column-start: 2;",
-            "grid-row-start: 1;",
-            "height: 1em;",
-            "width: 20px;",
-            "border-bottom: 0.2em solid;",
-            "border-color: var(--xr-border-color);",
-            "'>",
-            "</div>",
-            "<div style='",
-            "grid-column-start: 3;",
-            "'>",
-            "<ul class='xr-sections'>",
-            r,
-            "</ul>" "</div>",
-            "</div>",
-        ]
-    )
-
-
-def datatree_repr(dt: Any) -> str:
-    obj_type = f"datatree.{type(dt).__name__}"
-    return node_repr(obj_type, dt)
diff --git a/xarray/datatree_/datatree/ops.py b/xarray/datatree_/datatree/ops.py
index d6ac4f83e7c..83b9d1b275a 100644
--- a/xarray/datatree_/datatree/ops.py
+++ b/xarray/datatree_/datatree/ops.py
@@ -2,7 +2,7 @@
 
 from xarray import Dataset
 
-from .mapping import map_over_subtree
+from xarray.core.datatree_mapping import map_over_subtree
 
 """
 Module which specifies the subset of xarray.Dataset's API which we wish to copy onto DataTree.
diff --git a/xarray/datatree_/datatree/tests/test_formatting_html.py b/xarray/datatree_/datatree/tests/test_formatting_html.py
deleted file mode 100644
index 98cdf02bff4..00000000000
--- a/xarray/datatree_/datatree/tests/test_formatting_html.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import pytest
-import xarray as xr
-
-from xarray.core.datatree import DataTree
-from xarray.datatree_.datatree import formatting_html
-
-
-@pytest.fixture(scope="module", params=["some html", "some other html"])
-def repr(request):
-    return request.param
-
-
-class Test_summarize_children:
-    """
-    Unit tests for summarize_children.
-    """
-
-    func = staticmethod(formatting_html.summarize_children)
-
-    @pytest.fixture(scope="class")
-    def childfree_tree_factory(self):
-        """
-        Fixture for a child-free DataTree factory.
-        """
-        from random import randint
-
-        def _childfree_tree_factory():
-            return DataTree(
-                data=xr.Dataset({"z": ("y", [randint(1, 100) for _ in range(3)])})
-            )
-
-        return _childfree_tree_factory
-
-    @pytest.fixture(scope="class")
-    def childfree_tree(self, childfree_tree_factory):
-        """
-        Fixture for a child-free DataTree.
-        """
-        return childfree_tree_factory()
-
-    @pytest.fixture(scope="function")
-    def mock_node_repr(self, monkeypatch):
-        """
-        Apply mocking for node_repr.
-        """
-
-        def mock(group_title, dt):
-            """
-            Mock with a simple result
-            """
-            return group_title + " " + str(id(dt))
-
-        monkeypatch.setattr(formatting_html, "node_repr", mock)
-
-    @pytest.fixture(scope="function")
-    def mock_wrap_repr(self, monkeypatch):
-        """
-        Apply mocking for _wrap_repr.
-        """
-
-        def mock(r, *, end, **kwargs):
-            """
-            Mock by appending "end" or "not end".
-            """
-            return r + " " + ("end" if end else "not end") + "//"
-
-        monkeypatch.setattr(formatting_html, "_wrap_repr", mock)
-
-    def test_empty_mapping(self):
-        """
-        Test with an empty mapping of children.
-        """
-        children = {}
-        assert self.func(children) == (
-            "<div style='display: inline-grid; grid-template-columns: 100%'>" "</div>"
-        )
-
-    def test_one_child(self, childfree_tree, mock_wrap_repr, mock_node_repr):
-        """
-        Test with one child.
-
-        Uses a mock of _wrap_repr and node_repr to essentially mock
-        the inline lambda function "lines_callback".
-        """
-        # Create mapping of children
-        children = {"a": childfree_tree}
-
-        # Expect first line to be produced from the first child, and
-        # wrapped as the last child
-        first_line = f"a {id(children['a'])} end//"
-
-        assert self.func(children) == (
-            "<div style='display: inline-grid; grid-template-columns: 100%'>"
-            f"{first_line}"
-            "</div>"
-        )
-
-    def test_two_children(self, childfree_tree_factory, mock_wrap_repr, mock_node_repr):
-        """
-        Test with two level deep children.
-
-        Uses a mock of _wrap_repr and node_repr to essentially mock
-        the inline lambda function "lines_callback".
-        """
-
-        # Create mapping of children
-        children = {"a": childfree_tree_factory(), "b": childfree_tree_factory()}
-
-        # Expect first line to be produced from the first child, and
-        # wrapped as _not_ the last child
-        first_line = f"a {id(children['a'])} not end//"
-
-        # Expect second line to be produced from the second child, and
-        # wrapped as the last child
-        second_line = f"b {id(children['b'])} end//"
-
-        assert self.func(children) == (
-            "<div style='display: inline-grid; grid-template-columns: 100%'>"
-            f"{first_line}"
-            f"{second_line}"
-            "</div>"
-        )
-
-
-class Test__wrap_repr:
-    """
-    Unit tests for _wrap_repr.
-    """
-
-    func = staticmethod(formatting_html._wrap_repr)
-
-    def test_end(self, repr):
-        """
-        Test with end=True.
-        """
-        r = self.func(repr, end=True)
-        assert r == (
-            "<div style='display: inline-grid;'>"
-            "<div style='"
-            "grid-column-start: 1;"
-            "border-right: 0.2em solid;"
-            "border-color: var(--xr-border-color);"
-            "height: 1.2em;"
-            "width: 0px;"
-            "'>"
-            "</div>"
-            "<div style='"
-            "grid-column-start: 2;"
-            "grid-row-start: 1;"
-            "height: 1em;"
-            "width: 20px;"
-            "border-bottom: 0.2em solid;"
-            "border-color: var(--xr-border-color);"
-            "'>"
-            "</div>"
-            "<div style='"
-            "grid-column-start: 3;"
-            "'>"
-            "<ul class='xr-sections'>"
-            f"{repr}"
-            "</ul>"
-            "</div>"
-            "</div>"
-        )
-
-    def test_not_end(self, repr):
-        """
-        Test with end=False.
-        """
-        r = self.func(repr, end=False)
-        assert r == (
-            "<div style='display: inline-grid;'>"
-            "<div style='"
-            "grid-column-start: 1;"
-            "border-right: 0.2em solid;"
-            "border-color: var(--xr-border-color);"
-            "height: 100%;"
-            "width: 0px;"
-            "'>"
-            "</div>"
-            "<div style='"
-            "grid-column-start: 2;"
-            "grid-row-start: 1;"
-            "height: 1em;"
-            "width: 20px;"
-            "border-bottom: 0.2em solid;"
-            "border-color: var(--xr-border-color);"
-            "'>"
-            "</div>"
-            "<div style='"
-            "grid-column-start: 3;"
-            "'>"
-            "<ul class='xr-sections'>"
-            f"{repr}"
-            "</ul>"
-            "</div>"
-            "</div>"
-        )
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py
index d2503dfd535..449d0c793cc 100644
--- a/xarray/testing/strategies.py
+++ b/xarray/testing/strategies.py
@@ -45,7 +45,7 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]:
     Generates only those numpy dtypes which xarray can handle.
 
     Use instead of hypothesis.extra.numpy.scalar_dtypes in order to exclude weirder dtypes such as unicode, byte_string, array, or nested dtypes.
-    Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows.
+    Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows.  Generates only native-endian dtypes.
 
     Requires the hypothesis package to be installed.
 
@@ -56,10 +56,10 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]:
     # TODO should this be exposed publicly?
     # We should at least decide what the set of numpy dtypes that xarray officially supports is.
     return (
-        npst.integer_dtypes()
-        | npst.unsigned_integer_dtypes()
-        | npst.floating_dtypes()
-        | npst.complex_number_dtypes()
+        npst.integer_dtypes(endianness="=")
+        | npst.unsigned_integer_dtypes(endianness="=")
+        | npst.floating_dtypes(endianness="=")
+        | npst.complex_number_dtypes(endianness="=")
         # | npst.datetime64_dtypes()
         # | npst.timedelta64_dtypes()
         # | npst.unicode_string_dtypes()
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 5007db9eeb2..3ce788dfb7f 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -18,6 +18,7 @@
 from xarray import Dataset
 from xarray.core import utils
 from xarray.core.duck_array_ops import allclose_or_equiv  # noqa: F401
+from xarray.core.extension_array import PandasExtensionArray
 from xarray.core.indexing import ExplicitlyIndexed
 from xarray.core.options import set_options
 from xarray.core.variable import IndexVariable
@@ -52,7 +53,9 @@ def assert_writeable(ds):
     readonly = [
         name
         for name, var in ds.variables.items()
-        if not isinstance(var, IndexVariable) and not var.data.flags.writeable
+        if not isinstance(var, IndexVariable)
+        and not isinstance(var.data, PandasExtensionArray)
+        and not var.data.flags.writeable
     ]
     assert not readonly, readonly
 
@@ -112,6 +115,7 @@ def _importorskip(
 has_fsspec, requires_fsspec = _importorskip("fsspec")
 has_iris, requires_iris = _importorskip("iris")
 has_numbagg, requires_numbagg = _importorskip("numbagg", "0.4.0")
+has_pyarrow, requires_pyarrow = _importorskip("pyarrow")
 with warnings.catch_warnings():
     warnings.filterwarnings(
         "ignore",
@@ -307,6 +311,7 @@ def create_test_data(
     seed: int | None = None,
     add_attrs: bool = True,
     dim_sizes: tuple[int, int, int] = _DEFAULT_TEST_DIM_SIZES,
+    use_extension_array: bool = False,
 ) -> Dataset:
     rs = np.random.RandomState(seed)
     _vars = {
@@ -329,7 +334,16 @@ def create_test_data(
         obj[v] = (dims, data)
         if add_attrs:
             obj[v].attrs = {"foo": "variable"}
-
+    if use_extension_array:  # 1-4 categories; an empty pool makes np.random.choice raise
+        obj["var4"] = (
+            "dim1",
+            pd.Categorical(
+                np.random.choice(
+                    list(string.ascii_lowercase[: np.random.randint(1, 5)]),
+                    size=dim_sizes[0],
+                )
+            ),
+        )
     if dim_sizes == _DEFAULT_TEST_DIM_SIZES:
         numbers_values = np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64")
     else:
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 0cf4cc03a09..1ddb5a569bd 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -152,6 +152,21 @@ def test_concat_missing_var() -> None:
     assert_identical(actual, expected)
 
 
+def test_concat_categorical() -> None:
+    data1 = create_test_data(use_extension_array=True)
+    data2 = create_test_data(use_extension_array=True)
+    concatenated = concat([data1, data2], dim="dim1")
+    assert (
+        concatenated["var4"]
+        == type(data2["var4"].variable.data.array)._concat_same_type(
+            [
+                data1["var4"].variable.data.array,
+                data2["var4"].variable.data.array,
+            ]
+        )
+    ).all()
+
+
 def test_concat_missing_multiple_consecutive_var() -> None:
     datasets = create_concat_datasets(3, seed=123)
     expected = concat(datasets, dim="day")
@@ -451,8 +466,11 @@ def test_concat_fill_missing_variables(
 
 class TestConcatDataset:
     @pytest.fixture
-    def data(self) -> Dataset:
-        return create_test_data().drop_dims("dim3")
+    def data(self, request) -> Dataset:
+        use_extension_array = getattr(request, "param", False)  # absent unless parametrized
+        return create_test_data(use_extension_array=use_extension_array).drop_dims(
+            "dim3"
+        )
 
     def rectify_dim_order(self, data, dataset) -> Dataset:
         # return a new dataset with all variable dimensions transposed into
@@ -464,7 +482,9 @@ def rectify_dim_order(self, data, dataset) -> Dataset:
         )
 
     @pytest.mark.parametrize("coords", ["different", "minimal"])
-    @pytest.mark.parametrize("dim", ["dim1", "dim2"])
+    @pytest.mark.parametrize(
+        "dim,data", [["dim1", True], ["dim2", False]], indirect=["data"]
+    )
     def test_concat_simple(self, data, dim, coords) -> None:
         datasets = [g for _, g in data.groupby(dim, squeeze=False)]
         assert_identical(data, concat(datasets, dim, coords=coords))
@@ -492,6 +512,7 @@ def test_concat_merge_variables_present_in_some_datasets(self, data) -> None:
         expected = data.copy().assign(foo=(["dim1", "bar"], foo))
         assert_identical(expected, actual)
 
+    @pytest.mark.parametrize("data", [False], indirect=["data"])
     def test_concat_2(self, data) -> None:
         dim = "dim2"
         datasets = [g.squeeze(dim) for _, g in data.groupby(dim, squeeze=False)]
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e2a64964775..a948fafc815 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4614,10 +4614,12 @@ def test_to_and_from_dataframe(self) -> None:
         x = np.random.randn(10)
         y = np.random.randn(10)
         t = list("abcdefghij")
-        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
+        cat = pd.Categorical(["a", "b"] * 5)
+        ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t), "cat": ("t", cat)})
         expected = pd.DataFrame(
             np.array([x, y]).T, columns=["a", "b"], index=pd.Index(t, name="t")
         )
+        expected["cat"] = cat
         actual = ds.to_dataframe()
         # use the .equals method to check all DataFrame metadata
         assert expected.equals(actual), (expected, actual)
@@ -4628,23 +4630,31 @@ def test_to_and_from_dataframe(self) -> None:
 
         # check roundtrip
         assert_identical(ds, Dataset.from_dataframe(actual))
-
+        assert isinstance(ds["cat"].variable.data.dtype, pd.CategoricalDtype)
         # test a case with a MultiIndex
         w = np.random.randn(2, 3)
-        ds = Dataset({"w": (("x", "y"), w)})
+        cat = pd.Categorical(["a", "a", "c"])
+        ds = Dataset({"w": (("x", "y"), w), "cat": ("y", cat)})
         ds["y"] = ("y", list("abc"))
         exp_index = pd.MultiIndex.from_arrays(
             [[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"]
         )
-        expected = pd.DataFrame(w.reshape(-1), columns=["w"], index=exp_index)
+        expected = pd.DataFrame(
+            {"w": w.reshape(-1), "cat": pd.Categorical(["a", "a", "c", "a", "a", "c"])},
+            index=exp_index,
+        )
         actual = ds.to_dataframe()
         assert expected.equals(actual)
 
         # check roundtrip
+        # from_dataframe broadcasts "cat" over both index levels (it cannot tell it was 1D), so compare with a 2D numpy version
+        ds["cat"] = (("x", "y"), np.stack((ds["cat"].to_numpy(), ds["cat"].to_numpy())))
         assert_identical(ds.assign_coords(x=[0, 1]), Dataset.from_dataframe(actual))
 
         # Check multiindex reordering
         new_order = ["x", "y"]
+        # revert broadcasting fix above for 1d arrays
+        ds["cat"] = ("y", cat)
         actual = ds.to_dataframe(dim_order=new_order)
         assert expected.equals(actual)
 
@@ -4653,7 +4663,11 @@ def test_to_and_from_dataframe(self) -> None:
             [["a", "a", "b", "b", "c", "c"], [0, 1, 0, 1, 0, 1]], names=["y", "x"]
         )
         expected = pd.DataFrame(
-            w.transpose().reshape(-1), columns=["w"], index=exp_index
+            {
+                "w": w.transpose().reshape(-1),
+                "cat": pd.Categorical(["a", "a", "a", "a", "c", "c"]),
+            },
+            index=exp_index,
         )
         actual = ds.to_dataframe(dim_order=new_order)
         assert expected.equals(actual)
@@ -4706,7 +4720,7 @@ def test_to_and_from_dataframe(self) -> None:
         expected = pd.DataFrame([[]], index=idx)
         assert expected.equals(actual), (expected, actual)
 
-    def test_from_dataframe_categorical(self) -> None:
+    def test_from_dataframe_categorical_index(self) -> None:
         cat = pd.CategoricalDtype(
             categories=["foo", "bar", "baz", "qux", "quux", "corge"]
         )
@@ -4721,7 +4735,7 @@ def test_from_dataframe_categorical(self) -> None:
         assert len(ds["i1"]) == 2
         assert len(ds["i2"]) == 2
 
-    def test_from_dataframe_categorical_string_categories(self) -> None:
+    def test_from_dataframe_categorical_index_string_categories(self) -> None:
         cat = pd.CategoricalIndex(
             pd.Categorical.from_codes(
                 np.array([1, 1, 0, 2]),
@@ -5449,18 +5463,22 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None:
         assert list(actual) == expected
 
     def test_reduce_non_numeric(self) -> None:
-        data1 = create_test_data(seed=44)
+        data1 = create_test_data(seed=44, use_extension_array=True)
         data2 = create_test_data(seed=44)
-        add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]}
+        add_vars = {"var5": ["dim1", "dim2"], "var6": ["dim1"]}
         for v, dims in sorted(add_vars.items()):
             size = tuple(data1.sizes[d] for d in dims)
             data = np.random.randint(0, 100, size=size).astype(np.str_)
             data1[v] = (dims, data, {"foo": "variable"})
-
-        assert "var4" not in data1.mean() and "var5" not in data1.mean()
+        # var4 is extension array categorical and should be dropped
+        assert (
+            "var4" not in data1.mean()
+            and "var5" not in data1.mean()
+            and "var6" not in data1.mean()
+        )
         assert_equal(data1.mean(), data2.mean())
         assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1"))
-        assert "var4" not in data1.mean(dim="dim2") and "var5" in data1.mean(dim="dim2")
+        assert "var5" not in data1.mean(dim="dim2") and "var6" in data1.mean(dim="dim2")
 
     @pytest.mark.filterwarnings(
         "ignore:Once the behaviour of DataArray:DeprecationWarning"
diff --git a/xarray/datatree_/datatree/tests/test_mapping.py b/xarray/tests/test_datatree_mapping.py
similarity index 98%
rename from xarray/datatree_/datatree/tests/test_mapping.py
rename to xarray/tests/test_datatree_mapping.py
index c6cd04887c0..16ca726759d 100644
--- a/xarray/datatree_/datatree/tests/test_mapping.py
+++ b/xarray/tests/test_datatree_mapping.py
@@ -1,9 +1,13 @@
 import numpy as np
 import pytest
-import xarray as xr
 
+import xarray as xr
 from xarray.core.datatree import DataTree
-from xarray.datatree_.datatree.mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree
+from xarray.core.datatree_mapping import (
+    TreeIsomorphismError,
+    check_isomorphic,
+    map_over_subtree,
+)
 from xarray.datatree_.datatree.testing import assert_equal
 
 empty = xr.Dataset()
@@ -12,7 +16,7 @@
 class TestCheckTreesIsomorphic:
     def test_not_a_tree(self):
         with pytest.raises(TypeError, match="not a tree"):
-            check_isomorphic("s", 1)
+            check_isomorphic("s", 1)  # type: ignore[arg-type]
 
     def test_different_widths(self):
         dt1 = DataTree.from_dict(d={"a": empty})
@@ -69,7 +73,7 @@ def test_not_isomorphic_complex_tree(self, create_test_datatree):
     def test_checking_from_root(self, create_test_datatree):
         dt1 = create_test_datatree()
         dt2 = create_test_datatree()
-        real_root = DataTree(name="real root")
+        real_root: DataTree = DataTree(name="real root")
         dt2.name = "not_real_root"
         dt2.parent = real_root
         with pytest.raises(TreeIsomorphismError):
diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
index df1ab1f40f9..26821c69495 100644
--- a/xarray/tests/test_duck_array_ops.py
+++ b/xarray/tests/test_duck_array_ops.py
@@ -27,6 +27,7 @@
     timedelta_to_numeric,
     where,
 )
+from xarray.core.extension_array import PandasExtensionArray
 from xarray.namedarray.pycompat import array_type
 from xarray.testing import assert_allclose, assert_equal, assert_identical
 from xarray.tests import (
@@ -38,11 +39,55 @@
     requires_bottleneck,
     requires_cftime,
     requires_dask,
+    requires_pyarrow,
 )
 
 dask_array_type = array_type("dask")
 
 
+@pytest.fixture
+def categorical1():
+    return pd.Categorical(["cat1", "cat2", "cat2", "cat1", "cat2"])
+
+
+@pytest.fixture
+def categorical2():
+    return pd.Categorical(["cat2", "cat1", "cat2", "cat3", "cat1"])
+
+
+try:
+    import pyarrow as pa
+
+    @pytest.fixture
+    def arrow1():
+        return pd.arrays.ArrowExtensionArray(
+            pa.array([{"x": 1, "y": True}, {"x": 2, "y": False}])
+        )
+
+    @pytest.fixture
+    def arrow2():
+        return pd.arrays.ArrowExtensionArray(
+            pa.array([{"x": 3, "y": False}, {"x": 4, "y": True}])
+        )
+
+except ImportError:
+    pass
+
+
+@pytest.fixture
+def int1():
+    return pd.arrays.IntegerArray(
+        np.array([1, 2, 3, 4, 5]), np.array([True, False, False, True, True])
+    )
+
+
+@pytest.fixture
+def int2():
+    return pd.arrays.IntegerArray(
+        np.array([6, 7, 8, 9, 10]), np.array([True, True, False, True, False])
+    )
+
+
 class TestOps:
     @pytest.fixture(autouse=True)
     def setUp(self):
@@ -119,6 +164,51 @@ def test_where_type_promotion(self):
         assert result.dtype == np.float32
         assert_array_equal(result, np.array([1, np.nan], dtype=np.float32))
 
+    def test_where_extension_duck_array(self, categorical1, categorical2):
+        where_res = where(
+            np.array([True, False, True, False, False]),
+            PandasExtensionArray(categorical1),
+            PandasExtensionArray(categorical2),
+        )
+        assert isinstance(where_res, PandasExtensionArray)
+        assert (
+            where_res == pd.Categorical(["cat1", "cat1", "cat2", "cat3", "cat1"])
+        ).all()
+
+    def test_concatenate_extension_duck_array(self, categorical1, categorical2):
+        concate_res = concatenate(
+            [PandasExtensionArray(categorical1), PandasExtensionArray(categorical2)]
+        )
+        assert isinstance(concate_res, PandasExtensionArray)
+        assert (
+            concate_res
+            == type(categorical1)._concat_same_type((categorical1, categorical2))
+        ).all()
+
+    @requires_pyarrow
+    def test_duck_extension_array_pyarrow_concatenate(self, arrow1, arrow2):
+        concatenated = concatenate(
+            (PandasExtensionArray(arrow1), PandasExtensionArray(arrow2))
+        )
+        assert concatenated[2]["x"] == 3
+        assert concatenated[3]["y"]
+
+    def test___getitem__extension_duck_array(self, categorical1):
+        extension_duck_array = PandasExtensionArray(categorical1)
+        assert (extension_duck_array[0:2] == categorical1[0:2]).all()
+        assert isinstance(extension_duck_array[0:2], PandasExtensionArray)
+        assert extension_duck_array[0] == categorical1[0]
+        assert isinstance(extension_duck_array[0], PandasExtensionArray)
+        mask = [True, False, True, False, True]
+        assert (extension_duck_array[mask] == categorical1[mask]).all()
+
+    def test__setitem__extension_duck_array(self, categorical1):
+        extension_duck_array = PandasExtensionArray(categorical1)
+        extension_duck_array[2] = "cat1"  # already existing category
+        assert extension_duck_array[2] == "cat1"
+        with pytest.raises(TypeError, match="Cannot setitem on a Categorical"):
+            extension_duck_array[2] = "cat4"  # new category
+
     def test_stack_type_promotion(self):
         result = stack([1, "b"])
         assert_array_equal(result, np.array([1, "b"], dtype=object))
@@ -932,3 +1022,21 @@ def test_push_dask():
                 dask.array.from_array(array, chunks=(1, 2, 3, 2, 2, 1, 1)), axis=0, n=n
             )
         np.testing.assert_equal(actual, expected)
+
+
+def test_duck_extension_array_equality(categorical1, int1):
+    int_duck_array = PandasExtensionArray(int1)
+    categorical_duck_array = PandasExtensionArray(categorical1)
+    assert (int_duck_array != categorical_duck_array).all()
+    assert (categorical_duck_array == categorical1).all()
+    assert (int_duck_array[0:2] == int1[0:2]).all()
+
+
+def test_duck_extension_array_repr(int1):
+    int_duck_array = PandasExtensionArray(int1)
+    assert repr(int1) in repr(int_duck_array)
+
+
+def test_duck_extension_array_attr(int1):
+    int_duck_array = PandasExtensionArray(int1)
+    assert (~int_duck_array.fillna(10)).all()
diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
index 6540406e914..ada7f75b21b 100644
--- a/xarray/tests/test_formatting_html.py
+++ b/xarray/tests/test_formatting_html.py
@@ -7,6 +7,7 @@
 import xarray as xr
 from xarray.core import formatting_html as fh
 from xarray.core.coordinates import Coordinates
+from xarray.core.datatree import DataTree
 
 
 @pytest.fixture
@@ -196,3 +197,197 @@ def test_nonstr_variable_repr_html() -> None:
         html = v._repr_html_().strip()
     assert "<dt><span>22 :</span></dt><dd>bar</dd>" in html
     assert "<li><span>10</span>: 3</li></ul>" in html
+
+
+@pytest.fixture(scope="module", params=["some html", "some other html"])
+def repr(request):
+    return request.param
+
+
+class Test_summarize_datatree_children:
+    """
+    Unit tests for summarize_datatree_children.
+    """
+
+    func = staticmethod(fh.summarize_datatree_children)
+
+    @pytest.fixture(scope="class")
+    def childfree_tree_factory(self):
+        """
+        Fixture for a child-free DataTree factory.
+        """
+        from random import randint
+
+        def _childfree_tree_factory():
+            return DataTree(
+                data=xr.Dataset({"z": ("y", [randint(1, 100) for _ in range(3)])})
+            )
+
+        return _childfree_tree_factory
+
+    @pytest.fixture(scope="class")
+    def childfree_tree(self, childfree_tree_factory):
+        """
+        Fixture for a child-free DataTree.
+        """
+        return childfree_tree_factory()
+
+    @pytest.fixture(scope="function")
+    def mock_datatree_node_repr(self, monkeypatch):
+        """
+        Apply mocking for datatree_node_repr.
+        """
+
+        def mock(group_title, dt):
+            """
+            Mock with a simple result
+            """
+            return group_title + " " + str(id(dt))
+
+        monkeypatch.setattr(fh, "datatree_node_repr", mock)
+
+    @pytest.fixture(scope="function")
+    def mock_wrap_datatree_repr(self, monkeypatch):
+        """
+        Apply mocking for _wrap_datatree_repr.
+        """
+
+        def mock(r, *, end, **kwargs):
+            """
+            Mock by appending "end" or "not end".
+            """
+            return r + " " + ("end" if end else "not end") + "//"
+
+        monkeypatch.setattr(fh, "_wrap_datatree_repr", mock)
+
+    def test_empty_mapping(self):
+        """
+        Test with an empty mapping of children.
+        """
+        children: dict[str, DataTree] = {}
+        assert self.func(children) == (
+            "<div style='display: inline-grid; grid-template-columns: 100%; grid-column: 1 / -1'>"
+            "</div>"
+        )
+
+    def test_one_child(
+        self, childfree_tree, mock_wrap_datatree_repr, mock_datatree_node_repr
+    ):
+        """
+        Test with one child.
+
+        Uses a mock of _wrap_datatree_repr and datatree_node_repr to essentially mock
+        the inline lambda function "lines_callback".
+        """
+        # Create mapping of children
+        children = {"a": childfree_tree}
+
+        # Expect first line to be produced from the first child, and
+        # wrapped as the last child
+        first_line = f"a {id(children['a'])} end//"
+
+        assert self.func(children) == (
+            "<div style='display: inline-grid; grid-template-columns: 100%; grid-column: 1 / -1'>"
+            f"{first_line}"
+            "</div>"
+        )
+
+    def test_two_children(
+        self, childfree_tree_factory, mock_wrap_datatree_repr, mock_datatree_node_repr
+    ):
+        """
+        Test with two sibling children.
+
+        Uses a mock of _wrap_datatree_repr and datatree_node_repr to essentially mock
+        the inline lambda function "lines_callback".
+        """
+
+        # Create mapping of children
+        children = {"a": childfree_tree_factory(), "b": childfree_tree_factory()}
+
+        # Expect first line to be produced from the first child, and
+        # wrapped as _not_ the last child
+        first_line = f"a {id(children['a'])} not end//"
+
+        # Expect second line to be produced from the second child, and
+        # wrapped as the last child
+        second_line = f"b {id(children['b'])} end//"
+
+        assert self.func(children) == (
+            "<div style='display: inline-grid; grid-template-columns: 100%; grid-column: 1 / -1'>"
+            f"{first_line}"
+            f"{second_line}"
+            "</div>"
+        )
+
+
+class Test__wrap_datatree_repr:
+    """
+    Unit tests for _wrap_datatree_repr.
+    """
+
+    func = staticmethod(fh._wrap_datatree_repr)
+
+    def test_end(self, repr):
+        """
+        Test with end=True.
+        """
+        r = self.func(repr, end=True)
+        assert r == (
+            "<div style='display: inline-grid; grid-template-columns: 0px 20px auto; width: 100%;'>"
+            "<div style='"
+            "grid-column-start: 1;"
+            "border-right: 0.2em solid;"
+            "border-color: var(--xr-border-color);"
+            "height: 1.2em;"
+            "width: 0px;"
+            "'>"
+            "</div>"
+            "<div style='"
+            "grid-column-start: 2;"
+            "grid-row-start: 1;"
+            "height: 1em;"
+            "width: 20px;"
+            "border-bottom: 0.2em solid;"
+            "border-color: var(--xr-border-color);"
+            "'>"
+            "</div>"
+            "<div style='"
+            "grid-column-start: 3;"
+            "'>"
+            f"{repr}"
+            "</div>"
+            "</div>"
+        )
+
+    def test_not_end(self, repr):
+        """
+        Test with end=False.
+        """
+        r = self.func(repr, end=False)
+        assert r == (
+            "<div style='display: inline-grid; grid-template-columns: 0px 20px auto; width: 100%;'>"
+            "<div style='"
+            "grid-column-start: 1;"
+            "border-right: 0.2em solid;"
+            "border-color: var(--xr-border-color);"
+            "height: 100%;"
+            "width: 0px;"
+            "'>"
+            "</div>"
+            "<div style='"
+            "grid-column-start: 2;"
+            "grid-row-start: 1;"
+            "height: 1em;"
+            "width: 20px;"
+            "border-bottom: 0.2em solid;"
+            "border-color: var(--xr-border-color);"
+            "'>"
+            "</div>"
+            "<div style='"
+            "grid-column-start: 3;"
+            "'>"
+            f"{repr}"
+            "</div>"
+            "</div>"
+        )
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 5adc41fcfdf..90f385e7621 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -36,6 +36,7 @@ def dataset() -> xr.Dataset:
         {
             "foo": (("x", "y", "z"), np.random.randn(3, 4, 2)),
             "baz": ("x", ["e", "f", "g"]),
+            "cat": ("y", pd.Categorical(["cat1", "cat2", "cat2", "cat1"])),
         },
         {"x": ("x", ["a", "b", "c"], {"name": "x"}), "y": [1, 2, 3, 4], "z": [1, 2]},
     )
@@ -80,6 +81,7 @@ def test_groupby_dims_property(dataset, recwarn) -> None:
     )
     assert len(recwarn) == 0
 
+    dataset = dataset.drop_vars(["cat"])
     stacked = dataset.stack({"xy": ("x", "y")})
     assert tuple(stacked.groupby("xy", squeeze=False).dims) == tuple(
         stacked.isel(xy=[0]).dims
@@ -92,7 +94,7 @@ def test_groupby_sizes_property(dataset) -> None:
         assert dataset.groupby("x").sizes == dataset.isel(x=1).sizes
     with pytest.warns(UserWarning, match="The `squeeze` kwarg"):
         assert dataset.groupby("y").sizes == dataset.isel(y=1).sizes
-
+    dataset = dataset.drop_vars(["cat"])
     stacked = dataset.stack({"xy": ("x", "y")})
     with pytest.warns(UserWarning, match="The `squeeze` kwarg"):
         assert stacked.groupby("xy").sizes == stacked.isel(xy=0).sizes
@@ -762,6 +764,8 @@ def test_groupby_getitem(dataset) -> None:
         assert_identical(dataset.foo.sel(x="a"), dataset.foo.groupby("x")["a"])
     with pytest.warns(UserWarning, match="The `squeeze` kwarg"):
         assert_identical(dataset.foo.sel(z=1), dataset.foo.groupby("z")[1])
+    with pytest.warns(UserWarning, match="The `squeeze` kwarg"):
+        assert_identical(dataset.cat.sel(y=1), dataset.cat.groupby("y")[1])
 
     assert_identical(dataset.sel(x=["a"]), dataset.groupby("x", squeeze=False)["a"])
     assert_identical(dataset.sel(z=[1]), dataset.groupby("z", squeeze=False)[1])
@@ -771,6 +775,12 @@ def test_groupby_getitem(dataset) -> None:
     )
     assert_identical(dataset.foo.sel(z=[1]), dataset.foo.groupby("z", squeeze=False)[1])
 
+    assert_identical(dataset.cat.sel(y=[1]), dataset.cat.groupby("y", squeeze=False)[1])
+    with pytest.raises(
+        NotImplementedError, match="Cannot broadcast 1d-only pandas categorical array."
+    ):
+        dataset.groupby("boo", squeeze=False)
+    dataset = dataset.drop_vars(["cat"])
     actual = (
         dataset.groupby("boo", squeeze=False)["f"].unstack().transpose("x", "y", "z")
     )
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index c6597d5abb0..52935e9714e 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -37,7 +37,7 @@ def test_merge_arrays(self):
         assert_identical(actual, expected)
 
     def test_merge_datasets(self):
-        data = create_test_data(add_attrs=False)
+        data = create_test_data(add_attrs=False, use_extension_array=True)
 
         actual = xr.merge([data[["var1"]], data[["var2"]]])
         expected = data[["var1", "var2"]]
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index d9289aa6674..8a9345e74d4 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -1576,6 +1576,20 @@ def test_transpose_0d(self):
             actual = variable.transpose()
             assert_identical(actual, variable)
 
+    def test_pandas_cateogrical_dtype(self):
+        """Categorical data keeps its extension dtype and can be printed."""
+        variable = self.cls("x", pd.Categorical(np.arange(10, dtype="int64")))
+        print(variable)  # should not error
+        assert pd.api.types.is_extension_array_dtype(variable.dtype)
+
+    def test_pandas_cateogrical_no_chunk(self):
+        """Chunking a variable backed by a pandas Categorical raises ValueError."""
+        categorical = pd.Categorical(np.arange(10, dtype="int64"))
+        variable = self.cls("x", categorical)
+        expected_message = r".*was found to be a Pandas ExtensionArray.*"
+        with pytest.raises(ValueError, match=expected_message):
+            variable.chunk((5,))
+
     def test_squeeze(self):
         v = Variable(["x", "y"], [[1]])
         assert_identical(Variable([], 1), v.squeeze())
@@ -2373,6 +2387,11 @@ def test_multiindex(self):
     def test_pad(self, mode, xr_arg, np_arg):
         super().test_pad(mode, xr_arg, np_arg)
 
+    def test_pandas_cateogrical_dtype(self):
+        categorical = pd.Categorical(np.arange(10, dtype="int64"))
+        with pytest.raises(ValueError, match="was found to be a Pandas ExtensionArray"):
+            self.cls("x", categorical)  # construction itself is expected to raise
+
 
 @requires_sparse
 class TestVariableWithSparse: