DEPR: Warn on empty Series dtype inference #20802

Closed · wants to merge 2 commits
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/indexing.py
@@ -249,7 +249,7 @@ class MethodLookup(object):
goal_time = 0.2

def setup_cache(self):
-        s = Series()
+        s = Series(dtype='float64')
return s

def time_lookup_iloc(self, s):
12 changes: 6 additions & 6 deletions doc/source/missing_data.rst
@@ -196,17 +196,17 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0.

.. ipython:: python

-    pd.Series([np.nan]).sum()
-    pd.Series([]).sum()
+    pd.Series([np.nan], dtype='float').sum()
+
+    pd.Series([], dtype='float').sum()

The product of an empty or all-NA Series or column of a DataFrame is 1.

.. ipython:: python

-    pd.Series([np.nan]).prod()
-    pd.Series([]).prod()
+    pd.Series([np.nan], dtype='float').prod()
+
+    pd.Series([], dtype='float').prod()


NA values in GroupBy
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.0.txt
@@ -705,7 +705,7 @@ A ``Series`` will now correctly promote its dtype for assignment with incompat v

.. ipython:: python

-    s = pd.Series()
+    s = pd.Series(dtype='float')

**Previous behavior**:

4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.21.0.txt
@@ -411,14 +411,14 @@ Note that this also changes the sum of an empty ``Series``. Previously this alwa

.. code-block:: ipython

-    In [1]: pd.Series([]).sum()
+    In [1]: pd.Series([], dtype='float').sum()
Out[1]: 0

but for consistency with the all-NaN case, this was changed to return NaN as well:

.. ipython:: python

-    pd.Series([]).sum()
+    pd.Series([], dtype='float').sum()


.. _whatsnew_0210.api_breaking.loc:
10 changes: 5 additions & 5 deletions doc/source/whatsnew/v0.22.0.txt
@@ -38,7 +38,7 @@ The default sum for empty or all-*NA* ``Series`` is now ``0``.

.. code-block:: ipython

-    In [1]: pd.Series([]).sum()
+    In [1]: pd.Series([], dtype='float').sum()
Out[1]: nan

In [2]: pd.Series([np.nan]).sum()
@@ -48,7 +48,7 @@ The default sum for empty or all-*NA* ``Series`` is now ``0``.

.. ipython:: python

-    pd.Series([]).sum()
+    pd.Series([], dtype='float').sum()
pd.Series([np.nan]).sum()

The default behavior is the same as pandas 0.20.3 with bottleneck installed. It
@@ -60,7 +60,7 @@ keyword.

.. ipython:: python

-    pd.Series([]).sum(min_count=1)
+    pd.Series([], dtype='float').sum(min_count=1)

Thanks to the ``skipna`` parameter, the ``.sum`` on an all-*NA*
series is conceptually the same as the ``.sum`` of an empty one with
@@ -78,9 +78,9 @@ returning ``1`` instead.

.. ipython:: python

-    pd.Series([]).prod()
+    pd.Series([], dtype='float').prod()
pd.Series([np.nan]).prod()
-    pd.Series([]).prod(min_count=1)
+    pd.Series([], dtype='float').prod(min_count=1)

These changes affect :meth:`DataFrame.sum` and :meth:`DataFrame.prod` as well.
Finally, a few less obvious places in pandas are affected by this change.
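The empty/all-NA semantics documented in the whatsnew hunks above can be sketched as a small standalone function. This is only an illustration of the documented behavior, not pandas' actual implementation, and `nansum` is a hypothetical name:

```python
import math

def nansum(values, min_count=0):
    # Documented semantics: drop NaNs first (skipna), then require at
    # least min_count contributing values, otherwise return NaN.
    vals = [v for v in values if not math.isnan(v)]
    if len(vals) < min_count:
        return float('nan')
    return float(sum(vals))

print(nansum([]))               # 0.0 (empty sum, min_count=0 default)
print(nansum([float('nan')]))   # 0.0 (all-NA behaves like empty)
print(nansum([], min_count=1))  # nan
```

This mirrors why an all-NA ``.sum()`` equals the empty ``.sum()``: once ``skipna`` drops the NaNs, both paths see the same zero-length input.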
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
@@ -862,6 +862,7 @@ Other API Changes
Deprecations
~~~~~~~~~~~~

- The inferred dtype for an empty ``Series`` will change from ``float`` to ``object`` (:issue:`17261`).
Review comment (Contributor): double-backticks on Series

- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`).
- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`)
5 changes: 5 additions & 0 deletions pandas/core/arrays/categorical.py
@@ -324,6 +324,11 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
null_mask = isna(values)
if null_mask.any():
values = [values[idx] for idx in np.where(~null_mask)[0]]
if len(values) == 0:
# This avoids the FutureWarning in sanitize_array about
# inferring float -> object. I suppose that float is
# the correct dtype to infer for an all NaN array.
sanitize_dtype = 'float'
values = _sanitize_array(values, None, dtype=sanitize_dtype)

if dtype.categories is None:
6 changes: 5 additions & 1 deletion pandas/core/base.py
@@ -954,7 +954,11 @@ def _map_values(self, mapper, na_action=None):
# we specify the keys here to handle the
# possibility that they are tuples
from pandas import Series
-        mapper = Series(mapper)
+        if len(mapper) == 0:
+            mapper_dtype = 'float'
+        else:
+            mapper_dtype = None
+        mapper = Series(mapper, dtype=mapper_dtype)

Review comment (Contributor): I think this might be worthwhile to make a function in pandas.core.dtypes.cast.maybe_infer_1d_array_dtype

if isinstance(mapper, ABCSeries):
# Since values were input this means we came from either
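The reviewer's suggestion above — factoring the repeated length checks into a shared helper, tentatively `pandas.core.dtypes.cast.maybe_infer_1d_array_dtype` — could look like the sketch below. The name and location are the reviewer's proposal, not an existing pandas API:

```python
def maybe_infer_1d_array_dtype(values, default='float'):
    # Centralize the repeated len(...) == 0 checks from _map_values and
    # fillna: empty 1-D input gets an explicit dtype so the Series
    # constructor does not emit the new FutureWarning; non-empty input
    # returns None to defer to normal dtype inference.
    if len(values) == 0:
        return default
    return None

# Call sites would then collapse to a single line, e.g.:
#   mapper = Series(mapper, dtype=maybe_infer_1d_array_dtype(mapper))
print(maybe_infer_1d_array_dtype([]))      # float
print(maybe_infer_1d_array_dtype([1, 2]))  # None
```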
14 changes: 9 additions & 5 deletions pandas/core/generic.py
@@ -5399,7 +5399,11 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
if self.ndim == 1:
if isinstance(value, (dict, ABCSeries)):
from pandas import Series
-                value = Series(value)
+                if len(value) == 0:
+                    series_dtype = 'float'
+                else:
+                    series_dtype = None
+                value = Series(value, dtype=series_dtype)

Review comment (Contributor): see above
elif not is_list_like(value):
pass
else:
@@ -9376,13 +9380,13 @@ def _doc_parms(cls):
--------
By default, the sum of an empty or all-NA Series is ``0``.

->>> pd.Series([]).sum()  # min_count=0 is the default
+>>> pd.Series([], dtype='float').sum()  # min_count=0 is the default
0.0

This can be controlled with the ``min_count`` parameter. For example, if
you'd like the sum of an empty series to be NaN, pass ``min_count=1``.

->>> pd.Series([]).sum(min_count=1)
+>>> pd.Series([], dtype='float').sum(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
@@ -9400,12 +9404,12 @@ def _doc_parms(cls):
--------
By default, the product of an empty or all-NA Series is ``1``

->>> pd.Series([]).prod()
+>>> pd.Series([], dtype='float').prod()
1.0

This can be controlled with the ``min_count`` parameter

->>> pd.Series([]).prod(min_count=1)
+>>> pd.Series([], dtype='float').prod(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
30 changes: 2 additions & 28 deletions pandas/core/indexes/base.py
@@ -42,7 +42,6 @@
is_datetime64_any_dtype,
is_datetime64tz_dtype,
is_timedelta64_dtype,
-    is_hashable,
needs_i8_conversion,
is_iterator, is_list_like,
is_scalar)
@@ -1313,33 +1312,9 @@ def _get_names(self):
return FrozenList((self.name, ))

def _set_names(self, values, level=None):
"""
Set new names on index. Each name has to be a hashable type.

Parameters
----------
values : str or sequence
name(s) to set
level : int, level name, or sequence of int/level names (default None)
If the index is a MultiIndex (hierarchical), level(s) to set (None
for all levels). Otherwise level must be None
Review comment (Contributor): looks like you have some extra commits?


Raises
------
TypeError if each name is not hashable.
"""
if not is_list_like(values):
raise ValueError('Names must be a list-like')
if len(values) != 1:
raise ValueError('Length of new names must be 1, got %d' %
len(values))

# GH 20527
# All items in 'name' need to be hashable:
for name in values:
if not is_hashable(name):
raise TypeError('{}.name must be a hashable type'
.format(self.__class__.__name__))
self.name = values[0]

names = property(fset=_set_names, fget=_get_names)
@@ -1365,9 +1340,9 @@ def set_names(self, names, level=None, inplace=False):
Examples
--------
>>> Index([1, 2, 3, 4]).set_names('foo')
-Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
+Int64Index([1, 2, 3, 4], dtype='int64')
>>> Index([1, 2, 3, 4]).set_names(['foo'])
-Int64Index([1, 2, 3, 4], dtype='int64', name='foo')
+Int64Index([1, 2, 3, 4], dtype='int64')
>>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
(2, u'one'), (2, u'two')],
names=['foo', 'bar'])
@@ -1380,7 +1355,6 @@ def set_names(self, names, level=None, inplace=False):
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=[u'baz', u'bar'])
"""

if level is not None and self.nlevels == 1:
raise ValueError('Level must be None for non-MultiIndex')

38 changes: 5 additions & 33 deletions pandas/core/indexes/multi.py
@@ -16,7 +16,6 @@
_ensure_platform_int,
is_categorical_dtype,
is_object_dtype,
-    is_hashable,
is_iterator,
is_list_like,
pandas_dtype,
@@ -635,29 +634,12 @@ def _get_names(self):

def _set_names(self, names, level=None, validate=True):
"""
Set new names on index. Each name has to be a hashable type.

Parameters
----------
values : str or sequence
name(s) to set
level : int, level name, or sequence of int/level names (default None)
If the index is a MultiIndex (hierarchical), level(s) to set (None
for all levels). Otherwise level must be None
validate : boolean, default True
validate that the names match level lengths

Raises
------
TypeError if each name is not hashable.

Notes
-----
sets names on levels. WARNING: mutates!

Note that you generally want to set this *after* changing levels, so
that it only acts on copies
"""

# GH 15110
# Don't allow a single string for names in a MultiIndex
if names is not None and not is_list_like(names):
@@ -680,20 +662,10 @@ def _set_names(self, names, level=None, validate=True):

# set the name
for l, name in zip(level, names):
-            if name is not None:
-
-                # GH 20527
-                # All items in 'names' need to be hashable:
-                if not is_hashable(name):
-                    raise TypeError('{}.name must be a hashable type'
-                                    .format(self.__class__.__name__))
-
-                if name in used:
-                    raise ValueError(
-                        'Duplicated level name: "{}", assigned to '
-                        'level {}, is already used for level '
-                        '{}.'.format(name, l, used[name]))
-
+            if name is not None and name in used:
+                raise ValueError('Duplicated level name: "{}", assigned to '
+                                 'level {}, is already used for level '
+                                 '{}.'.format(name, l, used[name]))
self.levels[l].rename(name, inplace=True)
used[name] = l

34 changes: 22 additions & 12 deletions pandas/core/series.py
@@ -4084,20 +4084,30 @@ def _try_cast(arr, take_fast_path):
subarr = data.copy()
return subarr

-        elif isinstance(data, (list, tuple)) and len(data) > 0:
-            if dtype is not None:
-                try:
-                    subarr = _try_cast(data, False)
-                except Exception:
-                    if raise_cast_failure:  # pragma: no cover
-                        raise
-                    subarr = np.array(data, dtype=object, copy=copy)
-                    subarr = lib.maybe_convert_objects(subarr)
-            else:
-                subarr = maybe_convert_platform(data)
-            subarr = maybe_cast_to_datetime(subarr, dtype)
+        elif isinstance(data, (list, tuple)):
+            if len(data) > 0:
+                if dtype is not None:
+                    try:
+                        subarr = _try_cast(data, False)
+                    except Exception:
+                        if raise_cast_failure:  # pragma: no cover
+                            raise
+                        subarr = np.array(data, dtype=object, copy=copy)
+                        subarr = lib.maybe_convert_objects(subarr)
+                else:
+                    subarr = maybe_convert_platform(data)
+                subarr = maybe_cast_to_datetime(subarr, dtype)
+            else:
+                # subarr = np.array([], dtype=dtype or 'object')
+                if dtype is None:
+                    msg = ("Inferring 'float' dtype for a length-zero array.\n"
+                           "In a future version of pandas this will change to "
+                           "'object' dtype.\n\tTo maintain the previous behavior, "
+                           "use 'dtype=float'.\n\tTo adopt the new behavior, use "
+                           "'dtype=object'.")
+                    warnings.warn(msg, FutureWarning, stacklevel=3)
+                subarr = _try_cast(data, False)

elif isinstance(data, range):
# GH 16804
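The warning branch added to `_sanitize_array` can be exercised without pandas; the standalone sketch below reproduces the control flow of the new length-zero path (simplified, with a hypothetical function name):

```python
import warnings

def sanitize_empty(data, dtype=None):
    # Simplified version of the new branch: for length-zero list/tuple
    # input with no explicit dtype, warn that the inferred dtype will
    # change from float to object, then fall back to float64 for now.
    if len(data) == 0 and dtype is None:
        warnings.warn(
            "Inferring 'float' dtype for a length-zero array. "
            "In a future version of pandas this will change to 'object' "
            "dtype. To maintain the previous behavior, use 'dtype=float'. "
            "To adopt the new behavior, use 'dtype=object'.",
            FutureWarning, stacklevel=2)
        dtype = 'float64'
    return dtype

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    inferred = sanitize_empty([])
print(inferred)  # float64, with one FutureWarning recorded
```

Passing an explicit `dtype` (either `float` to keep the old behavior or `object` to opt into the new one) is exactly how the rest of this PR silences the warning at each call site.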
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
@@ -58,7 +58,7 @@ def _maybe_cache(arg, format, cache, tz, convert_listlike):
Cache of converted, unique dates. Can be empty
"""
from pandas import Series
-    cache_array = Series()
+    cache_array = Series(dtype='float')
if cache:
# Perform a quicker unique check
from pandas import Index
4 changes: 2 additions & 2 deletions pandas/plotting/_core.py
@@ -1656,7 +1656,7 @@ def maybe_color_bp(self, bp):
def _make_plot(self):
if self.subplots:
from pandas.core.series import Series
-            self._return_obj = Series()
+            self._return_obj = Series(dtype='float')

for i, (label, y) in enumerate(self._iter_data()):
ax = self._get_ax(i)
@@ -2603,7 +2603,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
axes = _flatten(axes)

from pandas.core.series import Series
-        ret = Series()
+        ret = Series(dtype='float')
for (key, group), ax in zip(grouped, axes):
d = group.boxplot(ax=ax, column=column, fontsize=fontsize,
rot=rot, grid=grid, **kwds)
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/test_inference.py
@@ -1173,7 +1173,7 @@ def test_is_scalar_pandas_scalars(self):
assert is_scalar(DateOffset(days=1))

def test_is_scalar_pandas_containers(self):
-        assert not is_scalar(Series())
+        assert not is_scalar(Series(dtype='float'))
assert not is_scalar(Series([1]))
assert not is_scalar(DataFrame())
assert not is_scalar(DataFrame([[1]]))
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_analytics.py
@@ -1252,7 +1252,8 @@ def test_isin(self):
expected = DataFrame([df.loc[s].isin(other) for s in df.index])
tm.assert_frame_equal(result, expected)

-    @pytest.mark.parametrize("empty", [[], Series(), np.array([])])
+    @pytest.mark.parametrize("empty", [[], Series(dtype='float'),
+                                       np.array([])])
def test_isin_empty(self, empty):
# see gh-16991
df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']})