REF: Pieces broken off of pandas-dev#24024 (pandas-dev#24364)

Pingviinituutti · Feb 28, 2019 · dd6c082 · dd6c082
1 parent b167e1d
commit dd6c082
Show file tree

Hide file tree

Showing 13 changed files with 112 additions and 52 deletions.
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -296,9 +296,22 @@ def __iter__(self):
 
     @property
     def asi8(self):
+        # type: () -> ndarray
+        """
+        Integer representation of the values.
+
+        Returns
+        -------
+        ndarray
+            An ndarray with int64 dtype.
+        """
         # do not cache or you'll create a memory leak
         return self._data.view('i8')
 
+    @property
+    def _ndarray_values(self):
+        return self._data
+
     # ----------------------------------------------------------------
     # Rendering Methods
 
@@ -469,7 +482,7 @@ def _isnan(self):
         return (self.asi8 == iNaT)
 
     @property  # NB: override with cache_readonly in immutable subclasses
-    def hasnans(self):
+    def _hasnans(self):
         """
         return if I have any nans; enables various perf speedups
         """
@@ -493,7 +506,7 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
         This is an internal routine
         """
 
-        if self.hasnans:
+        if self._hasnans:
             if convert:
                 result = result.astype(convert)
             if fill_value is None:
@@ -696,7 +709,7 @@ def _add_delta_tdi(self, other):
         new_values = checked_add_with_arr(self_i8, other_i8,
                                           arr_mask=self._isnan,
                                           b_mask=other._isnan)
-        if self.hasnans or other.hasnans:
+        if self._hasnans or other._hasnans:
             mask = (self._isnan) | (other._isnan)
             new_values[mask] = iNaT
         return new_values.view('i8')
@@ -764,7 +777,7 @@ def _sub_period_array(self, other):
                                           b_mask=other._isnan)
 
         new_values = np.array([self.freq.base * x for x in new_values])
-        if self.hasnans or other.hasnans:
+        if self._hasnans or other._hasnans:
             mask = (self._isnan) | (other._isnan)
             new_values[mask] = NaT
         return new_values
@@ -1085,7 +1098,7 @@ def _evaluate_compare(self, other, op):
             elif lib.is_scalar(lib.item_from_zerodim(other)):
                 # ndarray scalar
                 other = [other.item()]
-            other = type(self)(other)
+            other = type(self)._from_sequence(other)
 
         # compare
         result = op(self.asi8, other.asi8)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -115,7 +115,7 @@ def wrapper(self, other):
         else:
             if isinstance(other, list):
                 try:
-                    other = type(self)(other)
+                    other = type(self)._from_sequence(other)
                 except ValueError:
                     other = np.array(other, dtype=np.object_)
             elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
@@ -147,7 +147,7 @@ def wrapper(self, other):
             if o_mask.any():
                 result[o_mask] = nat_result
 
-        if self.hasnans:
+        if self._hasnans:
             result[self._isnan] = nat_result
 
         return result
@@ -349,14 +349,32 @@ def _box_func(self):
 
     @property
     def dtype(self):
+        # type: () -> Union[np.dtype, DatetimeTZDtype]
+        """
+        The dtype for the DatetimeArray.
+
+        Returns
+        -------
+        numpy.dtype or DatetimeTZDtype
+            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
+            is returned.
+
+            If the values are tz-aware, then the ``DatetimeTZDtype``
+            is returned.
+        """
         if self.tz is None:
             return _NS_DTYPE
         return DatetimeTZDtype('ns', self.tz)
 
     @property
     def tz(self):
         """
-        Return timezone.
+        Return timezone, if any.
+
+        Returns
+        -------
+        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
+            Returns None when the array is tz-naive.
         """
         # GH 18595
         return self._tz
@@ -534,7 +552,7 @@ def _sub_datetime_arraylike(self, other):
         other_i8 = other.asi8
         new_values = checked_add_with_arr(self_i8, -other_i8,
                                           arr_mask=self._isnan)
-        if self.hasnans or other.hasnans:
+        if self._hasnans or other._hasnans:
             mask = (self._isnan) | (other._isnan)
             new_values[mask] = iNaT
         return new_values.view('timedelta64[ns]')

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -84,7 +84,7 @@ def wrapper(self, other):
             other = Period(other, freq=self.freq)
             result = op(other.ordinal)
 
-        if self.hasnans:
+        if self._hasnans:
             result[self._isnan] = nat_result
 
         return result
@@ -499,7 +499,7 @@ def _time_shift(self, n, freq=None):
                             "{cls}._time_shift"
                             .format(cls=type(self).__name__))
         values = self.asi8 + n * self.freq.n
-        if self.hasnans:
+        if self._hasnans:
             values[self._isnan] = iNaT
         return type(self)(values, freq=self.freq)
 
@@ -561,7 +561,7 @@ def asfreq(self, freq=None, how='E'):
 
         new_data = period_asfreq_arr(ordinal, base1, base2, end)
 
-        if self.hasnans:
+        if self._hasnans:
             new_data[self._isnan] = iNaT
 
         return type(self)(new_data, freq=freq)
@@ -581,7 +581,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
         else:
             formatter = lambda dt: u'%s' % dt
 
-        if self.hasnans:
+        if self._hasnans:
             mask = self._isnan
             values[mask] = na_rep
             imask = ~mask
@@ -668,7 +668,7 @@ def _sub_period(self, other):
         new_data = asi8 - other.ordinal
         new_data = np.array([self.freq * x for x in new_data])
 
-        if self.hasnans:
+        if self._hasnans:
             new_data[self._isnan] = NaT
 
         return new_data
@@ -983,7 +983,7 @@ def dt64arr_to_periodarr(data, freq, tz=None):
 
     """
     if data.dtype != np.dtype('M8[ns]'):
-        raise ValueError('Wrong dtype: %s' % data.dtype)
+        raise ValueError('Wrong dtype: {dtype}'.format(dtype=data.dtype))
 
     if freq is None:
         if isinstance(data, ABCIndexClass):

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None):
     def f(self):
         values = self.asi8
         result = get_timedelta_field(values, alias)
-        if self.hasnans:
+        if self._hasnans:
             result = self._maybe_mask_results(result, fill_value=None,
                                               convert='float64')
 
@@ -102,7 +102,7 @@ def wrapper(self, other):
             if o_mask.any():
                 result[o_mask] = nat_result
 
-        if self.hasnans:
+        if self._hasnans:
             result[self._isnan] = nat_result
 
         return result
@@ -714,7 +714,7 @@ def components(self):
 
         columns = ['days', 'hours', 'minutes', 'seconds',
                    'milliseconds', 'microseconds', 'nanoseconds']
-        hasnans = self.hasnans
+        hasnans = self._hasnans
         if hasnans:
             def f(x):
                 if isna(x):

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -682,7 +682,7 @@ def __array__(self, dtype=None):
         """
         The array interface, return my values.
         """
-        return self._data.view(np.ndarray)
+        return np.asarray(self._data, dtype=dtype)
 
     def __array_wrap__(self, result, context=None):
         """
@@ -739,6 +739,8 @@ def view(self, cls=None):
         Parameters
         ----------
         dtype : numpy dtype or pandas type
+            Note that any integer `dtype` is treated as ``'int64'``,
+            regardless of the sign and size.
         copy : bool, default True
             By default, astype always returns a newly allocated object.
             If copy is set to False and internal requirements on dtype are

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -40,17 +40,22 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
     # override DatetimeLikeArrayMixin method
     copy = Index.copy
     unique = Index.unique
-    take = Index.take
 
     # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
     # properties there.  They can be made into cache_readonly for Index
     # subclasses bc they are immutable
     inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
     _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
-    hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)
+    hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
+    _hasnans = hasnans  # for index / array -agnostic code
     _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
     resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)
 
+    # A few methods that are shared
+    _maybe_mask_results = DatetimeLikeArrayMixin._maybe_mask_results
+
+    # ------------------------------------------------------------------------
+
     def equals(self, other):
         """
         Determines if two Index objects contain the same elements.

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -227,11 +227,11 @@ def __new__(cls, data=None,
                           "endpoints is deprecated.  Use "
                           "`pandas.date_range` instead.",
                           FutureWarning, stacklevel=2)
-            result = cls._generate_range(start, end, periods,
-                                         freq=freq, tz=tz, normalize=normalize,
-                                         closed=closed, ambiguous=ambiguous)
-            result.name = name
-            return result
+            dtarr = DatetimeArray._generate_range(
+                start, end, periods,
+                freq=freq, tz=tz, normalize=normalize,
+                closed=closed, ambiguous=ambiguous)
+            return cls(dtarr, name=name)
 
         if is_scalar(data):
             raise TypeError("{cls}() must be called with a "
@@ -1473,12 +1473,12 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
     if freq is None and com._any_none(periods, start, end):
         freq = 'D'
 
-    result = DatetimeIndex._generate_range(
+    dtarr = DatetimeArray._generate_range(
         start=start, end=end, periods=periods,
         freq=freq, tz=tz, normalize=normalize,
         closed=closed, **kwargs)
 
-    result.name = name
+    result = DatetimeIndex(dtarr, name=name)
     return result
 
 

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -125,15 +125,6 @@ def _join_i8_wrapper(joinf, **kwargs):
     _left_indexer_unique = _join_i8_wrapper(
         libjoin.left_join_indexer_unique_int64, with_indexers=False)
 
-    # define my properties & methods for delegation
-    _other_ops = []
-    _bool_ops = []
-    _object_ops = ['freq']
-    _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
-    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
-    _datetimelike_methods = ["to_pytimedelta", "total_seconds",
-                             "round", "floor", "ceil"]
-
     _engine_type = libindex.TimedeltaEngine
 
     _comparables = ['name', 'freq']
@@ -143,6 +134,14 @@ def _join_i8_wrapper(joinf, **kwargs):
 
     _freq = None
 
+    _box_func = TimedeltaArray._box_func
+    _bool_ops = TimedeltaArray._bool_ops
+    _object_ops = TimedeltaArray._object_ops
+    _field_ops = TimedeltaArray._field_ops
+    _datetimelike_ops = TimedeltaArray._datetimelike_ops
+    _datetimelike_methods = TimedeltaArray._datetimelike_methods
+    _other_ops = TimedeltaArray._other_ops
+
     # -------------------------------------------------------------------
     # Constructors
 
@@ -163,10 +162,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                           "endpoints is deprecated.  Use "
                           "`pandas.timedelta_range` instead.",
                           FutureWarning, stacklevel=2)
-            result = cls._generate_range(start, end, periods, freq,
-                                         closed=closed)
-            result.name = name
-            return result
+            tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
+                                                   closed=closed)
+            return cls(tdarr, name=name)
 
         if is_scalar(data):
             raise TypeError('{cls}() must be called with a '
@@ -766,7 +764,6 @@ def timedelta_range(start=None, end=None, periods=None, freq=None,
         freq = 'D'
 
     freq, freq_infer = dtl.maybe_infer_freq(freq)
-    result = TimedeltaIndex._generate_range(start, end, periods, freq,
-                                            closed=closed)
-    result.name = name
-    return result
+    tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
+                                           closed=closed)
+    return TimedeltaIndex(tdarr, name=name)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1592,8 +1592,8 @@ def _right_outer_join(x, y, max_groups):
 def _factorize_keys(lk, rk, sort=True):
     # Some pre-processing for non-ndarray lk / rk
     if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
-        lk = lk.values
-        rk = rk.values
+        lk = lk._data
+        rk = rk._data
 
     elif (is_categorical_dtype(lk) and
             is_categorical_dtype(rk) and

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -171,6 +171,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
         - ndarray of Timestamps if box=False
     """
     from pandas import DatetimeIndex
+    from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
     from pandas.core.arrays.datetimes import (
         maybe_convert_dtype, objects_to_datetime64ns)
 
@@ -179,14 +180,14 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
 
     # these are shortcutable
     if is_datetime64tz_dtype(arg):
-        if not isinstance(arg, DatetimeIndex):
+        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
             return DatetimeIndex(arg, tz=tz, name=name)
         if tz == 'utc':
             arg = arg.tz_convert(None).tz_localize(tz)
         return arg
 
     elif is_datetime64_ns_dtype(arg):
-        if box and not isinstance(arg, DatetimeIndex):
+        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
             try:
                 return DatetimeIndex(arg, tz=tz, name=name)
             except ValueError:

diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
@@ -8,6 +8,17 @@
 import pandas.util.testing as tm
 
 
+class TestTimedeltaArrayConstructor(object):
+    def test_copy(self):
+        data = np.array([1, 2, 3], dtype='m8[ns]')
+        arr = TimedeltaArray(data, copy=False)
+        assert arr._data is data
+
+        arr = TimedeltaArray(data, copy=True)
+        assert arr._data is not data
+        assert arr._data.base is not data
+
+
 class TestTimedeltaArray(object):
     def test_from_sequence_dtype(self):
         msg = r"Only timedelta64\[ns\] dtype is valid"