Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: use float64_t consistently instead of double, double_t #23583

Merged
merged 19 commits into from
Nov 11, 2018
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from util cimport numeric


cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
cdef:
numeric t
Expand Down
19 changes: 9 additions & 10 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ from numpy cimport (ndarray,
NPY_FLOAT32, NPY_FLOAT64,
NPY_OBJECT,
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t,
double_t)
uint32_t, uint64_t, float32_t, float64_t)
cnp.import_array()


Expand All @@ -32,10 +31,9 @@ import missing

cdef float64_t FP_ERR = 1e-13

cdef double NaN = <double>np.NaN
cdef double nan = NaN
cdef float64_t NaN = <float64_t>np.NaN

cdef int64_t iNaT = get_nat()
cdef int64_t NPY_NAT = get_nat()

tiebreakers = {
'average': TIEBREAK_AVERAGE,
Expand Down Expand Up @@ -197,9 +195,10 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
return result, counts


# TODO: redundant with groupby.kth_smallest_c
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not actually

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. The typing is more specific in the groupby version, but the code itself is nearly identical

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try to remove and you will see why it’s here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll get rid of the comment, but am kind of surprised: you're usually Holy Crusader against redundant code, and the body of this function is very nearly copy/pasted

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my point is i don’t think it’s easy to remove
you are more than welcome to try

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No you're right. I removed the comment.

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will have a big performance slowdown

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's never used in Cython, and nogil isn't allowed for def functions. There is a with nogil block just below this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please test — I once changed this (and tried to remove it) and the results were all negative.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indistinguishable:

master:

In [3]: arr = np.arange(10000, dtype=np.int64)

In [4]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 165.65 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.42 µs per loop

In [5]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 4.39 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 10.3 µs per loop

In [6]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 4.21 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 11.2 µs per loop

PR:

In [3]: arr = np.arange(10000, dtype=np.int64)

In [4]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 12.06 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.71 µs per loop

In [5]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 9.48 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.6 µs per loop

In [6]: %timeit pd._libs.algos.kth_smallest(arr, 4)
The slowest run took 6.23 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 9.95 µs per loop

Similar for other dtypes

cdef:
Py_ssize_t i, j, l, m, n = a.shape[0]
numeric x
Expand Down Expand Up @@ -812,23 +811,23 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
n = len(arr)

if n == 1:
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == iNaT):
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == NPY_NAT):
# single value is NaN
return False, False, True
else:
return True, True, True
elif n < 2:
return True, True, True

if timelike and <int64_t>arr[0] == iNaT:
if timelike and <int64_t>arr[0] == NPY_NAT:
return False, False, True

if algos_t is not object:
with nogil:
prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and <int64_t>cur == iNaT:
if timelike and <int64_t>cur == NPY_NAT:
is_monotonic_inc = 0
is_monotonic_dec = 0
break
Expand All @@ -853,7 +852,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and <int64_t>cur == iNaT:
if timelike and <int64_t>cur == NPY_NAT:
is_monotonic_inc = 0
is_monotonic_dec = 0
break
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# ensure_dtype
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num

Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
{{elif dtype == 'float64'}}
mask = np.isnan(values)
{{elif dtype == 'int64'}}
mask = values == iNaT
mask = values == NPY_NAT

# create copy in case of iNaT
# create copy in case of NPY_NAT
# values are mutated inplace
if mask.any():
values = values.copy()
Expand Down Expand Up @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
{{if dtype != 'uint64'}}
isnan = sorted_mask[i]
if isnan and keep_na:
ranks[argsorted[i]] = nan
ranks[argsorted[i]] = NaN
continue
{{endif}}

Expand Down Expand Up @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
{{elif dtype == 'float64'}}
mask = np.isnan(values)
{{elif dtype == 'int64'}}
mask = values == iNaT
mask = values == NPY_NAT
{{endif}}

np.putmask(values, mask, nan_value)
Expand Down Expand Up @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
{{else}}
if (val == nan_value) and keep_na:
{{endif}}
ranks[i, argsorted[i, j]] = nan
ranks[i, argsorted[i, j]] = NaN

{{if dtype == 'object'}}
infs += 1
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# take_1d, take_2d
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down
34 changes: 16 additions & 18 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# -*- coding: utf-8 -*-

cimport cython
from cython cimport Py_ssize_t
import cython
from cython import Py_ssize_t

from libc.stdlib cimport malloc, free

import numpy as np
cimport numpy as cnp
from numpy cimport (ndarray,
double_t,
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t)
cnp.import_array()
Expand All @@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers

cdef int64_t iNaT = get_nat()
cdef int64_t NPY_NAT = get_nat()

cdef double NaN = <double>np.NaN
cdef double nan = NaN
cdef float64_t NaN = <float64_t>np.NaN


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
Expand Down Expand Up @@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
return result


# TODO: Is this redundant with algos.kth_smallest?
# TODO: Is this redundant with algos.kth_smallest
cdef inline float64_t kth_smallest_c(float64_t* a,
Py_ssize_t k,
Py_ssize_t n) nogil:
cdef:
Py_ssize_t i, j, l, m
double_t x, t
float64_t x, t

l = 0
m = n - 1
Expand Down Expand Up @@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
cdef:
Py_ssize_t i, j, N, K, ngroups, size
ndarray[int64_t] _counts
ndarray data
ndarray[float64_t, ndim=2] data
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down Expand Up @@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
float64_t[:, :] values,
int64_t[:] labels,
const float64_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike,
bint skipna=True):
"""
Expand Down Expand Up @@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out,
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
numeric[:, :] values,
int64_t[:] labels,
const int64_t[:] labels,
is_datetimelike,
bint skipna=True):
"""
Expand Down Expand Up @@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
int ngroups, int periods):
cdef:
Py_ssize_t N, i, j, ii
Expand Down Expand Up @@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
"""
cdef:
Py_ssize_t i, N
ndarray[int64_t] sorted_labels
int64_t[:] sorted_labels
int64_t idx, curr_fill_idx=-1, filled_vals=0

N = len(out)
Expand Down Expand Up @@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(ndarray[uint8_t] out,
ndarray[int64_t] labels,
ndarray[uint8_t] values,
ndarray[uint8_t] mask,
def group_any_all(uint8_t[:] out,
const int64_t[:] labels,
const uint8_t[:] values,
const uint8_t[:] mask,
object val_test,
bint skipna):
"""Aggregated boolean values to show truthfulness of group elements
Expand Down
22 changes: 11 additions & 11 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

cdef extern from "numpy/npy_math.h":
double NAN "NPY_NAN"
float64_t NAN "NPY_NAN"
_int64_max = np.iinfo(np.int64).max

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# group_nth, group_last, group_rank
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# name, c_type, nan_val
dtypes = [('float64', 'float64_t', 'NAN'),
('float32', 'float32_t', 'NAN'),
('int64', 'int64_t', 'iNaT'),
('int64', 'int64_t', 'NPY_NAT'),
('object', 'object', 'NAN')]

def get_dispatch(dtypes):
Expand Down Expand Up @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
# to the result where appropriate
if keep_na and mask[_as[i]]:
for j in range(i - dups + 1, i + 1):
out[_as[j], 0] = nan
out[_as[j], 0] = NaN
grp_na_count = dups
elif tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
Expand Down Expand Up @@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
if groupby_t is int64_t:
# Note: evaluated at compile-time
maxx[:] = -_int64_max
nan_val = iNaT
nan_val = NPY_NAT
else:
maxx[:] = -np.inf
nan_val = NAN
Expand Down Expand Up @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out,
minx = np.empty_like(out)
if groupby_t is int64_t:
minx[:] = _int64_max
nan_val = iNaT
nan_val = NPY_NAT
else:
minx[:] = np.inf
nan_val = NAN
Expand Down Expand Up @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,

# val = nan
if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
if is_datetimelike and val == NPY_NAT:
out[i, j] = NPY_NAT
else:
mval = accum[lab, j]
if val < mval:
Expand Down Expand Up @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
val = values[i, j]

if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
if is_datetimelike and val == NPY_NAT:
out[i, j] = NPY_NAT
else:
mval = accum[lab, j]
if val > mval:
Expand Down
8 changes: 3 additions & 5 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, uint8_t, uint32_t
from numpy cimport ndarray, uint8_t, uint32_t, float64_t
cnp.import_array()

cdef extern from "numpy/npy_math.h":
double NAN "NPY_NAN"
float64_t NAN "NPY_NAN"


from khash cimport (
Expand Down Expand Up @@ -42,9 +42,7 @@ cimport util
from missing cimport checknull


nan = np.nan

cdef int64_t iNaT = util.get_nat()
cdef int64_t NPY_NAT = util.get_nat()
_SIZE_HINT_LIMIT = (1 << 20) + 7


Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,9 @@ cdef class HashTable:
{{py:

# name, dtype, float_group, default_na_value
dtypes = [('Float64', 'float64', True, 'nan'),
dtypes = [('Float64', 'float64', True, 'np.nan'),
('UInt64', 'uint64', False, 0),
('Int64', 'int64', False, 'iNaT')]
('Int64', 'int64', False, 'NPY_NAT')]

}}

Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
from pandas._libs.missing import checknull

cdef int64_t iNaT = util.get_nat()
cdef int64_t NPY_NAT = util.get_nat()


cdef inline bint is_definitely_invalid_key(object val):
Expand Down Expand Up @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
elif value is None or value != value:
return iNaT
return NPY_NAT
elif util.is_string_object(value):
return Timestamp(value).value
raise ValueError("cannot set a Timestamp with a non-timestamp")
Expand All @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, timedelta):
return Timedelta(value).value
elif value is None or value != value:
return iNaT
return NPY_NAT
elif util.is_string_object(value):
return Timedelta(value).value
raise ValueError("cannot set a Timedelta with a non-timedelta")
Expand Down
Loading