pythonize cython code (pandas-dev#22638)

Commit c040353 (1 parent: 7ce722c) · forking-repos · Sep 12, 2018
Show file tree

Hide file tree

Showing 31 changed files with 152 additions and 155 deletions.
diff --git a/.coveragerc b/.coveragerc
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t
 
 from libc.stdlib cimport malloc, free
 from libc.string cimport memmove
@@ -114,7 +114,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_lexsorted(list list_of_arrays):
+def is_lexsorted(list_of_arrays: list) -> bint:
     cdef:
         Py_ssize_t i
         Py_ssize_t n, nlevels

diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -3,7 +3,6 @@
 # at https://github.com/veorq/SipHash
 
 import cython
-from cpython cimport PyBytes_Check, PyUnicode_Check
 from libc.stdlib cimport malloc, free
 
 import numpy as np
@@ -44,6 +43,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
         char **vecs
         char *cdata
         object val
+        list datas = []
 
     k = <bytes>key.encode(encoding)
     kb = <uint8_t *>k
@@ -57,12 +57,11 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     vecs = <char **> malloc(n * sizeof(char *))
     lens = <uint64_t*> malloc(n * sizeof(uint64_t))
 
-    cdef list datas = []
     for i in range(n):
         val = arr[i]
-        if PyBytes_Check(val):
+        if isinstance(val, bytes):
             data = <bytes>val
-        elif PyUnicode_Check(val):
+        elif isinstance(val, unicode):
             data = <bytes>val.encode(encoding)
         elif val is None or is_nan(val):
             # null, stringify and encode
@@ -132,15 +131,6 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
     v2[0] = _rotl(v2[0], 32)
 
 
-# TODO: This appears unused; remove?
-cpdef uint64_t siphash(bytes data, bytes key) except? 0:
-    if len(key) != 16:
-        raise ValueError("key should be a 16-byte bytestring, "
-                         "got {key} (len {klen})"
-                         .format(key=key, klen=len(key)))
-    return low_level_siphash(data, len(data), key)
-
-
 @cython.cdivision(True)
 cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
                                 uint8_t* key) nogil:

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -1,10 +1,7 @@
 # -*- coding: utf-8 -*-
 from datetime import datetime, timedelta, date
 
-cimport cython
-
-from cpython cimport PyTuple_Check, PyList_Check
-from cpython.slice cimport PySlice_Check
+import cython
 
 import numpy as np
 cimport numpy as cnp
@@ -30,15 +27,15 @@ cdef int64_t iNaT = util.get_nat()
 
 
 cdef inline bint is_definitely_invalid_key(object val):
-    if PyTuple_Check(val):
+    if isinstance(val, tuple):
         try:
             hash(val)
         except TypeError:
             return True
 
     # we have a _data, means we are a NDFrame
-    return (PySlice_Check(val) or util.is_array(val)
-            or PyList_Check(val) or hasattr(val, '_data'))
+    return (isinstance(val, slice) or util.is_array(val)
+            or isinstance(val, list) or hasattr(val, '_data'))
 
 
 cpdef get_value_at(ndarray arr, object loc, object tz=None):
@@ -88,7 +85,7 @@ cdef class IndexEngine:
             void* data_ptr
 
         loc = self.get_loc(key)
-        if PySlice_Check(loc) or util.is_array(loc):
+        if isinstance(loc, slice) or util.is_array(loc):
             return arr[loc]
         else:
             return get_value_at(arr, loc, tz=tz)
@@ -640,7 +637,7 @@ cdef class BaseMultiIndexCodesEngine:
     def get_loc(self, object key):
         if is_definitely_invalid_key(key):
             raise TypeError("'{key}' is an invalid key".format(key=key))
-        if not PyTuple_Check(key):
+        if not isinstance(key, tuple):
             raise KeyError(key)
         try:
             indices = [0 if checknull(v) else lev.get_loc(v) + 1

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
 
-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t
 
 from cpython cimport PyObject
-from cpython.slice cimport PySlice_Check
 
 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX
@@ -30,14 +29,15 @@ cdef class BlockPlacement:
     cdef bint _has_slice, _has_array, _is_known_slice_like
 
     def __init__(self, val):
-        cdef slice slc
+        cdef:
+            slice slc
 
         self._as_slice = None
         self._as_array = None
         self._has_slice = False
         self._has_array = False
 
-        if PySlice_Check(val):
+        if isinstance(val, slice):
             slc = slice_canonize(val)
 
             if slc.start != slc.stop:
@@ -55,7 +55,8 @@ cdef class BlockPlacement:
             self._has_array = True
 
     def __str__(self):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         if s is not None:
             v = self._as_slice
         else:
@@ -66,15 +67,17 @@ cdef class BlockPlacement:
     __repr__ = __str__
 
     def __len__(self):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         if s is not None:
             return slice_len(s)
         else:
             return len(self._as_array)
 
     def __iter__(self):
-        cdef slice s = self._ensure_has_slice()
-        cdef Py_ssize_t start, stop, step, _
+        cdef:
+            slice s = self._ensure_has_slice()
+            Py_ssize_t start, stop, step, _
         if s is not None:
             start, stop, step, _ = slice_get_indices_ex(s)
             return iter(range(start, stop, step))
@@ -83,15 +86,17 @@ cdef class BlockPlacement:
 
     @property
     def as_slice(self):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         if s is None:
             raise TypeError('Not slice-like')
         else:
             return s
 
     @property
     def indexer(self):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         if s is not None:
             return s
         else:
@@ -103,7 +108,8 @@ cdef class BlockPlacement:
 
     @property
     def as_array(self):
-        cdef Py_ssize_t start, stop, end, _
+        cdef:
+            Py_ssize_t start, stop, end, _
         if not self._has_array:
             start, stop, step, _ = slice_get_indices_ex(self._as_slice)
             self._as_array = np.arange(start, stop, step,
@@ -113,17 +119,19 @@ cdef class BlockPlacement:
 
     @property
     def is_slice_like(self):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         return s is not None
 
     def __getitem__(self, loc):
-        cdef slice s = self._ensure_has_slice()
+        cdef:
+            slice s = self._ensure_has_slice()
         if s is not None:
             val = slice_getitem(s, loc)
         else:
             val = self._as_array[loc]
 
-        if not PySlice_Check(val) and val.ndim == 0:
+        if not isinstance(val, slice) and val.ndim == 0:
             return val
 
         return BlockPlacement(val)
@@ -139,8 +147,9 @@ cdef class BlockPlacement:
                                              [o.as_array for o in others]))
 
     cdef iadd(self, other):
-        cdef slice s = self._ensure_has_slice()
-        cdef Py_ssize_t other_int, start, stop, step, l
+        cdef:
+            slice s = self._ensure_has_slice()
+            Py_ssize_t other_int, start, stop, step, l
 
         if isinstance(other, int) and s is not None:
             other_int = <Py_ssize_t>other
@@ -184,7 +193,7 @@ cdef class BlockPlacement:
         return self._as_slice
 
 
-cdef slice_canonize(slice s):
+cdef slice slice_canonize(slice s):
     """
     Convert slice to canonical bounded form.
     """
@@ -282,7 +291,7 @@ def slice_getitem(slice slc not None, ind):
 
     s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)
 
-    if PySlice_Check(ind):
+    if isinstance(ind, slice):
         ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
                                                                       s_len)
 

diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -271,7 +271,7 @@ cdef class Interval(IntervalMixin):
         return ((self.left < key if self.open_left else self.left <= key) and
                 (key < self.right if self.open_right else key <= self.right))
 
-    def __richcmp__(self, other, int op):
+    def __richcmp__(self, other, op: int):
         if hasattr(other, 'ndim'):
             # let numpy (or IntervalIndex) handle vectorization
             return NotImplemented