Skip to content

Commit

Permalink
pythonize cython code (pandas-dev#22638)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Sep 12, 2018
1 parent 7ce722c commit c040353
Show file tree
Hide file tree
Showing 31 changed files with 152 additions and 155 deletions.
30 changes: 0 additions & 30 deletions .coveragerc

This file was deleted.

6 changes: 3 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

from libc.stdlib cimport malloc, free
from libc.string cimport memmove
Expand Down Expand Up @@ -114,7 +114,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):

@cython.wraparound(False)
@cython.boundscheck(False)
-def is_lexsorted(list list_of_arrays):
+def is_lexsorted(list_of_arrays: list) -> bint:
cdef:
Py_ssize_t i
Py_ssize_t n, nlevels
Expand Down
16 changes: 3 additions & 13 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# at https://github.com/veorq/SipHash

import cython
-from cpython cimport PyBytes_Check, PyUnicode_Check
from libc.stdlib cimport malloc, free

import numpy as np
Expand Down Expand Up @@ -44,6 +43,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
char **vecs
char *cdata
object val
+list datas = []

k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
Expand All @@ -57,12 +57,11 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
vecs = <char **> malloc(n * sizeof(char *))
lens = <uint64_t*> malloc(n * sizeof(uint64_t))

-cdef list datas = []
for i in range(n):
val = arr[i]
-if PyBytes_Check(val):
+if isinstance(val, bytes):
data = <bytes>val
-elif PyUnicode_Check(val):
+elif isinstance(val, unicode):
data = <bytes>val.encode(encoding)
elif val is None or is_nan(val):
# null, stringify and encode
Expand Down Expand Up @@ -132,15 +131,6 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
v2[0] = _rotl(v2[0], 32)


-# TODO: This appears unused; remove?
-cpdef uint64_t siphash(bytes data, bytes key) except? 0:
-if len(key) != 16:
-raise ValueError("key should be a 16-byte bytestring, "
-"got {key} (len {klen})"
-.format(key=key, klen=len(key)))
-return low_level_siphash(data, len(data), key)
-
-
@cython.cdivision(True)
cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
uint8_t* key) nogil:
Expand Down
15 changes: 6 additions & 9 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta, date

-cimport cython
-
-from cpython cimport PyTuple_Check, PyList_Check
-from cpython.slice cimport PySlice_Check
+import cython

import numpy as np
cimport numpy as cnp
Expand All @@ -30,15 +27,15 @@ cdef int64_t iNaT = util.get_nat()


cdef inline bint is_definitely_invalid_key(object val):
-if PyTuple_Check(val):
+if isinstance(val, tuple):
try:
hash(val)
except TypeError:
return True

# we have a _data, means we are a NDFrame
-return (PySlice_Check(val) or util.is_array(val)
-or PyList_Check(val) or hasattr(val, '_data'))
+return (isinstance(val, slice) or util.is_array(val)
+or isinstance(val, list) or hasattr(val, '_data'))


cpdef get_value_at(ndarray arr, object loc, object tz=None):
Expand Down Expand Up @@ -88,7 +85,7 @@ cdef class IndexEngine:
void* data_ptr

loc = self.get_loc(key)
-if PySlice_Check(loc) or util.is_array(loc):
+if isinstance(loc, slice) or util.is_array(loc):
return arr[loc]
else:
return get_value_at(arr, loc, tz=tz)
Expand Down Expand Up @@ -640,7 +637,7 @@ cdef class BaseMultiIndexCodesEngine:
def get_loc(self, object key):
if is_definitely_invalid_key(key):
raise TypeError("'{key}' is an invalid key".format(key=key))
-if not PyTuple_Check(key):
+if not isinstance(key, tuple):
raise KeyError(key)
try:
indices = [0 if checknull(v) else lev.get_loc(v) + 1
Expand Down
47 changes: 28 additions & 19 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

from cpython cimport PyObject
-from cpython.slice cimport PySlice_Check

cdef extern from "Python.h":
Py_ssize_t PY_SSIZE_T_MAX
Expand All @@ -30,14 +29,15 @@ cdef class BlockPlacement:
cdef bint _has_slice, _has_array, _is_known_slice_like

def __init__(self, val):
-cdef slice slc
+cdef:
+slice slc

self._as_slice = None
self._as_array = None
self._has_slice = False
self._has_array = False

-if PySlice_Check(val):
+if isinstance(val, slice):
slc = slice_canonize(val)

if slc.start != slc.stop:
Expand All @@ -55,7 +55,8 @@ cdef class BlockPlacement:
self._has_array = True

def __str__(self):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
if s is not None:
v = self._as_slice
else:
Expand All @@ -66,15 +67,17 @@ cdef class BlockPlacement:
__repr__ = __str__

def __len__(self):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
if s is not None:
return slice_len(s)
else:
return len(self._as_array)

def __iter__(self):
-cdef slice s = self._ensure_has_slice()
-cdef Py_ssize_t start, stop, step, _
+cdef:
+slice s = self._ensure_has_slice()
+Py_ssize_t start, stop, step, _
if s is not None:
start, stop, step, _ = slice_get_indices_ex(s)
return iter(range(start, stop, step))
Expand All @@ -83,15 +86,17 @@ cdef class BlockPlacement:

@property
def as_slice(self):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
if s is None:
raise TypeError('Not slice-like')
else:
return s

@property
def indexer(self):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
if s is not None:
return s
else:
Expand All @@ -103,7 +108,8 @@ cdef class BlockPlacement:

@property
def as_array(self):
-cdef Py_ssize_t start, stop, end, _
+cdef:
+Py_ssize_t start, stop, end, _
if not self._has_array:
start, stop, step, _ = slice_get_indices_ex(self._as_slice)
self._as_array = np.arange(start, stop, step,
Expand All @@ -113,17 +119,19 @@ cdef class BlockPlacement:

@property
def is_slice_like(self):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
return s is not None

def __getitem__(self, loc):
-cdef slice s = self._ensure_has_slice()
+cdef:
+slice s = self._ensure_has_slice()
if s is not None:
val = slice_getitem(s, loc)
else:
val = self._as_array[loc]

-if not PySlice_Check(val) and val.ndim == 0:
+if not isinstance(val, slice) and val.ndim == 0:
return val

return BlockPlacement(val)
Expand All @@ -139,8 +147,9 @@ cdef class BlockPlacement:
[o.as_array for o in others]))

cdef iadd(self, other):
-cdef slice s = self._ensure_has_slice()
-cdef Py_ssize_t other_int, start, stop, step, l
+cdef:
+slice s = self._ensure_has_slice()
+Py_ssize_t other_int, start, stop, step, l

if isinstance(other, int) and s is not None:
other_int = <Py_ssize_t>other
Expand Down Expand Up @@ -184,7 +193,7 @@ cdef class BlockPlacement:
return self._as_slice


-cdef slice_canonize(slice s):
+cdef slice slice_canonize(slice s):
"""
Convert slice to canonical bounded form.
"""
Expand Down Expand Up @@ -282,7 +291,7 @@ def slice_getitem(slice slc not None, ind):

s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc)

-if PySlice_Check(ind):
+if isinstance(ind, slice):
ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind,
s_len)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ cdef class Interval(IntervalMixin):
return ((self.left < key if self.open_left else self.left <= key) and
(key < self.right if self.open_right else key <= self.right))

-def __richcmp__(self, other, int op):
+def __richcmp__(self, other, op: int):
if hasattr(other, 'ndim'):
# let numpy (or IntervalIndex) handle vectorization
return NotImplemented
Expand Down
Loading

0 comments on commit c040353

Please sign in to comment.