From 94f3923c99ef612a953942d6c76fc605e8e5c6d9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Nov 2017 03:24:27 -0800 Subject: [PATCH] remove unused (#18533) --- pandas/_libs/algos.pyx | 45 ---------- pandas/_libs/groupby.pyx | 101 ---------------------- pandas/_libs/hashing.pyx | 5 -- pandas/_libs/join.pyx | 24 ------ pandas/_libs/lib.pyx | 106 ------------------------ pandas/_libs/src/datetime/np_datetime.c | 51 ------------ 6 files changed, 332 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a5aae6d6af656..61d543cd7303a 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -212,51 +212,6 @@ cpdef numeric median(numeric[:] arr): kth_smallest(arr, n // 2 - 1)) / 2 -# -------------- Min, Max subsequence - -@cython.boundscheck(False) -@cython.wraparound(False) -def max_subseq(ndarray[double_t] arr): - cdef: - Py_ssize_t i=0, s=0, e=0, T, n - double m, S - - n = len(arr) - - if len(arr) == 0: - return (-1, -1, None) - - m = arr[0] - S = m - T = 0 - - with nogil: - for i in range(1, n): - # S = max { S + A[i], A[i] ) - if (S > 0): - S = S + arr[i] - else: - S = arr[i] - T = i - if S > m: - s = T - e = i - m = S - - return (s, e, m) - - -@cython.boundscheck(False) -@cython.wraparound(False) -def min_subseq(ndarray[double_t] arr): - cdef: - Py_ssize_t s, e - double m - - (s, e, m) = max_subseq(-arr) - - return (s, e, -m) - # ---------------------------------------------------------------------- # Pairwise correlation/covariance diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index dc0fdcf123c32..9d9ac2ef2f5b1 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -75,57 +75,6 @@ def group_nth_object(ndarray[object, ndim=2] out, out[i, j] = resx[i, j] -@cython.boundscheck(False) -@cython.wraparound(False) -def group_nth_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins, int64_t rank): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.float64) - resx = np.empty(( out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - if nobs[b, j] == rank: - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - @cython.boundscheck(False) @cython.wraparound(False) def group_last_object(ndarray[object, ndim=2] out, @@ -169,56 +118,6 @@ def group_last_object(ndarray[object, ndim=2] out, out[i, j] = resx[i, j] -@cython.boundscheck(False) -@cython.wraparound(False) -def group_last_bin_object(ndarray[object, ndim=2] out, - ndarray[int64_t] counts, - ndarray[object, ndim=2] values, - ndarray[int64_t] bins): - """ - Only aggregates on axis=0 - """ - cdef: - Py_ssize_t i, j, N, K, ngroups, b - object val - float64_t count - ndarray[object, ndim=2] resx - ndarray[float64_t, ndim=2] nobs - - nobs = np.zeros(( out).shape, dtype=np.float64) - resx = np.empty(( out).shape, dtype=object) - - if len(bins) == 0: - return - if bins[len(bins) - 1] == len(values): - ngroups = len(bins) - else: - ngroups = len(bins) + 1 - - N, K = ( values).shape - - b = 0 - for i in range(N): - while b < ngroups - 1 and i >= bins[b]: - b += 1 - - counts[b] += 1 - for j in range(K): - val = values[i, j] - - # not nan - if val == val: - nobs[b, j] += 1 - resx[b, j] = val - - for i in range(ngroups): - for j in range(K): - if nobs[i, j] == 0: - out[i, j] = nan - else: - out[i, j] = resx[i, j] - - cdef inline float64_t _median_linear(float64_t* a, int n) nogil: cdef int i, j, na_count = 0 cdef float64_t result diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 53203dd30daee..4c4449fb3e291 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -105,11 +105,6 @@ cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil: p[3] = (v >> 24) -cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil: - u32to8_le(p, v) - u32to8_le(p + 4, (v >> 32)) - - cdef inline uint64_t u8to64_le(uint8_t* p) nogil: return (p[0] | p[1] << 8 | diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 6befc5e60f5f6..344c5d25d0c3d 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -240,28 +240,4 @@ def ffill_indexer(ndarray[int64_t] indexer): return result -def ffill_by_group(ndarray[int64_t] indexer, ndarray[int64_t] group_ids, - int64_t max_group): - cdef: - Py_ssize_t i, n = len(indexer) - ndarray[int64_t] result, last_obs - int64_t gid, val - - result = np.empty(n, dtype=np.int64) - - last_obs = np.empty(max_group, dtype=np.int64) - last_obs.fill(-1) - - for i in range(n): - gid = group_ids[i] - val = indexer[i] - if val == -1: - result[i] = last_obs[gid] - else: - result[i] = val - last_obs[gid] = val - - return result - - include "join_helper.pxi" diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 2ec4b5cf19b72..02b3839ebf181 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -76,27 +76,6 @@ def values_from_object(object o): return o -cpdef map_indices_list(list index): - """ - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. - """ - cdef Py_ssize_t i, length - cdef dict result = {} - - length = len(index) - - for i from 0 <= i < length: - result[index[i]] = i - - return result - - @cython.wraparound(False) @cython.boundscheck(False) def memory_usage_of_objects(ndarray[object, ndim=1] arr): @@ -1094,27 +1073,6 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label, return out -def group_count(ndarray[int64_t] values, Py_ssize_t size): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] counts - - counts = np.zeros(size, dtype=np.int64) - for i in range(n): - counts[values[i]] += 1 - return counts - - -def lookup_values(ndarray[object] values, dict mapping): - cdef: - Py_ssize_t i, n = len(values) - - result = np.empty(n, dtype='O') - for i in range(n): - result[i] = mapping[values[i]] - return maybe_convert_objects(result) - - @cython.boundscheck(False) @cython.wraparound(False) def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, @@ -1145,70 +1103,6 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts -cdef class _PandasNull: - - def __richcmp__(_PandasNull self, object other, int op): - if op == 2: # == - return isinstance(other, _PandasNull) - elif op == 3: # != - return not isinstance(other, _PandasNull) - else: - return False - - def __hash__(self): - return 0 - -pandas_null = _PandasNull() - - -def fast_zip_fillna(list ndarrays, fill_value=pandas_null): - """ - For zipping multiple ndarrays into an ndarray of tuples - """ - cdef: - Py_ssize_t i, j, k, n - ndarray[object] result - flatiter it - object val, tup - - k = len(ndarrays) - n = len(ndarrays[0]) - - result = np.empty(n, dtype=object) - - # initialize tuples on first pass - arr = ndarrays[0] - it = PyArray_IterNew(arr) - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - tup = PyTuple_New(k) - - if val != val: - val = fill_value - - PyTuple_SET_ITEM(tup, 0, val) - Py_INCREF(val) - result[i] = tup - PyArray_ITER_NEXT(it) - - for j in range(1, k): - arr = ndarrays[j] - it = PyArray_IterNew(arr) - if len(arr) != n: - raise ValueError('all arrays must be same length') - - for i in range(n): - val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it)) - if val != val: - val = fill_value - - PyTuple_SET_ITEM(result[i], j, val) - Py_INCREF(val) - PyArray_ITER_NEXT(it) - - return result - - def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, start diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c index 3c63f42f14b83..b1206bd3f2d7a 100644 --- a/pandas/_libs/src/datetime/np_datetime.c +++ b/pandas/_libs/src/datetime/np_datetime.c @@ -24,20 +24,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt #include "np_datetime.h" #if PY_MAJOR_VERSION >= 3 -#define PyIntObject PyLongObject -#define PyInt_Type PyLong_Type -#define PyInt_Check(op) PyLong_Check(op) -#define PyInt_CheckExact(op) PyLong_CheckExact(op) -#define PyInt_FromString PyLong_FromString -#define PyInt_FromUnicode PyLong_FromUnicode -#define PyInt_FromLong PyLong_FromLong -#define PyInt_FromSize_t PyLong_FromSize_t -#define PyInt_FromSsize_t PyLong_FromSsize_t #define PyInt_AsLong PyLong_AsLong -#define PyInt_AS_LONG PyLong_AS_LONG -#define PyInt_AsSsize_t PyLong_AsSsize_t -#define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask -#define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask #endif const pandas_datetimestruct _NS_MIN_DTS = { @@ -692,44 +679,6 @@ int convert_datetimestruct_to_datetime(pandas_datetime_metadata *meta, return 0; } -/* - * This provides the casting rules for the TIMEDELTA data type units. - * - * Notably, there is a barrier between the nonlinear years and - * months units, and all the other units. - */ -npy_bool can_cast_timedelta64_units(PANDAS_DATETIMEUNIT src_unit, - PANDAS_DATETIMEUNIT dst_unit, - NPY_CASTING casting) { - switch (casting) { - /* Allow anything with unsafe casting */ - case NPY_UNSAFE_CASTING: - return 1; - - /* - * Only enforce the 'date units' vs 'time units' barrier with - * 'same_kind' casting. - */ - case NPY_SAME_KIND_CASTING: - return (src_unit <= PANDAS_FR_M && dst_unit <= PANDAS_FR_M) || - (src_unit > PANDAS_FR_M && dst_unit > PANDAS_FR_M); - - /* - * Enforce the 'date units' vs 'time units' barrier and that - * casting is only allowed towards more precise units with - * 'safe' casting. - */ - case NPY_SAFE_CASTING: - return (src_unit <= dst_unit) && - ((src_unit <= PANDAS_FR_M && dst_unit <= PANDAS_FR_M) || - (src_unit > PANDAS_FR_M && dst_unit > PANDAS_FR_M)); - - /* Enforce equality with 'no' or 'equiv' casting */ - default: - return src_unit == dst_unit; - } -} - /* * This provides the casting rules for the DATETIME data type units. *