Skip to content

Commit

Permalink
CLN: use float64_t consistently instead of double, double_t (pandas-dev#23583)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and JustinZhengBC committed Nov 14, 2018
1 parent c80ff12 commit ff8130b
Show file tree
Hide file tree
Showing 29 changed files with 251 additions and 316 deletions.
3 changes: 0 additions & 3 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from util cimport numeric


cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
cdef:
numeric t
Expand Down
18 changes: 8 additions & 10 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ from numpy cimport (ndarray,
NPY_FLOAT32, NPY_FLOAT64,
NPY_OBJECT,
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t,
double_t)
uint32_t, uint64_t, float32_t, float64_t)
cnp.import_array()


Expand All @@ -32,10 +31,9 @@ import missing

cdef float64_t FP_ERR = 1e-13

cdef double NaN = <double>np.NaN
cdef double nan = NaN
cdef float64_t NaN = <float64_t>np.NaN

cdef int64_t iNaT = get_nat()
cdef int64_t NPY_NAT = get_nat()

tiebreakers = {
'average': TIEBREAK_AVERAGE,
Expand Down Expand Up @@ -199,7 +197,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
cdef:
Py_ssize_t i, j, l, m, n = a.shape[0]
numeric x
Expand Down Expand Up @@ -812,23 +810,23 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
n = len(arr)

if n == 1:
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == iNaT):
if arr[0] != arr[0] or (timelike and <int64_t>arr[0] == NPY_NAT):
# single value is NaN
return False, False, True
else:
return True, True, True
elif n < 2:
return True, True, True

if timelike and <int64_t>arr[0] == iNaT:
if timelike and <int64_t>arr[0] == NPY_NAT:
return False, False, True

if algos_t is not object:
with nogil:
prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and <int64_t>cur == iNaT:
if timelike and <int64_t>cur == NPY_NAT:
is_monotonic_inc = 0
is_monotonic_dec = 0
break
Expand All @@ -853,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and <int64_t>cur == iNaT:
if timelike and <int64_t>cur == NPY_NAT:
is_monotonic_inc = 0
is_monotonic_dec = 0
break
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# ensure_dtype
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num

Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
{{elif dtype == 'float64'}}
mask = np.isnan(values)
{{elif dtype == 'int64'}}
mask = values == iNaT
mask = values == NPY_NAT

# create copy in case of iNaT
# create copy in case of NPY_NAT
# values are mutated inplace
if mask.any():
values = values.copy()
Expand Down Expand Up @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
{{if dtype != 'uint64'}}
isnan = sorted_mask[i]
if isnan and keep_na:
ranks[argsorted[i]] = nan
ranks[argsorted[i]] = NaN
continue
{{endif}}

Expand Down Expand Up @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
{{elif dtype == 'float64'}}
mask = np.isnan(values)
{{elif dtype == 'int64'}}
mask = values == iNaT
mask = values == NPY_NAT
{{endif}}

np.putmask(values, mask, nan_value)
Expand Down Expand Up @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
{{else}}
if (val == nan_value) and keep_na:
{{endif}}
ranks[i, argsorted[i, j]] = nan
ranks[i, argsorted[i, j]] = NaN

{{if dtype == 'object'}}
infs += 1
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# take_1d, take_2d
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand Down
34 changes: 16 additions & 18 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# -*- coding: utf-8 -*-

cimport cython
from cython cimport Py_ssize_t
import cython
from cython import Py_ssize_t

from libc.stdlib cimport malloc, free

import numpy as np
cimport numpy as cnp
from numpy cimport (ndarray,
double_t,
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t)
cnp.import_array()
Expand All @@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers

cdef int64_t iNaT = get_nat()
cdef int64_t NPY_NAT = get_nat()

cdef double NaN = <double>np.NaN
cdef double nan = NaN
cdef float64_t NaN = <float64_t>np.NaN


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
Expand Down Expand Up @@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
return result


# TODO: Is this redundant with algos.kth_smallest?
# TODO: Is this redundant with algos.kth_smallest
cdef inline float64_t kth_smallest_c(float64_t* a,
Py_ssize_t k,
Py_ssize_t n) nogil:
cdef:
Py_ssize_t i, j, l, m
double_t x, t
float64_t x, t

l = 0
m = n - 1
Expand Down Expand Up @@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
cdef:
Py_ssize_t i, j, N, K, ngroups, size
ndarray[int64_t] _counts
ndarray data
ndarray[float64_t, ndim=2] data
float64_t* ptr

assert min_count == -1, "'min_count' only used in add and prod"
Expand Down Expand Up @@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
float64_t[:, :] values,
int64_t[:] labels,
const float64_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike,
bint skipna=True):
"""
Expand Down Expand Up @@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out,
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
numeric[:, :] values,
int64_t[:] labels,
const int64_t[:] labels,
is_datetimelike,
bint skipna=True):
"""
Expand Down Expand Up @@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
def group_shift_indexer(int64_t[:] out, const int64_t[:] labels,
int ngroups, int periods):
cdef:
Py_ssize_t N, i, j, ii
Expand Down Expand Up @@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
"""
cdef:
Py_ssize_t i, N
ndarray[int64_t] sorted_labels
int64_t[:] sorted_labels
int64_t idx, curr_fill_idx=-1, filled_vals=0

N = len(out)
Expand Down Expand Up @@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(ndarray[uint8_t] out,
ndarray[int64_t] labels,
ndarray[uint8_t] values,
ndarray[uint8_t] mask,
def group_any_all(uint8_t[:] out,
const int64_t[:] labels,
const uint8_t[:] values,
const uint8_t[:] mask,
object val_test,
bint skipna):
"""Aggregated boolean values to show truthfulness of group elements
Expand Down
22 changes: 11 additions & 11 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

cdef extern from "numpy/npy_math.h":
double NAN "NPY_NAN"
float64_t NAN "NPY_NAN"
_int64_max = np.iinfo(np.int64).max

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# group_nth, group_last, group_rank
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# name, c_type, nan_val
dtypes = [('float64', 'float64_t', 'NAN'),
('float32', 'float32_t', 'NAN'),
('int64', 'int64_t', 'iNaT'),
('int64', 'int64_t', 'NPY_NAT'),
('object', 'object', 'NAN')]

def get_dispatch(dtypes):
Expand Down Expand Up @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
# to the result where appropriate
if keep_na and mask[_as[i]]:
for j in range(i - dups + 1, i + 1):
out[_as[j], 0] = nan
out[_as[j], 0] = NaN
grp_na_count = dups
elif tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
Expand Down Expand Up @@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out,
if groupby_t is int64_t:
# Note: evaluated at compile-time
maxx[:] = -_int64_max
nan_val = iNaT
nan_val = NPY_NAT
else:
maxx[:] = -np.inf
nan_val = NAN
Expand Down Expand Up @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out,
minx = np.empty_like(out)
if groupby_t is int64_t:
minx[:] = _int64_max
nan_val = iNaT
nan_val = NPY_NAT
else:
minx[:] = np.inf
nan_val = NAN
Expand Down Expand Up @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,

# val = nan
if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
if is_datetimelike and val == NPY_NAT:
out[i, j] = NPY_NAT
else:
mval = accum[lab, j]
if val < mval:
Expand Down Expand Up @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
val = values[i, j]

if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
if is_datetimelike and val == NPY_NAT:
out[i, j] = NPY_NAT
else:
mval = accum[lab, j]
if val > mval:
Expand Down
8 changes: 3 additions & 5 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, uint8_t, uint32_t
from numpy cimport ndarray, uint8_t, uint32_t, float64_t
cnp.import_array()

cdef extern from "numpy/npy_math.h":
double NAN "NPY_NAN"
float64_t NAN "NPY_NAN"


from khash cimport (
Expand Down Expand Up @@ -42,9 +42,7 @@ cimport util
from missing cimport checknull


nan = np.nan

cdef int64_t iNaT = util.get_nat()
cdef int64_t NPY_NAT = util.get_nat()
_SIZE_HINT_LIMIT = (1 << 20) + 7


Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,9 @@ cdef class HashTable:
{{py:

# name, dtype, float_group, default_na_value
dtypes = [('Float64', 'float64', True, 'nan'),
dtypes = [('Float64', 'float64', True, 'np.nan'),
('UInt64', 'uint64', False, 0),
('Int64', 'int64', False, 'iNaT')]
('Int64', 'int64', False, 'NPY_NAT')]

}}

Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
from pandas._libs.missing import checknull

cdef int64_t iNaT = util.get_nat()
cdef int64_t NPY_NAT = util.get_nat()


cdef inline bint is_definitely_invalid_key(object val):
Expand Down Expand Up @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
elif value is None or value != value:
return iNaT
return NPY_NAT
elif util.is_string_object(value):
return Timestamp(value).value
raise ValueError("cannot set a Timestamp with a non-timestamp")
Expand All @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value):
elif isinstance(value, timedelta):
return Timedelta(value).value
elif value is None or value != value:
return iNaT
return NPY_NAT
elif util.is_string_object(value):
return Timedelta(value).value
raise ValueError("cannot set a Timedelta with a non-timedelta")
Expand Down
Loading

0 comments on commit ff8130b

Please sign in to comment.