Skip to content

Commit

Permalink
BUG: don't mangle NaN-float-values and pd.NaT (GH 22295)
Browse files Browse the repository at this point in the history
This is more or less the clean-up after PR pandas-dev#21904 and PR pandas-dev#22207: the underlying hash map handles all cases correctly out of the box, so no special handling is needed.
  • Loading branch information
realead committed Sep 5, 2018
1 parent e2e1a10 commit fd17591
Showing 1 changed file with 4 additions and 30 deletions.
34 changes: 4 additions & 30 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,6 @@ cdef class {{name}}HashTable(HashTable):
int ret = 0
{{dtype}}_t val
khiter_t k
bint seen_na = 0
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud

Expand All @@ -479,30 +478,13 @@ cdef class {{name}}HashTable(HashTable):
with nogil:
for i in range(n):
val = values[i]
{{if float_group}}
if val == val:
k = kh_get_{{dtype}}(self.table, val)
if k == self.table.n_buckets:
kh_put_{{dtype}}(self.table, val, &ret)
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, val)
elif not seen_na:
seen_na = 1
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, NAN)
{{else}}
k = kh_get_{{dtype}}(self.table, val)
if k == self.table.n_buckets:
kh_put_{{dtype}}(self.table, val, &ret)
if needs_resize(ud):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, val)
{{endif}}
return uniques.to_array()

{{endfor}}
Expand Down Expand Up @@ -854,19 +836,11 @@ cdef class PyObjectHashTable(HashTable):
for i in range(n):
val = values[i]
hash(val)
k = kh_get_pymap(self.table, <PyObject*>val)
if k == self.table.n_buckets:
kh_put_pymap(self.table, <PyObject*>val, &ret)
uniques.append(val)

# `val is None` below is exception to prevent mangling of None and
# other NA values; note however that other NA values (ex: pd.NaT
# and np.nan) will still get mangled, so may not be a permanent
# solution; see GH 20866
if not checknull(val) or val is None:
k = kh_get_pymap(self.table, <PyObject*>val)
if k == self.table.n_buckets:
kh_put_pymap(self.table, <PyObject*>val, &ret)
uniques.append(val)
elif not seen_na:
seen_na = 1
uniques.append(nan)

return uniques.to_array()

Expand Down

0 comments on commit fd17591

Please sign in to comment.