Skip to content

Commit

Permalink
BUG: align logic between replace dict using integers and using string…
Browse files Browse the repository at this point in the history
…s (# 20656)
  • Loading branch information
peterpanmj committed Jul 29, 2018
1 parent 415a01e commit d8f2d70
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 25 deletions.
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,5 @@ Other
- :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
- :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`)
-
-
- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
-
85 changes: 82 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,44 @@ def _nanpercentile(values, q, axis, **kw):
placement=np.arange(len(result)),
ndim=ndim)

def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True,
convert=False, regex=False, mgr=None):
"""
Replace value corresponding to the given boolean array with another
value.
Parameters
----------
mask : array_like of bool
The mask of values to replace.
src : object
The value to replace. It is ignored if regex is False.
dst : object
The value to be replaced with.
convert : bool
If true, try to coerce any object types to better types.
regex : bool
If true, search for element matching with the pattern in src.
Masked element is ignored.
mgr : BlockPlacement, optional
Returns
-------
A new block if there is anything to replace or the original block.
"""

if mask.any():
if not regex:
self = self.coerce_to_target_dtype(dst)
return self.putmask(mask, dst, inplace=inplace)
else:
return self._replace_single(src, dst, inplace=inplace,
regex=regex,
convert=convert,
mask=mask,
mgr=mgr)
return self


class ScalarBlock(Block):
"""
Expand Down Expand Up @@ -2464,7 +2502,7 @@ def replace(self, to_replace, value, inplace=False, filter=None,
regex=regex, mgr=mgr)

def _replace_single(self, to_replace, value, inplace=False, filter=None,
regex=False, convert=True, mgr=None):
regex=False, convert=True, mgr=None, mask=None):

inplace = validate_bool_kwarg(inplace, 'inplace')

Expand Down Expand Up @@ -2531,15 +2569,56 @@ def re_replacer(s):
else:
filt = self.mgr_locs.isin(filter).nonzero()[0]

new_values[filt] = f(new_values[filt])
if mask is None:
new_values[filt] = f(new_values[filt])
else:
new_values[filt][mask] = f(new_values[filt][mask])

# convert
block = self.make_block(new_values)
if convert:
block = block.convert(by_item=True, numeric=False)

return block

def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True,
convert=False, regex=False, mgr=None):
"""
Replace value corresponding to the given boolean array with another
value.
Parameters
----------
mask : array_like of bool
The mask of values to replace.
src : object
The value to replace. It is ignored if regex is False.
dst : object
The value to be replaced with.
convert : bool
If true, try to coerce any object types to better types.
regex : bool
If true, search for element matching with the pattern in src.
Masked element is ignored.
mgr : BlockPlacement, optional
Returns
-------
A new block if there is anything to replace or the original block.
"""
if mask.any():
block = super(ObjectBlock, self)._replace_coerce(mask=mask,
src=src,
dst=dst,
inplace=inplace,
convert=convert,
regex=regex,
mgr=mgr)
if convert:
block = [b.convert(by_item=True, numeric=False, copy=True)
for b in block]
return block
return self


class CategoricalBlock(ExtensionBlock):
__slots__ = ()
Expand Down
48 changes: 28 additions & 20 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from functools import partial
import itertools
import operator
import re

import numpy as np

Expand All @@ -19,11 +20,13 @@
is_datetimelike_v_numeric,
is_numeric_v_string_like, is_extension_type,
is_extension_array_dtype,
is_scalar)
is_scalar,
is_re_compilable)
from pandas.core.dtypes.cast import (
maybe_promote,
infer_dtype_from_scalar,
find_common_type)
find_common_type,
maybe_convert_objects)
from pandas.core.dtypes.missing import isna
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray
Expand Down Expand Up @@ -571,12 +574,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False,
# figure out our mask a-priori to avoid repeated replacements
values = self.as_array()

def comp(s):
def comp(s, reg=False):
if isna(s):
return isna(values)
return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq)
if hasattr(s, 'asm8'):
return _maybe_compare(maybe_convert_objects(values),
getattr(s, 'asm8'), reg)
if reg and is_re_compilable(s):
return _maybe_compare(values, s, reg)
return _maybe_compare(values, s, reg)

masks = [comp(s) for i, s in enumerate(src_list)]
masks = [comp(s, regex) for i, s in enumerate(src_list)]

result_blocks = []
src_len = len(src_list) - 1
Expand All @@ -588,20 +596,16 @@ def comp(s):
for i, (s, d) in enumerate(zip(src_list, dest_list)):
new_rb = []
for b in rb:
if b.dtype == np.object_:
convert = i == src_len
result = b.replace(s, d, inplace=inplace, regex=regex,
mgr=mgr, convert=convert)
m = masks[i][b.mgr_locs.indexer]
convert = i == src_len
result = b._replace_coerce(mask=m, src=s, dst=d,
inplace=inplace,
convert=convert, regex=regex,
mgr=mgr)
if m.any():
new_rb = _extend_blocks(result, new_rb)
else:
# get our mask for this element, sized to this
# particular block
m = masks[i][b.mgr_locs.indexer]
if m.any():
b = b.coerce_to_target_dtype(d)
new_rb.extend(b.putmask(m, d, inplace=True))
else:
new_rb.append(b)
new_rb.append(b)
rb = new_rb
result_blocks.extend(rb)

Expand Down Expand Up @@ -1890,7 +1894,12 @@ def _consolidate(blocks):
return new_blocks


def _maybe_compare(a, b, op):
def _maybe_compare(a, b, regex=False):
if not regex:
op = lambda x: operator.eq(x, b)
else:
op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str)
else False)

is_a_array = isinstance(a, np.ndarray)
is_b_array = isinstance(b, np.ndarray)
Expand All @@ -1902,9 +1911,8 @@ def _maybe_compare(a, b, op):
# numpy deprecation warning if comparing numeric vs string-like
elif is_numeric_v_string_like(a, b):
result = False

else:
result = op(a, b)
result = op(a)

if is_scalar(result) and (is_a_array or is_b_array):
type_names = [type(a).__name__, type(b).__name__]
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/series/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,13 @@ def test_replace_string_with_number(self):
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(expected, result)

def test_repace_intertwined_key_value_dict(self):
# GH 20656
s = pd.Series(['a', 'b'])
expected = pd.Series(['b', 'a'])
result = s.replace({'a': 'b', 'b': 'a'})
tm.assert_series_equal(expected, result)

def test_replace_unicode_with_number(self):
# GH 15743
s = pd.Series([1, 2, 3])
Expand Down

0 comments on commit d8f2d70

Please sign in to comment.