API: Unify .update to generic

pandas-dev · Oct 16, 2018 · c6e9dfb · c6e9dfb
1 parent 913f71f
commit c6e9dfb
Show file tree

Hide file tree

Showing 4 changed files with 193 additions and 146 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -200,6 +200,7 @@ Other Enhancements
 - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
 - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
 - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
+- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update` (:issue:`22358`)
 - Compatibility with Matplotlib 3.0 (:issue:`22790`).
 
 .. _whatsnew_0240.api_breaking:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5198,142 +5198,6 @@ def combiner(x, y, needs_i8_conversion=False):
 
         return self.combine(other, combiner, overwrite=False)
 
-    def update(self, other, join='left', overwrite=True, filter_func=None,
-               raise_conflict=False):
-        """
-        Modify in place using non-NA values from another DataFrame.
-
-        Aligns on indices. There is no return value.
-
-        Parameters
-        ----------
-        other : DataFrame, or object coercible into a DataFrame
-            Should have at least one matching index/column label
-            with the original DataFrame. If a Series is passed,
-            its name attribute must be set, and that will be
-            used as the column name to align with the original DataFrame.
-        join : {'left'}, default 'left'
-            Only left join is implemented, keeping the index and columns of the
-            original object.
-        overwrite : bool, default True
-            How to handle non-NA values for overlapping keys:
-
-            * True: overwrite original DataFrame's values
-              with values from `other`.
-            * False: only update values that are NA in
-              the original DataFrame.
-
-        filter_func : callable(1d-array) -> boolean 1d-array, optional
-            Can choose to replace values other than NA. Return True for values
-            that should be updated.
-        raise_conflict : bool, default False
-            If True, will raise a ValueError if the DataFrame and `other`
-            both contain non-NA data in the same place.
-
-        Raises
-        ------
-        ValueError
-            When `raise_conflict` is True and there's overlapping non-NA data.
-
-        See Also
-        --------
-        dict.update : Similar method for dictionaries.
-        DataFrame.merge : For column(s)-on-columns(s) operations.
-
-        Examples
-        --------
-        >>> df = pd.DataFrame({'A': [1, 2, 3],
-        ...                    'B': [400, 500, 600]})
-        >>> new_df = pd.DataFrame({'B': [4, 5, 6],
-        ...                        'C': [7, 8, 9]})
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  1  4
-        1  2  5
-        2  3  6
-
-        The DataFrame's length does not increase as a result of the update,
-        only values at matching index/column labels are updated.
-
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  a  d
-        1  b  e
-        2  c  f
-
-        For Series, it's name attribute must be set.
-
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
-        >>> df.update(new_column)
-        >>> df
-           A  B
-        0  a  d
-        1  b  y
-        2  c  e
-        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
-        ...                    'B': ['x', 'y', 'z']})
-        >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
-        >>> df.update(new_df)
-        >>> df
-           A  B
-        0  a  x
-        1  b  d
-        2  c  e
-
-        If `other` contains NaNs the corresponding values are not updated
-        in the original dataframe.
-
-        >>> df = pd.DataFrame({'A': [1, 2, 3],
-        ...                    'B': [400, 500, 600]})
-        >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
-        >>> df.update(new_df)
-        >>> df
-           A      B
-        0  1    4.0
-        1  2  500.0
-        2  3    6.0
-        """
-        import pandas.core.computation.expressions as expressions
-        # TODO: Support other joins
-        if join != 'left':  # pragma: no cover
-            raise NotImplementedError("Only left join is supported")
-
-        if not isinstance(other, DataFrame):
-            other = DataFrame(other)
-
-        other = other.reindex_like(self)
-
-        for col in self.columns:
-            this = self[col].values
-            that = other[col].values
-            if filter_func is not None:
-                with np.errstate(all='ignore'):
-                    mask = ~filter_func(this) | isna(that)
-            else:
-                if raise_conflict:
-                    mask_this = notna(that)
-                    mask_that = notna(this)
-                    if any(mask_this & mask_that):
-                        raise ValueError("Data overlaps.")
-
-                if overwrite:
-                    mask = isna(that)
-                else:
-                    mask = notna(this)
-
-            # don't overwrite columns unecessarily
-            if mask.all():
-                continue
-
-            self[col] = expressions.where(mask, this, that)
-
     # ----------------------------------------------------------------------
     # Data reshaping
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -99,6 +99,32 @@ def _single_replace(self, to_replace, method, inplace, limit):
     return result
 
 
+def _update_column(this, that, overwrite=True, filter_func=None,
+                   raise_conflict=False):
+    import pandas.core.computation.expressions as expressions
+
+    if filter_func is not None:
+        with np.errstate(all='ignore'):
+            mask = ~filter_func(this) | isna(that)
+    else:
+        if raise_conflict:
+            mask_this = notna(that)
+            mask_that = notna(this)
+            if any(mask_this & mask_that):
+                raise ValueError("Data overlaps.")
+
+        if overwrite:
+            mask = isna(that)
+        else:
+            mask = notna(this)
+
+    # don't overwrite columns unnecessarily
+    if mask.all():
+        return None
+
+    return expressions.where(mask, this, that)
+
+
 class NDFrame(PandasObject, SelectionMixin):
     """
     N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -4079,6 +4105,144 @@ def _reindex_axis(self, new_index, fill_method, axis, copy):
         else:
             return self._constructor(new_data).__finalize__(self)
 
+    def update(self, other, join='left', overwrite=True, filter_func=None,
+               raise_conflict=False):
+        """
+        Modify in place using non-NA values from another DataFrame.
+
+        Aligns on indices. There is no return value.
+
+        Parameters
+        ----------
+        other : DataFrame, or object coercible into a DataFrame
+            Should have at least one matching index/column label
+            with the original DataFrame. If a Series is passed,
+            its name attribute must be set, and that will be
+            used as the column name to align with the original DataFrame.
+        join : {'left'}, default 'left'
+            Only left join is implemented, keeping the index and columns of the
+            original object.
+        overwrite : bool, default True
+            How to handle non-NA values for overlapping keys:
+
+            * True: overwrite original DataFrame's values
+              with values from `other`.
+            * False: only update values that are NA in
+              the original DataFrame.
+
+        filter_func : callable(1d-array) -> boolean 1d-array, optional
+            Can choose to replace values other than NA. Return True for values
+            that should be updated.
+        raise_conflict : bool, default False
+            If True, will raise a ValueError if the DataFrame and `other`
+            both contain non-NA data in the same place.
+
+        Raises
+        ------
+        ValueError
+            When `raise_conflict` is True and there's overlapping non-NA data.
+
+        See Also
+        --------
+        dict.update : Similar method for dictionaries.
+        DataFrame.merge : For column(s)-on-columns(s) operations.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({'A': [1, 2, 3],
+        ...                    'B': [400, 500, 600]})
+        >>> new_df = pd.DataFrame({'B': [4, 5, 6],
+        ...                        'C': [7, 8, 9]})
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  1  4
+        1  2  5
+        2  3  6
+
+        The DataFrame's length does not increase as a result of the update,
+        only values at matching index/column labels are updated.
+
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  a  d
+        1  b  e
+        2  c  f
+
+        For Series, it's name attribute must be set.
+
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
+        >>> df.update(new_column)
+        >>> df
+           A  B
+        0  a  d
+        1  b  y
+        2  c  e
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
+        ...                    'B': ['x', 'y', 'z']})
+        >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
+        >>> df.update(new_df)
+        >>> df
+           A  B
+        0  a  x
+        1  b  d
+        2  c  e
+
+        If `other` contains NaNs the corresponding values are not updated
+        in the original dataframe.
+
+        >>> df = pd.DataFrame({'A': [1, 2, 3],
+        ...                    'B': [400, 500, 600]})
+        >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
+        >>> df.update(new_df)
+        >>> df
+           A      B
+        0  1    4.0
+        1  2  500.0
+        2  3    6.0
+        """
+        from pandas import Series, DataFrame
+        # TODO: Support other joins
+        if join != 'left':  # pragma: no cover
+            raise NotImplementedError("Only left join is supported")
+
+        if isinstance(self, ABCSeries):
+            if not isinstance(other, ABCSeries):
+                other = Series(other)
+            other = other.reindex_like(self)
+            this = self.values
+            that = other.values
+            updated = _update_column(this, that, overwrite=overwrite,
+                                     filter_func=filter_func,
+                                     raise_conflict=raise_conflict)
+            if updated is None:
+                # don't overwrite Series unnecessarily
+                return
+            self._data._block.values = updated
+        else:  # DataFrame
+            if not isinstance(other, ABCDataFrame):
+                other = DataFrame(other)
+
+            other = other.reindex_like(self)
+
+            for col in self.columns:
+                this = self[col].values
+                that = other[col].values
+
+                updated = _update_column(this, that, overwrite=overwrite,
+                                         filter_func=filter_func,
+                                         raise_conflict=raise_conflict)
+                # don't overwrite columns unnecessarily
+                if updated is None:
+                    continue
+                self[col] = updated
+
     def filter(self, items=None, like=None, regex=None, axis=None):
         """
         Subset rows or columns of dataframe according to labels in

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2377,14 +2377,35 @@ def combine_first(self, other):
 
         return this.where(notna(this), other)
 
-    def update(self, other):
+    def update(self, other, join='left', overwrite=True, filter_func=None,
+               raise_conflict=False):
         """
-        Modify Series in place using non-NA values from passed
-        Series. Aligns on index
+        Modify Series in place using non-NA values from passed Series.
+
+        Aligns on index.
 
         Parameters
         ----------
-        other : Series
+        other : Series, or object coercible into a Series
+            Should have at least one matching index label with the calling
+            Series.
+        join : {'left'}, default 'left'
+            Only left join is implemented, keeping the index and columns of the
+            original object.
+        overwrite : bool, default True
+            How to handle non-NA values for overlapping keys:
+
+            * True: overwrite original DataFrame's values
+              with values from `other`.
+            * False: only update values that are NA in
+              the original DataFrame.
+
+        filter_func : callable(1d-array) -> boolean 1d-array, optional
+            Can choose to replace values other than NA. Return True for values
+            that should be updated.
+        raise_conflict : bool, default False
+            If True, will raise a ValueError if the DataFrame and `other`
+            both contain non-NA data in the same place.
 
         Examples
         --------
@@ -2422,13 +2443,10 @@ def update(self, other):
         1    2
         2    6
         dtype: int64
-
         """
-        other = other.reindex_like(self)
-        mask = notna(other)
-
-        self._data = self._data.putmask(mask=mask, new=other, inplace=True)
-        self._maybe_update_cacher()
+        super(Series, self).update(other, join=join, overwrite=overwrite,
+                                   filter_func=filter_func,
+                                   raise_conflict=raise_conflict)
 
     # ----------------------------------------------------------------------
     # Reindexing, sorting