DOC: improve doc string for .aggregate and .transform (pandas-dev#22641)

Sup3rGeo · Sep 30, 2018 · 8f144bb · 8f144bb
1 parent 9cf7b60
commit 8f144bb
Show file tree

Hide file tree

Showing 4 changed files with 76 additions and 49 deletions.
diff --git a/ci/doctests.sh b/ci/doctests.sh
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then
 
     # DataFrame / Series docstrings
     pytest --doctest-modules -v pandas/core/frame.py \
-        -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform"
+        -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
 
     if [ $? -ne "0" ]; then
         RET=1
@@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then
     fi
 
     pytest --doctest-modules -v pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs"
+        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs"
 
     if [ $? -ne "0" ]; then
         RET=1

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -109,10 +109,9 @@
 _shared_doc_kwargs = dict(
     axes='index, columns', klass='DataFrame',
     axes_single_arg="{0 or 'index', 1 or 'columns'}",
-    axis="""
-    axis : {0 or 'index', 1 or 'columns'}, default 0
-        - 0 or 'index': apply function to each column.
-        - 1 or 'columns': apply function to each row.""",
+    axis="""axis : {0 or 'index', 1 or 'columns'}, default 0
+        If 0 or 'index': apply function to each column.
+        If 1 or 'columns': apply function to each row.""",
     optional_by="""
         by : str or list of str
             Name or list of names to sort by.

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4545,17 +4545,16 @@ def pipe(self, func, *args, **kwargs):
 
     Parameters
     ----------
-    func : function, string, dictionary, or list of string/functions
+    func : function, str, list or dict
         Function to use for aggregating the data. If a function, must either
-        work when passed a %(klass)s or when passed to %(klass)s.apply. For
-        a DataFrame, can pass a dict, if the keys are DataFrame column names.
+        work when passed a %(klass)s or when passed to %(klass)s.apply.
 
         Accepted combinations are:
 
-        - string function name.
-        - function.
-        - list of functions.
-        - dict of column names -> functions (or list of functions).
+        - function
+        - string function name
+        - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
+        - dict of axis labels -> functions, function names or list of such.
     %(axis)s
     *args
         Positional arguments to pass to `func`.
@@ -4564,7 +4563,11 @@ def pipe(self, func, *args, **kwargs):
 
     Returns
     -------
-    aggregated : %(klass)s
+    DataFrame, Series or scalar
+        if DataFrame.agg is called with a single function, returns a Series
+        if DataFrame.agg is called with several functions, returns a DataFrame
+        if Series.agg is called with single function, returns a scalar
+        if Series.agg is called with several functions, returns a Series
 
     Notes
     -----
@@ -4574,50 +4577,71 @@ def pipe(self, func, *args, **kwargs):
     """)
 
     _shared_docs['transform'] = ("""
-    Call function producing a like-indexed %(klass)s
-    and return a %(klass)s with the transformed values
+    Call ``func`` on self producing a %(klass)s with transformed values
+    and that has the same axis length as self.
 
     .. versionadded:: 0.20.0
 
     Parameters
     ----------
-    func : callable, string, dictionary, or list of string/callables
-        To apply to column
+    func : function, str, list or dict
+        Function to use for transforming the data. If a function, must either
+        work when passed a %(klass)s or when passed to %(klass)s.apply.
 
-        Accepted Combinations are:
+        Accepted combinations are:
 
-        - string function name
         - function
-        - list of functions
-        - dict of column names -> functions (or list of functions)
+        - string function name
+        - list of functions and/or function names, e.g. ``[np.exp. 'sqrt']``
+        - dict of axis labels -> functions, function names or list of such.
+    %(axis)s
+    *args
+        Positional arguments to pass to `func`.
+    **kwargs
+        Keyword arguments to pass to `func`.
 
     Returns
     -------
-    transformed : %(klass)s
+    %(klass)s
+        A %(klass)s that must have the same length as self.
 
-    Examples
+    Raises
+    ------
+    ValueError : If the returned %(klass)s has a different length than self.
+
+    See Also
     --------
-    >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
-    ...                   index=pd.date_range('1/1/2000', periods=10))
-    df.iloc[3:7] = np.nan
-
-    >>> df.transform(lambda x: (x - x.mean()) / x.std())
-                       A         B         C
-    2000-01-01  0.579457  1.236184  0.123424
-    2000-01-02  0.370357 -0.605875 -1.231325
-    2000-01-03  1.455756 -0.277446  0.288967
-    2000-01-04       NaN       NaN       NaN
-    2000-01-05       NaN       NaN       NaN
-    2000-01-06       NaN       NaN       NaN
-    2000-01-07       NaN       NaN       NaN
-    2000-01-08 -0.498658  1.274522  1.642524
-    2000-01-09 -0.540524 -1.012676 -0.828968
-    2000-01-10 -1.366388 -0.614710  0.005378
-
-    See also
+    %(klass)s.agg : Only perform aggregating type operations.
+    %(klass)s.apply : Invoke function on a %(klass)s.
+
+    Examples
     --------
-    pandas.%(klass)s.aggregate
-    pandas.%(klass)s.apply
+    >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
+    >>> df
+       A  B
+    0  0  1
+    1  1  2
+    2  2  3
+    >>> df.transform(lambda x: x + 1)
+       A  B
+    0  1  2
+    1  2  3
+    2  3  4
+
+    Even though the resulting %(klass)s must have the same length as the
+    input %(klass)s, it is possible to provide several input functions:
+
+    >>> s = pd.Series(range(3))
+    >>> s
+    0    0
+    1    1
+    2    2
+    dtype: int64
+    >>> s.transform([np.sqrt, np.exp])
+           sqrt        exp
+    0  0.000000   1.000000
+    1  1.000000   2.718282
+    2  1.414214   7.389056
     """)
 
     # ----------------------------------------------------------------------
@@ -9401,7 +9425,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None,
 
         cls.ewm = ewm
 
-    @Appender(_shared_docs['transform'] % _shared_doc_kwargs)
+    @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs))
     def transform(self, func, *args, **kwargs):
         result = self.agg(func, *args, **kwargs)
         if is_scalar(result) or len(result) != len(self):

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -89,10 +89,8 @@
 
 _shared_doc_kwargs = dict(
     axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
-    axis="""
-    axis : {0 or 'index'}
-        Parameter needed for compatibility with DataFrame.
-    """,
+    axis="""axis : {0 or 'index'}
+        Parameter needed for compatibility with DataFrame.""",
     inplace="""inplace : boolean, default False
         If True, performs operation inplace and returns None.""",
     unique='np.ndarray', duplicated='Series',
@@ -3097,6 +3095,12 @@ def aggregate(self, func, axis=0, *args, **kwargs):
 
     agg = aggregate
 
+    @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs)
+    def transform(self, func, axis=0, *args, **kwargs):
+        # Validate the axis parameter
+        self._get_axis_number(axis)
+        return super(Series, self).transform(func, *args, **kwargs)
+
     def apply(self, func, convert_dtype=True, args=(), **kwds):
         """
         Invoke function on values of Series. Can be ufunc (a NumPy function