diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81d5c112885ec..b7c21b0f64a97 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7248,38 +7248,82 @@ def _get_agg_axis(self, axis_num): def mode(self, axis=0, numeric_only=False, dropna=True): """ - Gets the mode(s) of each element along the axis selected. Adds a row - for each mode per label, fills in gaps with nan. + Get the mode(s) of each element along the selected axis. - Note that there could be multiple values returned for the selected - axis (when more than one item share the maximum frequency), which is - the reason why a dataframe is returned. If you want to impute missing - values with the mode in a dataframe ``df``, you can just do this: - ``df.fillna(df.mode().iloc[0])`` + The mode of a set of values is the value that appears most often. + It can be multiple values. Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to iterate over while searching for the mode: + * 0 or 'index' : get mode of each column * 1 or 'columns' : get mode of each row - numeric_only : boolean, default False - if True, only apply to numeric columns - dropna : boolean, default True + numeric_only : bool, default False + If True, only apply to numeric columns. + dropna : bool, default True Don't consider counts of NaN/NaT. .. versionadded:: 0.24.0 Returns ------- - modes : DataFrame (sorted) + DataFrame + The modes of each column or row. + + See Also + -------- + Series.mode : Return the highest frequency value in a Series. + Series.value_counts : Return the counts of values in a Series. Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 3]}) + >>> df = pd.DataFrame([('bird', 2, 2), + ... ('mammal', 4, np.nan), + ... ('arthropod', 8, 0), + ... ('bird', 2, np.nan)], + ... index=('falcon', 'horse', 'spider', 'ostrich'), + ... 
columns=('species', 'legs', 'wings')) + >>> df + species legs wings + falcon bird 2 2.0 + horse mammal 4 NaN + spider arthropod 8 0.0 + ostrich bird 2 NaN + + By default, missing values are not considered, and the modes of wings + are both 0 and 2. The second row of species and legs contains ``NaN``, + because they have only one mode, but the DataFrame has two rows. + >>> df.mode() - A - 0 1 - 1 2 + species legs wings + 0 bird 2.0 0.0 + 1 NaN NaN 2.0 + + Setting ``dropna=False``, ``NaN`` values are considered and they can be + the mode (like for wings). + + >>> df.mode(dropna=False) + species legs wings + 0 bird 2 NaN + + Setting ``numeric_only=True``, only the mode of numeric columns is + computed, and columns of other types are ignored. + + >>> df.mode(numeric_only=True) + legs wings + 0 2.0 0.0 + 1 NaN 2.0 + + To compute the mode over columns and not rows, use the axis parameter: + + >>> df.mode(axis='columns', numeric_only=True) + 0 1 + falcon 2.0 NaN + horse 4.0 NaN + spider 0.0 8.0 + ostrich 2.0 NaN """ data = self if not numeric_only else self._get_numeric_data()