diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ada9c5d456a77f..a17e2ce7f1ef54 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -826,8 +826,9 @@ def _aggregate_multiple_funcs(self, arg, _level): for name, func in arg: obj = self if name in results: - raise SpecificationError('Function names must be unique, ' - 'found multiple named %s' % name) + raise SpecificationError( + 'Function names must be unique, found multiple named ' + '{}'.format(name)) # reset the cache so that we # only include the named selection @@ -1027,8 +1028,7 @@ def nunique(self, dropna=True): try: sorter = np.lexsort((val, ids)) except TypeError: # catches object dtypes - msg = ('val.dtype must be object, got {dtype}' - .format(dtype=val.dtype)) + msg = 'val.dtype must be object, got {}'.format(val.dtype) assert val.dtype == object, msg val, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((val, ids)) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 292d4207cf2c5c..253860d83f49e0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1644,7 +1644,8 @@ def nth(self, n, dropna=None): # just returns NaN raise ValueError("For a DataFrame groupby, dropna must be " "either None, 'any' or 'all', " - "(was passed %s)." % (dropna),) + "(was passed {dropna}).".format( + dropna=dropna)) # old behaviour, but with all and any support for DataFrames. 
# modified in GH 7559 to have better perf @@ -2099,6 +2100,6 @@ def groupby(obj, by, **kwds): from pandas.core.groupby.generic import DataFrameGroupBy klass = DataFrameGroupBy else: # pragma: no cover - raise TypeError('invalid type: %s' % type(obj)) + raise TypeError('invalid type: {}'.format(type(obj))) return klass(obj, by, **kwds) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b49bc5ee5950f9..d8df227d4911a0 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -257,7 +257,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, if level is not None: if not isinstance(level, int): if level not in index.names: - raise AssertionError('Level %s not in index' % str(level)) + raise AssertionError('Level {} not in index'.format(level)) level = index.names.index(level) if self.name is None: @@ -317,7 +317,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, (Series, Index, ExtensionArray, np.ndarray)): if getattr(self.grouper, 'ndim', 1) != 1: t = self.name or str(type(self.grouper)) - raise ValueError("Grouper for '%s' not 1-dimensional" % t) + raise ValueError( "Grouper for '{}' not 1-dimensional".format(t)) self.grouper = self.index.map(self.grouper) if not (hasattr(self.grouper, "__len__") and len(self.grouper) == len(self.index)): @@ -460,8 +461,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if isinstance(level, compat.string_types): if obj.index.name != level: - raise ValueError('level name %s is not the name of the ' - 'index' % level) + raise ValueError('level name {} is not the name of the ' 'index'.format(level)) elif level > 0 or level < -1: raise ValueError('level > 0 or level < -1 only valid with ' ' MultiIndex') diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 125bd9a5e855d9..6dcbedfa112bb5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -380,7 +380,8 @@ def
get_func(fname): # otherwise find dtype-specific version, falling back to object for dt in [dtype_str, 'object']: - f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None) + f = getattr(libgroupby, "{fname}_{dtype_str}".format( + fname=fname, dtype_str=dtype_str), None) if f is not None: return f @@ -403,9 +404,11 @@ def wrapper(*args, **kwargs): func = get_func(ftype) if func is None: - raise NotImplementedError("function is not implemented for this" - "dtype: [how->%s,dtype->%s]" % - (how, dtype_str)) + raise NotImplementedError( + "function is not implemented for this dtype: " + "[how->{how},dtype->{dtype_str}]".format(how=how, + dtype_str=dtype_str)) + return func def _cython_operation(self, kind, values, how, axis, min_count=-1, @@ -485,7 +488,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, out_dtype = 'float' else: if is_numeric: - out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize) + out_dtype = '{kind}{itemsize}'.format( + kind=values.dtype.kind, itemsize=values.dtype.itemsize) else: out_dtype = 'object' @@ -739,10 +743,6 @@ def group_info(self): obs_group_ids.astype('int64', copy=False), ngroups) - @cache_readonly - def ngroups(self): - return len(self.result_index) - @cache_readonly def result_index(self): if len(self.binlabels) != 0 and isna(self.binlabels[0]): @@ -769,11 +769,6 @@ def agg_series(self, obj, func): grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy) return grouper.get_result() - # ---------------------------------------------------------------------- - # cython aggregation - - _cython_functions = copy.deepcopy(BaseGrouper._cython_functions) - def _get_axes(group): if isinstance(group, Series): @@ -853,9 +848,6 @@ def _chop(self, sdata, slice_obj): class FrameSplitter(DataSplitter): - def __init__(self, data, labels, ngroups, axis=0): - super(FrameSplitter, self).__init__(data, labels, ngroups, axis=axis) - def fast_apply(self, f, names): # must return keys::list, values::list, 
mutated::bool try: