GroupBy Cleanup (pandas-dev#23971)
WillAyd authored and jreback committed Nov 28, 2018
1 parent 440469b commit 580a094
Showing 4 changed files with 21 additions and 27 deletions.
8 changes: 4 additions & 4 deletions pandas/core/groupby/generic.py
@@ -826,8 +826,9 @@ def _aggregate_multiple_funcs(self, arg, _level):
         for name, func in arg:
             obj = self
             if name in results:
-                raise SpecificationError('Function names must be unique, '
-                                         'found multiple named %s' % name)
+                raise SpecificationError(
+                    'Function names must be unique, found multiple named '
+                    '{}'.format(name))

             # reset the cache so that we
             # only include the named selection
@@ -1027,8 +1028,7 @@ def nunique(self, dropna=True):
         try:
             sorter = np.lexsort((val, ids))
         except TypeError:  # catches object dtypes
-            msg = ('val.dtype must be object, got {dtype}'
-                   .format(dtype=val.dtype))
+            msg = 'val.dtype must be object, got {}'.format(val.dtype)
             assert val.dtype == object, msg
             val, _ = algorithms.factorize(val, sort=False)
             sorter = np.lexsort((val, ids))
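Both hunks in generic.py only swap %-interpolation for str.format when building error messages, so the rendered strings are unchanged. A quick sanity check, not part of the commit and using made-up values, illustrates the equivalence:

import numpy as np

name = 'mean'
old = 'Function names must be unique, found multiple named %s' % name
new = 'Function names must be unique, found multiple named {}'.format(name)
assert old == new

val = np.array(['a', 'b'], dtype=object)
old_msg = ('val.dtype must be object, got {dtype}'
           .format(dtype=val.dtype))
new_msg = 'val.dtype must be object, got {}'.format(val.dtype)
assert old_msg == new_msg  # both read 'val.dtype must be object, got object'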
5 changes: 3 additions & 2 deletions pandas/core/groupby/groupby.py
@@ -1644,7 +1644,8 @@ def nth(self, n, dropna=None):
                 # just returns NaN
                 raise ValueError("For a DataFrame groupby, dropna must be "
                                  "either None, 'any' or 'all', "
-                                 "(was passed %s)." % (dropna),)
+                                 "(was passed {dropna}).".format(
+                                     dropna=dropna))

             # old behaviour, but with all and any support for DataFrames.
             # modified in GH 7559 to have better perf
@@ -2099,6 +2100,6 @@ def groupby(obj, by, **kwds):
         from pandas.core.groupby.generic import DataFrameGroupBy
         klass = DataFrameGroupBy
     else:  # pragma: no cover
-        raise TypeError('invalid type: %s' % type(obj))
+        raise TypeError('invalid type: {}'.format(obj))

     return klass(obj, by, **kwds)
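One nuance when reading the second hunk: the old code interpolated type(obj) into the TypeError message, while the new code formats obj itself, so the message now shows the object's str() rather than its type. A minimal side-by-side sketch (Thing is a made-up class, not part of pandas):

class Thing(object):
    pass

obj = Thing()
print('invalid type: %s' % type(obj))   # invalid type: <class '__main__.Thing'>
print('invalid type: {}'.format(obj))   # invalid type: <__main__.Thing object at 0x...>

The first hunk, by contrast, is a pure reformatting: '%s' % (dropna) and '{dropna}'.format(dropna=dropna) interpolate the same value.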
9 changes: 5 additions & 4 deletions pandas/core/groupby/grouper.py
@@ -257,7 +257,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
         if level is not None:
             if not isinstance(level, int):
                 if level not in index.names:
-                    raise AssertionError('Level %s not in index' % str(level))
+                    raise AssertionError('Level {} not in index'.format(level))
                 level = index.names.index(level)

             if self.name is None:
@@ -317,7 +317,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                             (Series, Index, ExtensionArray, np.ndarray)):
                 if getattr(self.grouper, 'ndim', 1) != 1:
                     t = self.name or str(type(self.grouper))
-                    raise ValueError("Grouper for '%s' not 1-dimensional" % t)
+                    raise ValueError(
+                        "Grouper for '{}' not 1-dimensional".format(t))
                 self.grouper = self.index.map(self.grouper)
                 if not (hasattr(self.grouper, "__len__") and
                         len(self.grouper) == len(self.index)):
@@ -460,8 +461,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

             if isinstance(level, compat.string_types):
                 if obj.index.name != level:
-                    raise ValueError('level name %s is not the name of the '
-                                     'index' % level)
+                    raise ValueError('level name {} is not the name of the '
+                                     'index'.format(level))
             elif level > 0 or level < -1:
                 raise ValueError('level > 0 or level < -1 only valid with '
                                  ' MultiIndex')
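The first grouper.py hunk also drops the explicit str(level) call; that is safe because '{}'.format(level) stringifies the argument itself. A small check under assumed level values (not from the commit):

for level in (2, 'second'):
    old = 'Level %s not in index' % str(level)
    new = 'Level {} not in index'.format(level)
    assert old == new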
26 changes: 9 additions & 17 deletions pandas/core/groupby/ops.py
@@ -380,7 +380,8 @@ def get_func(fname):

             # otherwise find dtype-specific version, falling back to object
             for dt in [dtype_str, 'object']:
-                f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
+                f = getattr(libgroupby, "{fname}_{dtype_str}".format(
+                    fname=fname, dtype_str=dtype_str), None)
                 if f is not None:
                     return f

@@ -403,9 +404,11 @@ def wrapper(*args, **kwargs):
             func = get_func(ftype)

         if func is None:
-            raise NotImplementedError("function is not implemented for this"
-                                      "dtype: [how->%s,dtype->%s]" %
-                                      (how, dtype_str))
+            raise NotImplementedError(
+                "function is not implemented for this dtype: "
+                "[how->{how},dtype->{dtype_str}]".format(how=how,
+                                                         dtype_str=dtype_str))
+
         return func

     def _cython_operation(self, kind, values, how, axis, min_count=-1,
@@ -485,7 +488,8 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
             out_dtype = 'float'
         else:
             if is_numeric:
-                out_dtype = '%s%d' % (values.dtype.kind, values.dtype.itemsize)
+                out_dtype = '{kind}{itemsize}'.format(
+                    kind=values.dtype.kind, itemsize=values.dtype.itemsize)
             else:
                 out_dtype = 'object'

@@ -739,10 +743,6 @@ def group_info(self):
                 obs_group_ids.astype('int64', copy=False),
                 ngroups)

-    @cache_readonly
-    def ngroups(self):
-        return len(self.result_index)
-
     @cache_readonly
     def result_index(self):
         if len(self.binlabels) != 0 and isna(self.binlabels[0]):
@@ -769,11 +769,6 @@ def agg_series(self, obj, func):
         grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy)
         return grouper.get_result()

-    # ----------------------------------------------------------------------
-    # cython aggregation
-
-    _cython_functions = copy.deepcopy(BaseGrouper._cython_functions)
-

 def _get_axes(group):
     if isinstance(group, Series):
@@ -853,9 +848,6 @@ def _chop(self, sdata, slice_obj):

 class FrameSplitter(DataSplitter):

-    def __init__(self, data, labels, ngroups, axis=0):
-        super(FrameSplitter, self).__init__(data, labels, ngroups, axis=axis)
-
     def fast_apply(self, f, names):
         # must return keys::list, values::list, mutated::bool
         try:
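For context on the first ops.py hunk, get_func resolves a cython routine by name: it tries a fused-type version first (numeric dtypes only), then a dtype-suffixed name, then an object fallback. The sketch below is a standalone illustration using a stand-in namespace instead of pandas._libs.groupby, with made-up function names; note that in the pandas code the loop variable dt goes unused and the name is always built from dtype_str, a pre-existing quirk this commit leaves untouched, whereas the sketch uses dt so the object fallback actually fires:

from types import SimpleNamespace

# stand-in for pandas._libs.groupby (names are illustrative only)
fake_libgroupby = SimpleNamespace(
    group_add=lambda *a: 'fused',        # fused-type version, numeric only
    group_min_float64=lambda *a: 'f64',  # dtype-specific version
    group_min_object=lambda *a: 'obj',   # object fallback
)

def get_func(fname, dtype_str, is_numeric):
    # fused-type version of the function, only valid for numeric dtypes
    f = getattr(fake_libgroupby, fname, None)
    if f is not None and is_numeric:
        return f
    # otherwise look for a dtype-specific version, falling back to object
    for dt in [dtype_str, 'object']:
        f = getattr(fake_libgroupby, '{fname}_{dt}'.format(fname=fname, dt=dt), None)
        if f is not None:
            return f
    return None  # the caller raises NotImplementedError in this case

assert get_func('group_add', 'float64', is_numeric=True)() == 'fused'
assert get_func('group_min', 'float64', is_numeric=True)() == 'f64'
assert get_func('group_min', 'object', is_numeric=False)() == 'obj'

The remaining ops.py hunks are pure deletions of members that appear to duplicate inherited behaviour (BinGrouper.ngroups, the _cython_functions deepcopy, FrameSplitter.__init__), so no replacement code is involved.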
