Skip to content

Commit

Permalink
FIX-modin-project#2550: remove decorators usage for asv tested functions (modin-project#2551)
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev authored Dec 17, 2020
1 parent d8d58bb commit 126f2a5
Showing 1 changed file with 26 additions and 45 deletions.
71 changes: 26 additions & 45 deletions asv_bench/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,9 @@
]


def trigger_execution(func):
def real_executor(*arg, **kwargs):
return func(*arg, **kwargs).shape

return real_executor
def execute(df):
"Make sure the calculations are done."
return df.shape


class TimeMultiColumnGroupby:
Expand All @@ -69,13 +67,11 @@ def setup(self, data_size, count_columns):
)
self.groupby_columns = [col for col in self.df.columns[:count_columns]]

@trigger_execution
def time_groupby_agg_quan(self, data_size, count_columns):
return self.df.groupby(by=self.groupby_columns).agg("quantile")
execute(self.df.groupby(by=self.groupby_columns).agg("quantile"))

@trigger_execution
def time_groupby_agg_mean(self, data_size, count_columns):
return self.df.groupby(by=self.groupby_columns).apply(lambda df: df.mean())
execute(self.df.groupby(by=self.groupby_columns).apply(lambda df: df.mean()))


class TimeGroupByDefaultAggregations:
Expand All @@ -90,21 +86,17 @@ def setup(self, data_size):
)
self.groupby_column = self.df.columns[0]

@trigger_execution
def time_groupby_count(self, data_size):
return self.df.groupby(by=self.groupby_column).count()
execute(self.df.groupby(by=self.groupby_column).count())

@trigger_execution
def time_groupby_size(self, data_size):
return self.df.groupby(by=self.groupby_column).size()
execute(self.df.groupby(by=self.groupby_column).size())

@trigger_execution
def time_groupby_sum(self, data_size):
return self.df.groupby(by=self.groupby_column).sum()
execute(self.df.groupby(by=self.groupby_column).sum())

@trigger_execution
def time_groupby_mean(self, data_size):
return self.df.groupby(by=self.groupby_column).mean()
execute(self.df.groupby(by=self.groupby_column).mean())


class TimeJoin:
Expand All @@ -123,10 +115,11 @@ def setup(self, data_size, how, sort):
ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
)

@trigger_execution
def time_join(self, data_size, how, sort):
return self.df1.join(
self.df2, on=self.df1.columns[0], how=how, lsuffix="left_", sort=sort
execute(
self.df1.join(
self.df2, on=self.df1.columns[0], how=how, lsuffix="left_", sort=sort
)
)


Expand All @@ -146,9 +139,8 @@ def setup(self, data_size, how, sort):
ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
)

@trigger_execution
def time_merge(self, data_size, how, sort):
return self.df1.merge(self.df2, on=self.df1.columns[0], how=how, sort=sort)
execute(self.df1.merge(self.df2, on=self.df1.columns[0], how=how, sort=sort))


class TimeConcat:
Expand All @@ -168,12 +160,11 @@ def setup(self, data_size, how, axis):
ASV_USE_IMPL, "int", data_size[3], data_size[2], RAND_LOW, RAND_HIGH
)

@trigger_execution
def time_concat(self, data_size, how, axis):
if ASV_USE_IMPL == "modin":
return pd.concat([self.df1, self.df2], axis=axis, join=how)
execute(pd.concat([self.df1, self.df2], axis=axis, join=how))
elif ASV_USE_IMPL == "pandas":
return pandas.concat([self.df1, self.df2], axis=axis, join=how)
execute(pandas.concat([self.df1, self.df2], axis=axis, join=how))
else:
raise NotImplementedError

Expand All @@ -196,9 +187,8 @@ def setup(self, data_size, binary_op, axis):
)
self.op = getattr(self.df1, binary_op)

@trigger_execution
def time_binary_op(self, data_size, binary_op, axis):
return self.op(self.df2, axis=axis)
execute(self.op(self.df2, axis=axis))


class BaseTimeSetItem:
Expand Down Expand Up @@ -243,15 +233,13 @@ class TimeSetItem(BaseTimeSetItem):
[True, False],
]

@trigger_execution
def time_setitem_qc(self, *args, **kwargs):
self.df[self.loc] = self.item
return self.df
execute(self.df)

@trigger_execution
def time_setitem_raw(self, *args, **kwargs):
self.df[self.loc] = self.item_raw
return self.df
execute(self.df)


class TimeInsert(BaseTimeSetItem):
Expand All @@ -262,15 +250,13 @@ class TimeInsert(BaseTimeSetItem):
[True, False],
]

@trigger_execution
def time_insert_qc(self, *args, **kwargs):
self.df.insert(loc=self.iloc, column=random_string(), value=self.item)
return self.df
execute(self.df)

@trigger_execution
def time_insert_raw(self, *args, **kwargs):
self.df.insert(loc=self.iloc, column=random_string(), value=self.item_raw)
return self.df
execute(self.df)


class TimeArithmetic:
Expand All @@ -285,22 +271,17 @@ def setup(self, data_size, axis):
ASV_USE_IMPL, "int", data_size[1], data_size[0], RAND_LOW, RAND_HIGH
)

@trigger_execution
def time_sum(self, data_size, axis):
return self.df.sum(axis=axis)
execute(self.df.sum(axis=axis))

@trigger_execution
def time_median(self, data_size, axis):
return self.df.median(axis=axis)
execute(self.df.median(axis=axis))

@trigger_execution
def time_nunique(self, data_size, axis):
return self.df.nunique(axis=axis)
execute(self.df.nunique(axis=axis))

@trigger_execution
def time_apply(self, data_size, axis):
return self.df.apply(lambda df: df.sum(), axis=axis)
execute(self.df.apply(lambda df: df.sum(), axis=axis))

@trigger_execution
def time_mean(self, data_size, axis):
return self.df.mean(axis=axis)
execute(self.df.mean(axis=axis))

0 comments on commit 126f2a5

Please sign in to comment.