diff --git a/asv_bench/benchmarks/benchmarks.py b/asv_bench/benchmarks/benchmarks.py
index 33f597cefdf..74f798cd603 100644
--- a/asv_bench/benchmarks/benchmarks.py
+++ b/asv_bench/benchmarks/benchmarks.py
@@ -41,6 +41,8 @@
 JOIN_DATA_SIZE = MERGE_DATA_SIZE
 ARITHMETIC_DATA_SIZE = GROUPBY_DATA_SIZE
 
+CONCAT_DATA_SIZE = [(10_128, 100, 10_000, 128)]
+
 
 class TimeGroupBy:
     param_names = ["impl", "data_type", "data_size"]
@@ -111,6 +113,61 @@ def time_merge(self, impl, data_type, data_size, how, sort):
         self.df1.merge(self.df2, on=self.df1.columns[0], how=how, sort=sort)
 
 
+class TimeConcat:
+    """Benchmark ``pd.concat`` on two generated dataframes along each axis."""
+
+    param_names = ["data_type", "data_size", "how", "axis"]
+    params = [
+        ["int"],
+        CONCAT_DATA_SIZE,
+        ["inner"],
+        [0, 1],
+    ]
+
+    def setup(self, data_type, data_size, how, axis):
+        # data_size holds two shapes back to back: entries 0-1 describe df1,
+        # entries 2-3 describe df2.
+        # shape for generate_dataframe: first - ncols, second - nrows
+        self.df1 = generate_dataframe(
+            "modin", data_type, data_size[1], data_size[0], RAND_LOW, RAND_HIGH
+        )
+        self.df2 = generate_dataframe(
+            "modin", data_type, data_size[3], data_size[2], RAND_LOW, RAND_HIGH
+        )
+
+    def time_concat(self, data_type, data_size, how, axis):
+        # `how` is forwarded as the `join` argument of concat.
+        pd.concat([self.df1, self.df2], axis=axis, join=how)
+
+
+class TimeBinaryOp:
+    """Benchmark a binary dataframe method along each axis."""
+
+    param_names = ["data_type", "data_size", "binary_op", "axis"]
+    params = [
+        ["int"],
+        CONCAT_DATA_SIZE,
+        ["mul"],
+        [0, 1],
+    ]
+
+    def setup(self, data_type, data_size, binary_op, axis):
+        # data_size holds two shapes back to back: entries 0-1 describe df1,
+        # entries 2-3 describe df2.
+        # shape for generate_dataframe: first - ncols, second - nrows
+        self.df1 = generate_dataframe(
+            "modin", data_type, data_size[1], data_size[0], RAND_LOW, RAND_HIGH
+        )
+        self.df2 = generate_dataframe(
+            "modin", data_type, data_size[3], data_size[2], RAND_LOW, RAND_HIGH
+        )
+        # Resolve the bound method here so the lookup is not part of the timing.
+        self.op = getattr(self.df1, binary_op)
+
+    def time_binary_op(self, data_type, data_size, binary_op, axis):
+        self.op(self.df2, axis=axis)
+
+
 class TimeArithmetic:
     param_names = ["impl", "data_type", "data_size", "axis"]
     params = [