From f4556896e2f1f02a6f95f6875bf041c879863d04 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Thu, 25 Apr 2024 21:45:03 -0600
Subject: [PATCH] Fix benchmark.

Build the groupby_reduce result once per benchmark instance (cached
property) and reuse it in the track_* metrics instead of rebuilding the
graph in every metric. The explicit method="cohorts" argument is dropped
from all calls.

The cached object is the result array rather than its graph:
dask.optimize only optimizes dask collections and hands any other
argument back untouched, so passing it the bare graph would make
track_num_tasks_optimized report the unoptimized task count.
---
Review notes (ignored by git-am):
- Context-line whitespace was reconstructed from a whitespace-collapsed
  copy of this patch; if a hunk is rejected, retry with
  `git apply --ignore-whitespace`.
- The post-image blob hash on the `index` line predates the review edits
  to the added lines and is no longer accurate.

 asv_bench/benchmarks/cohorts.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/asv_bench/benchmarks/cohorts.py b/asv_bench/benchmarks/cohorts.py
index 7d74f884..c0162ccc 100644
--- a/asv_bench/benchmarks/cohorts.py
+++ b/asv_bench/benchmarks/cohorts.py
@@ -1,3 +1,5 @@
+from functools import cached_property
+
 import dask
 import numpy as np
 import pandas as pd
@@ -11,6 +13,11 @@ class Cohorts:
     def setup(self, *args, **kwargs):
         raise NotImplementedError
 
+    @cached_property
+    def result(self):
+        """Lazy groupby-sum array, built once and shared by the track_* metrics."""
+        return flox.groupby_reduce(self.array, self.by, func="sum", axis=self.axis)[0]
+
     def containment(self):
         asfloat = self.bitmask().astype(float)
         chunks_per_label = asfloat.sum(axis=0)
@@ -43,26 +50,18 @@ def time_find_group_cohorts(self):
             pass
 
     def time_graph_construct(self):
-        flox.groupby_reduce(self.array, self.by, func="sum", axis=self.axis, method="cohorts")
+        flox.groupby_reduce(self.array, self.by, func="sum", axis=self.axis)
 
     def track_num_tasks(self):
-        result = flox.groupby_reduce(
-            self.array, self.by, func="sum", axis=self.axis, method="cohorts"
-        )[0]
-        return len(result.dask.to_dict())
+        return len(self.result.dask.to_dict())
 
     def track_num_tasks_optimized(self):
-        result = flox.groupby_reduce(
-            self.array, self.by, func="sum", axis=self.axis, method="cohorts"
-        )[0]
-        (opt,) = dask.optimize(result)
+        # Pass the collection itself: dask.optimize leaves a bare graph unoptimized.
+        (opt,) = dask.optimize(self.result)
         return len(opt.dask.to_dict())
 
     def track_num_layers(self):
-        result = flox.groupby_reduce(
-            self.array, self.by, func="sum", axis=self.axis, method="cohorts"
-        )[0]
-        return len(result.dask.layers)
+        return len(self.result.dask.layers)
 
     track_num_tasks.unit = "tasks"  # type: ignore[attr-defined] # Lazy
     track_num_tasks_optimized.unit = "tasks"  # type: ignore[attr-defined] # Lazy