From 83d786d8cbcb33fbdc0e1610051d7d7e42c3f3dd Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 25 May 2022 06:33:51 -0500 Subject: [PATCH] PERF-#4493: Use partition size caches more in Modin dataframe. Signed-off-by: mvashishtha --- modin/core/dataframe/pandas/dataframe/dataframe.py | 13 ++----------- modin/core/execution/ray/common/utils.py | 2 +- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py index 8a2f4b7b2a5..70cdc3efb3e 100644 --- a/modin/core/dataframe/pandas/dataframe/dataframe.py +++ b/modin/core/dataframe/pandas/dataframe/dataframe.py @@ -2149,17 +2149,8 @@ def _prepare_frame_to_broadcast(self, axis, indices, broadcast_all): broadcast [self[key1], self[key2]] partitions and internal indices for `self` must be [[0, 1], [5]] """ if broadcast_all: - - def get_len(part): - return part.width() if not axis else part.length() - - parts = self._partitions if not axis else self._partitions.T - return { - key: { - i: np.arange(get_len(parts[0][i])) for i in np.arange(len(parts[0])) - } - for key in indices.keys() - } + sizes = self._row_lengths if axis else self._column_widths + return {key: dict(enumerate(sizes)) for key in indices.keys()} passed_len = 0 result_dict = {} for part_num, internal in indices.items(): diff --git a/modin/core/execution/ray/common/utils.py b/modin/core/execution/ray/common/utils.py index 0543dd1b3da..55e3a6d4a55 100644 --- a/modin/core/execution/ray/common/utils.py +++ b/modin/core/execution/ray/common/utils.py @@ -194,7 +194,7 @@ def initialize_ray( ray_init_kwargs = { "num_cpus": CpuCount.get(), "num_gpus": GpuCount.get(), - "include_dashboard": False, + "include_dashboard": True, "ignore_reinit_error": True, "object_store_memory": object_store_memory, "_redis_password": redis_password,