From 5b96219df4d45da7c9c905be0eade2fd05ca4a66 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev
Date: Sat, 27 Apr 2024 22:40:45 +0200
Subject: [PATCH] PERF-#7227: Call 'modin_frame.combine()' for merge and join only when necessary (#7228)

Signed-off-by: Anatoly Myachev
---
 modin/core/storage_formats/pandas/merge.py          | 2 +-
 modin/core/storage_formats/pandas/query_compiler.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/core/storage_formats/pandas/merge.py b/modin/core/storage_formats/pandas/merge.py
index 9a3705ce3c1..d1e05f113d9 100644
--- a/modin/core/storage_formats/pandas/merge.py
+++ b/modin/core/storage_formats/pandas/merge.py
@@ -115,7 +115,6 @@ def row_axis_merge(cls, left, right, kwargs):
         left_index = kwargs.get("left_index", False)
         right_index = kwargs.get("right_index", False)
         sort = kwargs.get("sort", False)
-        right_to_broadcast = right._modin_frame.combine()
 
         if how in ["left", "inner"] and left_index is False and right_index is False:
             kwargs["sort"] = False
@@ -160,6 +159,7 @@ def map_func(
             elif on is not None:
                 on = list(on) if is_list_like(on) else [on]
 
+            right_to_broadcast = right._modin_frame.combine()
             new_columns, new_dtypes = cls._compute_result_metadata(
                 left, right, on, left_on, right_on, kwargs.get("suffixes", ("_x", "_y"))
             )
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 7fd07d5fe91..feb766e38c5 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -528,13 +528,13 @@ def join(self, right, **kwargs):
         on = kwargs.get("on", None)
         how = kwargs.get("how", "left")
         sort = kwargs.get("sort", False)
-        right_to_broadcast = right._modin_frame.combine()
 
         if how in ["left", "inner"]:
 
             def map_func(left, right, kwargs=kwargs):  # pragma: no cover
                 return pandas.DataFrame.join(left, right, **kwargs)
 
+            right_to_broadcast = right._modin_frame.combine()
             new_self = self.__constructor__(
                 self._modin_frame.broadcast_apply_full_axis(
                     axis=1,