From c868065b25588812fd094b3637129dfa5d3bd5a0 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 6 Dec 2022 19:33:11 +0100 Subject: [PATCH] FIX-#5364: fix `get_indices` internal function (#5355) Signed-off-by: Myachev --- modin/core/dataframe/pandas/dataframe/dataframe.py | 13 +------------ .../pandas/partitioning/partition_manager.py | 7 +++++-- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py index 8685b5da409..cdb8d3eac2c 100644 --- a/modin/core/dataframe/pandas/dataframe/dataframe.py +++ b/modin/core/dataframe/pandas/dataframe/dataframe.py @@ -2058,18 +2058,7 @@ def sort_function(df): new_axes[axis.value], new_lengths[axis.value], ) = self._compute_axis_labels_and_lengths(axis.value, new_partitions) - # If we have a MultiIndex, but the first partition is empty, which may happen when - # the dataframe is small, `_compute_axis_labels_and_lengths` will return us a flattened - # MultiIndex - i.e. an Index consisting of tuples. This is because the MultiIndex from - # the remaining partitions is appended to an empty flat Index, which results in a - # flattened index. To work around this, we need to convert this flattened Index back - # into a MultiIndex. - if isinstance(self.axes[axis.value], pandas.MultiIndex) and isinstance( - new_axes[axis.value][0], tuple - ): - new_axes[axis.value] = pandas.MultiIndex.from_tuples( - new_axes[axis.value].values - ) + new_axes[axis.value] = new_axes[axis.value].set_names( self.axes[axis.value].names ) diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py index 2757e395fc4..a4d7fd61e61 100644 --- a/modin/core/dataframe/pandas/partitioning/partition_manager.py +++ b/modin/core/dataframe/pandas/partitioning/partition_manager.py @@ -880,8 +880,11 @@ def get_indices(cls, axis, partitions, index_func=None): target = partitions.T if axis == 0 else partitions new_idx = [idx.apply(func) for idx in target[0]] if len(target) else [] new_idx = cls.get_objects_from_partitions(new_idx) - # TODO FIX INFORMATION LEAK!!!!1!!1!! - total_idx = new_idx[0].append(new_idx[1:]) if new_idx else new_idx + # filter empty indexes + total_idx = list(filter(len, new_idx)) + if len(total_idx) > 0: + # TODO FIX INFORMATION LEAK!!!!1!!1!! + total_idx = total_idx[0].append(total_idx[1:]) return total_idx, new_idx @classmethod