Skip to content

Commit

Permalink
Review
Browse files Browse the repository at this point in the history
  • Loading branch information
shwina committed Mar 26, 2021
1 parent 671a0e0 commit 797087b
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from cudf import _lib as libcudf
from cudf._typing import ColumnLike, DataFrameOrSeries
from cudf.core.column import as_column, build_categorical_column, column_empty
from cudf.core.join import merge
from cudf.utils.dtypes import (
is_categorical_dtype,
is_column_like,
Expand Down Expand Up @@ -3364,7 +3365,7 @@ def _merge(
left_index, right_index = right_index, left_index
suffixes = (suffixes[1], suffixes[0])

return cudf.core.join.merge(
return merge(
lhs,
rhs,
on=on,
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/join/_join_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,3 @@ def _coerce_to_tuple(obj):
return tuple(obj)
else:
return (obj,)


def _coerce_to_list(obj):
return list(_coerce_to_tuple(obj))
30 changes: 17 additions & 13 deletions python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
from __future__ import annotations

import functools
from collections import OrderedDict, namedtuple
from collections import namedtuple
from typing import TYPE_CHECKING, Callable, Tuple

import cudf
from cudf import _lib as libcudf
from cudf.core.join._join_helpers import (
_coerce_to_list,
_coerce_to_tuple,
_frame_select_by_indexers,
_Indexer,
Expand Down Expand Up @@ -275,8 +274,8 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
# Compute the result column names:
# left_names and right_names will be a mappings of input column names
# to the corresponding names in the final result.
left_names = OrderedDict(zip(left_result._data, left_result._data))
right_names = OrderedDict(zip(right_result._data, right_result._data))
left_names = dict(zip(left_result._data, left_result._data))
right_names = dict(zip(right_result._data, right_result._data))

# For any columns from left_result and right_result that have the same
# name:
Expand All @@ -288,12 +287,14 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
if self.on:
key_columns_with_same_name = self.on
else:
key_columns_with_same_name = []
for lkey, rkey in zip(*self._keys):
if (lkey.index, rkey.index) == (False, False):
if lkey.name == rkey.name:
key_columns_with_same_name.append(lkey.name)

key_columns_with_same_name = [
lkey.name
for lkey, rkey in zip(*self._keys)
if (
(lkey.index, rkey.index) == (False, False)
and lkey.name == rkey.name
)
]
for name in common_names:
if name not in key_columns_with_same_name:
left_names[name] = f"{name}{self.lsuffix}"
Expand Down Expand Up @@ -339,19 +340,22 @@ def _sort_result(self, result: Frame) -> Frame:
if isinstance(result, cudf.Index):
sort_order = result._get_sorted_inds()
else:
sort_order = result._get_sorted_inds(_coerce_to_list(self.on))
# need a list instead of a tuple here because
# _get_sorted_inds calls down to ColumnAccessor.get_by_label
# which handles lists and tuples differently
sort_order = result._get_sorted_inds(list(self.on))
return result._gather(sort_order, keep_index=False)
by = []
if self.left_index and self.right_index:
if result._index is not None:
by.extend(result._index._data.columns)
if self.left_on:
by.extend(
[result._data[col] for col in _coerce_to_list(self.left_on)]
[result._data[col] for col in _coerce_to_tuple(self.left_on)]
)
if self.right_on:
by.extend(
[result._data[col] for col in _coerce_to_list(self.right_on)]
[result._data[col] for col in _coerce_to_tuple(self.right_on)]
)
if by:
to_sort = cudf.DataFrame._from_columns(by)
Expand Down

0 comments on commit 797087b

Please sign in to comment.