Skip to content

Commit

Permalink
Refactor implementation of column setitem (#9110)
Browse files Browse the repository at this point in the history
This small PR reworks the behavior of `ColumnBase.__setitem__` when it is provided something other than a slice as input, for instance an array. This code path requires scattering the new values into the column, which previously involved converting the Column to a Frame in order to call Frame._scatter. Since that method was only used for this one purpose, the underlying libcudf scatter implementation has been rewritten to accept and return Columns, allowing us to inline the call and also get rid of a round trip from Column to Frame and back.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Marlene  (https://github.com/marlenezw)

URL: #9110
  • Loading branch information
vyasr authored Aug 26, 2021
1 parent d29c441 commit 40cad38
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 88 deletions.
105 changes: 36 additions & 69 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from cudf._lib.column cimport Column
from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, make_table_view

from cudf._lib.reduce import minmax
from cudf.core.abc import Serializable
Expand Down Expand Up @@ -192,92 +192,59 @@ def gather(
)


def _scatter_table(Table source_table, Column scatter_map,
Table target_table, bool bounds_check=True):
def scatter(object source, Column scatter_map, Column target_column,
bool bounds_check=True):
"""
Scattering input into target as per the scatter map,
input can be a list of scalars or can be a table
"""

cdef table_view source_table_view = source_table.data_view()
cdef column_view scatter_map_view = scatter_map.view()
cdef table_view target_table_view = target_table.data_view()
cdef table_view target_table_view = make_table_view((target_column,))
cdef bool c_bounds_check = bounds_check

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_copying.scatter(
source_table_view,
scatter_map_view,
target_table_view,
c_bounds_check
)
)

data, _ = data_from_unique_ptr(
move(c_result),
column_names=target_table._column_names,
index_names=None
)

return data, (
None if target_table._index is None else target_table._index.copy(
deep=False)
)


def _scatter_scalar(scalars, Column scatter_map,
Table target_table, bool bounds_check=True):
# Needed for the table branch
cdef table_view source_table_view

# Needed for the scalar branch
cdef vector[reference_wrapper[constscalar]] source_scalars
source_scalars.reserve(len(scalars))
cdef bool c_bounds_check = bounds_check
cdef DeviceScalar slr
for val, col in zip(scalars, target_table._columns):
slr = as_device_scalar(val, col.dtype)

if isinstance(source, Column):
source_table_view = make_table_view((<Column> source,))

with nogil:
c_result = move(
cpp_copying.scatter(
source_table_view,
scatter_map_view,
target_table_view,
c_bounds_check
)
)
else:
slr = as_device_scalar(source, target_column.dtype)
source_scalars.push_back(reference_wrapper[constscalar](
slr.get_raw_ptr()[0]))
cdef column_view scatter_map_view = scatter_map.view()
cdef table_view target_table_view = target_table.data_view()

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_copying.scatter(
source_scalars,
scatter_map_view,
target_table_view,
c_bounds_check
with nogil:
c_result = move(
cpp_copying.scatter(
source_scalars,
scatter_map_view,
target_table_view,
c_bounds_check
)
)
)

data, _ = data_from_unique_ptr(
move(c_result),
column_names=target_table._column_names,
column_names=(None,),
index_names=None
)

return data, (
None if target_table._index is None else target_table._index.copy(
deep=False)
)


def scatter(object input, object scatter_map, Table target,
bool bounds_check=True):
"""
Scattering input into target as per the scatter map,
input can be a list of scalars or can be a table
"""

from cudf.core.column.column import as_column

if not isinstance(scatter_map, Column):
scatter_map = as_column(scatter_map)

if isinstance(input, Table):
return _scatter_table(input, scatter_map, target, bounds_check)
else:
return _scatter_scalar(input, scatter_map, target, bounds_check)
return next(iter(data.values()))


def _reverse_column(Column source_column):
Expand Down
18 changes: 7 additions & 11 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,17 +599,13 @@ def __setitem__(self, key: Any, value: Any):
)
else:
try:
if is_scalar(value):
input = self
out = input.as_frame()._scatter(key, [value])._as_column()
else:
if not isinstance(value, Column):
value = as_column(value)
out = (
self.as_frame()
._scatter(key, value.as_frame())
._as_column()
)
if not isinstance(key, Column):
key = as_column(key)
if not is_scalar(value) and not isinstance(value, Column):
value = as_column(value)
out = libcudf.copying.scatter(
value, key, self
)._with_type_metadata(self.dtype)
except RuntimeError as e:
if "out of bounds" in str(e):
raise IndexError(
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,14 +692,6 @@ def _as_column(self):

return self._data[None].copy(deep=False)

def _scatter(self, key, value):
result = self.__class__._from_data(
*libcudf.copying.scatter(value, key, self)
)

result._copy_type_metadata(self)
return result

def _empty_like(self, keep_index=True):
result = self.__class__._from_data(
*libcudf.copying.table_empty_like(self, keep_index)
Expand Down

0 comments on commit 40cad38

Please sign in to comment.