Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor implementation of column setitem #9110

Merged
merged 8 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 36 additions & 69 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from cudf._lib.column cimport Column
from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.table cimport Table
from cudf._lib.table cimport Table, make_table_view

from cudf._lib.reduce import minmax
from cudf.core.abc import Serializable
Expand Down Expand Up @@ -192,92 +192,59 @@ def gather(
)


def _scatter_table(Table source_table, Column scatter_map,
Table target_table, bool bounds_check=True):
def scatter(object source, Column scatter_map, Column target_column,
bool bounds_check=True):
"""
Scattering input into target as per the scatter map,
input can be a list of scalars or can be a table
"""

cdef table_view source_table_view = source_table.data_view()
cdef column_view scatter_map_view = scatter_map.view()
cdef table_view target_table_view = target_table.data_view()
cdef table_view target_table_view = make_table_view((target_column,))
cdef bool c_bounds_check = bounds_check

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_copying.scatter(
source_table_view,
scatter_map_view,
target_table_view,
c_bounds_check
)
)

data, _ = data_from_unique_ptr(
move(c_result),
column_names=target_table._column_names,
index_names=None
)

return data, (
None if target_table._index is None else target_table._index.copy(
deep=False)
)


def _scatter_scalar(scalars, Column scatter_map,
Table target_table, bool bounds_check=True):
# Needed for the table branch
cdef table_view source_table_view

# Needed for the scalar branch
cdef vector[reference_wrapper[constscalar]] source_scalars
source_scalars.reserve(len(scalars))
cdef bool c_bounds_check = bounds_check
cdef DeviceScalar slr
for val, col in zip(scalars, target_table._columns):
slr = as_device_scalar(val, col.dtype)

if isinstance(source, Column):
source_table_view = make_table_view((<Column> source,))

with nogil:
c_result = move(
cpp_copying.scatter(
source_table_view,
scatter_map_view,
target_table_view,
c_bounds_check
)
)
else:
slr = as_device_scalar(source, target_column.dtype)
source_scalars.push_back(reference_wrapper[constscalar](
slr.get_raw_ptr()[0]))
cdef column_view scatter_map_view = scatter_map.view()
cdef table_view target_table_view = target_table.data_view()

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_copying.scatter(
source_scalars,
scatter_map_view,
target_table_view,
c_bounds_check
with nogil:
c_result = move(
cpp_copying.scatter(
source_scalars,
scatter_map_view,
target_table_view,
c_bounds_check
)
)
)

data, _ = data_from_unique_ptr(
move(c_result),
column_names=target_table._column_names,
column_names=(None,),
index_names=None
)

return data, (
None if target_table._index is None else target_table._index.copy(
deep=False)
)


def scatter(object input, object scatter_map, Table target,
bool bounds_check=True):
"""
Scattering input into target as per the scatter map,
input can be a list of scalars or can be a table
"""

from cudf.core.column.column import as_column

if not isinstance(scatter_map, Column):
scatter_map = as_column(scatter_map)

if isinstance(input, Table):
return _scatter_table(input, scatter_map, target, bounds_check)
else:
return _scatter_scalar(input, scatter_map, target, bounds_check)
return next(iter(data.values()))


def _reverse_column(Column source_column):
Expand Down
17 changes: 6 additions & 11 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,17 +599,12 @@ def __setitem__(self, key: Any, value: Any):
)
else:
try:
if is_scalar(value):
input = self
out = input.as_frame()._scatter(key, [value])._as_column()
else:
if not isinstance(value, Column):
value = as_column(value)
out = (
self.as_frame()
._scatter(key, value.as_frame())
._as_column()
)
if not isinstance(key, Column):
key = as_column(key)
if not is_scalar(value) and not isinstance(value, Column):
value = as_column(value)
out = libcudf.copying.scatter(value, key, self)
out._with_type_metadata(self)
vyasr marked this conversation as resolved.
Show resolved Hide resolved
except RuntimeError as e:
if "out of bounds" in str(e):
raise IndexError(
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,14 +692,6 @@ def _as_column(self):

return self._data[None].copy(deep=False)

def _scatter(self, key, value):
result = self.__class__._from_data(
*libcudf.copying.scatter(value, key, self)
)

result._copy_type_metadata(self)
return result

def _empty_like(self, keep_index=True):
result = self.__class__._from_data(
*libcudf.copying.table_empty_like(self, keep_index)
Expand Down