Skip to content

Commit

Permalink
Merge pull request #2456 from shwina/improve-typecast
Browse files (browse the repository at this point in the history)
Small improvement to typecast utility
  • Loading branch information
Keith Kraus authored Aug 5, 2019
2 parents 511b75c + 36dda43 commit 87f36d8
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
- PR #2406 Moved all existing `table` related files to a `legacy/` directory
- PR #2350 Performance related changes to get_dummies
- PR #2420 Remove `cudautils.astype` and replace with `typecast.apply_cast`
- PR #2456 Small improvement to typecast utility
- PR #2458 Fix handling of thirdparty packages in `isort` config

## Bug Fixes
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/bindings/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,9 @@ def apply_gather(in_cols, maps, out_cols=None):
else:
in_size = in_cols[0].data.size

import cudf.bindings.typecast as typecast
from cudf.dataframe import columnops
col = typecast.apply_cast(columnops.as_column(maps), dtype=np.int32)
maps = col.data.mem
maps = columnops.as_column(maps).astype("int32")
maps = maps.data.mem
# TODO: replace with libcudf pymod when available
maps = modulo(maps, in_size)

Expand Down
6 changes: 1 addition & 5 deletions python/cudf/cudf/bindings/sort.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,8 @@ class SegmentedRadixSortPlan(object):

# Note: .astype is required below because .copy_to_device
# is just a plain memcpy
import cudf.bindings.typecast as typecast
from cudf.dataframe import columnops
col = typecast.apply_cast(
columnops.as_column(segments),
dtype=seg_dtype
)
col = columnops.as_column(segments).astype(seg_dtype)
d_begins.copy_to_device(col.data.mem)
d_ends[-1:].copy_to_device(np.require([self.nelem], dtype=seg_dtype))

Expand Down
22 changes: 11 additions & 11 deletions python/cudf/cudf/bindings/typecast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,43 @@ from cudf.dataframe.column import Column
from libc.stdlib cimport free

import numpy as np

import pandas as pd

_time_unit = {
'none': TIME_UNIT_NONE,
None: TIME_UNIT_NONE,
's': TIME_UNIT_s,
'ms': TIME_UNIT_ms,
'us': TIME_UNIT_us,
'ns': TIME_UNIT_ns,
}


def apply_cast(incol, **kwargs):
def apply_cast(incol, dtype="float64", time_unit=None):
"""
Cast from incol.dtype to outcol.dtype
Return a Column with the values in `incol` cast to `dtype`.
Currently supports numeric and datetime dtypes.
"""

check_gdf_compatibility(incol)
dtype = pd.api.types.pandas_dtype(dtype).type

cdef gdf_column* c_incol = column_view_from_column(incol)

npdtype = kwargs.get("dtype", np.float64)
cdef gdf_dtype dtype = dtypes[npdtype]
cdef uintptr_t category
cdef gdf_dtype c_dtype = dtypes[dtype]
cdef uintptr_t c_category

cdef gdf_dtype_extra_info info = gdf_dtype_extra_info(
time_unit=TIME_UNIT_NONE,
category=<void*>category
category=<void*>c_category
)
unit = kwargs.get("time_unit", 'none')
info.time_unit = _time_unit[unit]
info.time_unit = _time_unit[time_unit]

cdef gdf_column result

with nogil:
result = cast(
c_incol[0],
dtype,
c_dtype,
info
)

Expand Down
9 changes: 4 additions & 5 deletions python/cudf/cudf/dataframe/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,13 @@ def append(self, element):
self.extend(np.asarray(element, dtype=self.dtype))

def extend(self, array):
from cudf.dataframe import columnops

needed = array.size
self._sentry_capacity(needed)
import cudf.bindings.typecast as typecast
from cudf.dataframe import columnops

array = typecast.apply_cast(
columnops.as_column(array), dtype=self.dtype.type
).data.mem
array = columnops.as_column(array).astype(self.dtype).data.mem

self.mem[self.size : self.size + needed].copy_to_device(array)
self.size += needed

Expand Down
10 changes: 3 additions & 7 deletions python/cudf/cudf/dataframe/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,20 +123,16 @@ def as_numerical(self):
return self.view(
numerical.NumericalColumn,
dtype="int64",
data=typecast.apply_cast(self, dtype=np.int64).data,
data=typecast.apply_cast(self, np.int64).data,
)

def as_datetime_column(self, dtype, **kwargs):
import cudf.bindings.typecast as typecast

return typecast.apply_cast(self, dtype=np.dtype(dtype).type)
return typecast.apply_cast(self, dtype=dtype)

def as_numerical_column(self, dtype, **kwargs):
import cudf.bindings.typecast as typecast

return typecast.apply_cast(
self.as_numerical, dtype=np.dtype(dtype).type
)
return self.as_numerical.astype(dtype)

def as_string_column(self, dtype, **kwargs):
from cudf.dataframe import string
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/dataframe/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,14 @@ def as_datetime_column(self, dtype, **kwargs):
return self.view(
datetime.DatetimeColumn,
dtype=dtype,
data=typecast.apply_cast(self, dtype=np.dtype(dtype).type).data,
data=typecast.apply_cast(self, dtype=dtype).data,
)

def as_numerical_column(self, dtype, **kwargs):
import cudf.bindings.typecast as typecast

return self.replace(
data=typecast.apply_cast(self, dtype=np.dtype(dtype).type).data,
dtype=np.dtype(dtype),
data=typecast.apply_cast(self, dtype).data, dtype=np.dtype(dtype)
)

def sort_by_values(self, ascending=True, na_position="last"):
Expand Down

0 comments on commit 87f36d8

Please sign in to comment.