Skip to content

Commit

Permalink
Merge pull request #14 from sklam/libgdf
Browse files Browse the repository at this point in the history
Delegate basic arithmetic and logical binops to libgdf.
  • Loading branch information
seibert authored Jun 2, 2017
2 parents a7d9412 + 3593624 commit 415972c
Show file tree
Hide file tree
Showing 6 changed files with 308 additions and 202 deletions.
30 changes: 10 additions & 20 deletions conda_environments/testing_py35.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
name: pycudf_testing_py35
channels:
- numba
- gpuopenanalytics/label/dev
- defaults
dependencies:
- accelerate_cudalib=2.0=0
- cudatoolkit=8.0=0
- mkl=2017.0.1=0
- numpy=1.12.1=py35_0
- openssl=1.0.2k=1
- pip=9.0.1=py35_1
- py=1.4.33=py35_0
- pytest=3.0.7=py35_0
- python=3.5.3=1
- readline=6.2=2
- setuptools=27.2.0=py35_0
- sqlite=3.13.0=0
- tk=8.5.18=0
- wheel=0.29.0=py35_0
- xz=5.2.2=1
- zlib=1.2.8=3
- llvmlite=0.18
- numba=0.33
- pip:
- flatbuffers==2015.12.22.1
- pytest=3.0.7
- python=3.5.3
- setuptools=27.2.0
- accelerate_cudalib=2.0
- cudatoolkit=8.0
- llvmlite>=0.18
- numpy=1.12.1
- numba>=0.33
- libgdf_cffi>=0.1.0a1.dev
51 changes: 51 additions & 0 deletions pygdf/_gdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
This file provides bindings to the libgdf library.
"""
import numpy as np

from libgdf_cffi import ffi, libgdf


def columnview(size, data, mask=None, dtype=None):
    """
    Make a column view.

    Allocates a ``gdf_column`` through cffi and fills it in with
    ``libgdf.gdf_column_view``.

    *size* is forwarded unchanged to ``gdf_column_view``.
    *data* and *mask* are buffer objects exposing ``to_gpu_array()``;
    either may be None, in which case a NULL pointer is passed.
    *dtype* is anything ``np.dtype`` accepts; when it is None the dtype
    is taken from ``data.dtype``.

    Returns the cffi ``gdf_column*`` object.
    """
    def unwrap(buffer):
        # A missing buffer is handed to libgdf as a NULL pointer.
        if buffer is None:
            return ffi.NULL
        devary = buffer.to_gpu_array()
        return ffi.cast('void*', devary.device_ctypes_pointer.value)

    # Test against None explicitly.  ``dtype or data.dtype`` is wrong here:
    # a non-structured ``np.dtype`` instance has ``len() == 0`` and is
    # therefore falsy, so an explicitly passed dtype object would be
    # silently replaced by ``data.dtype``.
    if dtype is None:
        dtype = data.dtype
    colview = ffi.new('gdf_column*')
    libgdf.gdf_column_view(colview, unwrap(data), unwrap(mask), size,
                           np_to_gdf_dtype(dtype))

    return colview


def apply_binaryop(binop, lhs, rhs, out):
    """Run the binary operator *binop* on *lhs* and *rhs*, writing to *out*.

    Each of *lhs*, *rhs* and *out* must expose a ``_cffi_view`` attribute;
    those views are what *binop* is called with.  Whatever *binop*
    returns is discarded and this function returns None.
    """
    left_view = lhs._cffi_view
    right_view = rhs._cffi_view
    out_view = out._cffi_view
    binop(left_view, right_view, out_view)


def apply_unaryop(unaop, inp, out):
    """Run the unary operator *unaop* on *inp*, writing to *out*.

    Both *inp* and *out* must expose a ``_cffi_view`` attribute; those
    views are what *unaop* is called with.  Whatever *unaop* returns is
    discarded and this function returns None.
    """
    in_view = inp._cffi_view
    out_view = out._cffi_view
    unaop(in_view, out_view)


def np_to_gdf_dtype(dtype):
    """Translate a numpy dtype into the matching libgdf dtype constant.

    *dtype* is normalised with ``np.dtype`` first, so anything that
    constructor accepts may be passed.  A dtype without an entry in the
    table below raises ``KeyError``.
    """
    # Note that both int8 and bool map to GDF_INT8.
    gdf_codes = {
        np.float64: libgdf.GDF_FLOAT64,
        np.float32: libgdf.GDF_FLOAT32,
        np.int64: libgdf.GDF_INT64,
        np.int32: libgdf.GDF_INT32,
        np.int8: libgdf.GDF_INT8,
        np.bool_: libgdf.GDF_INT8,
    }
    scalar_type = np.dtype(dtype).type
    return gdf_codes[scalar_type]

120 changes: 119 additions & 1 deletion pygdf/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from numba import cuda

from . import cudautils, utils
from libgdf_cffi import libgdf
from . import cudautils, utils, _gdf



class DataFrame(object):
Expand Down Expand Up @@ -499,6 +501,10 @@ def __init__(self, size, dtype, buffer=None, mask=None, null_count=None):
raise ValueError('null_count must be provided')
null_count = 0
self._null_count = null_count
# make cffi view for libgdf
libgdf.gdf_column_view
self._cffi_view = _gdf.columnview(size=self._size, data=self._data,
mask=self._mask)

def __len__(self):
"""Returns the size of the ``Series`` including null values.
Expand Down Expand Up @@ -532,6 +538,100 @@ def __getitem__(self, arg):
else:
raise NotImplementedError(type(arg))

def __bool__(self):
"""Always raise TypeError when converting a Series
into a boolean.
"""
raise TypeError("can't compute boolean for {!r}".format(type(self)))

def _call_binop(self, other, fn, out_dtype):
    """
    Internal helper: apply the binary operator *fn* to *self* and
    *other* and return the result as a new Series of dtype *out_dtype*.

    The output has ``len(self)`` elements.
    """
    # Device memory for the result, wrapped as a Series so that it
    # carries the view handed to libgdf.
    result_buf = cuda.device_array(shape=len(self), dtype=out_dtype)
    result = Series.from_array(result_buf)
    _gdf.apply_binaryop(fn, self, other, result)
    return result

def _binaryop(self, other, fn):
    """
    Internal helper: apply the binary operator *fn* to *self* and
    *other* and return the resulting Series.

    The output uses the dtype of *self*.  When *other* is not a Series,
    ``NotImplemented`` is returned instead.
    """
    if not isinstance(other, Series):
        return NotImplemented
    return self._call_binop(other, fn, self.dtype)

def _call_unaop(self, fn, out_dtype):
    """
    Internal helper: apply the unary operator *fn* to *self* and return
    the result as a new Series of dtype *out_dtype*.

    The output has ``len(self)`` elements.
    """
    # Device memory for the result, wrapped as a Series so that it
    # carries the view handed to libgdf.
    result_buf = cuda.device_array(shape=len(self), dtype=out_dtype)
    result = Series.from_array(result_buf)
    _gdf.apply_unaryop(fn, self, result)
    return result

def _unaryop(self, fn):
"""
Internal util to call a unary operator *fn* on operands *self*.
Return the output Series. The output dtype is determined by the input
operand.
"""
return self._call_unaop(fn, self.dtype)

def __add__(self, other):
    """Implement ``self + other`` via ``libgdf.gdf_add_generic``."""
    add_fn = libgdf.gdf_add_generic
    return self._binaryop(other, fn=add_fn)

def __sub__(self, other):
    """Implement ``self - other`` via ``libgdf.gdf_sub_generic``."""
    sub_fn = libgdf.gdf_sub_generic
    return self._binaryop(other, fn=sub_fn)

def __mul__(self, other):
    """Implement ``self * other`` via ``libgdf.gdf_mul_generic``."""
    mul_fn = libgdf.gdf_mul_generic
    return self._binaryop(other, fn=mul_fn)

def __floordiv__(self, other):
    """Implement ``self // other`` via ``libgdf.gdf_floordiv_generic``."""
    floordiv_fn = libgdf.gdf_floordiv_generic
    return self._binaryop(other, fn=floordiv_fn)

def __truediv__(self, other):
    """Implement ``self / other`` via ``libgdf.gdf_div_generic``."""
    div_fn = libgdf.gdf_div_generic
    return self._binaryop(other, fn=div_fn)

# The ``__div__`` name is bound to the same implementation.
__div__ = __truediv__

def _compare(self, other, fn):
    """
    Internal helper: apply the comparison operator *fn* to *self* and
    *other* and return the resulting Series.

    The output dtype is always `np.bool_`.  When *other* is not a
    Series, ``NotImplemented`` is returned instead.
    """
    if not isinstance(other, Series):
        return NotImplemented
    return self._call_binop(other, fn, np.bool_)

def __eq__(self, other):
    """Implement ``self == other`` via ``libgdf.gdf_eq_generic``."""
    eq_fn = libgdf.gdf_eq_generic
    return self._compare(other, fn=eq_fn)

def __ne__(self, other):
    """Implement ``self != other`` via ``libgdf.gdf_ne_generic``."""
    ne_fn = libgdf.gdf_ne_generic
    return self._compare(other, fn=ne_fn)

def __lt__(self, other):
    """Implement ``self < other`` via ``libgdf.gdf_lt_generic``."""
    lt_fn = libgdf.gdf_lt_generic
    return self._compare(other, fn=lt_fn)

def __le__(self, other):
    """Implement ``self <= other`` via ``libgdf.gdf_le_generic``."""
    le_fn = libgdf.gdf_le_generic
    return self._compare(other, fn=le_fn)

def __gt__(self, other):
    """Implement ``self > other`` via ``libgdf.gdf_gt_generic``."""
    gt_fn = libgdf.gdf_gt_generic
    return self._compare(other, fn=gt_fn)

def __ge__(self, other):
    """Implement ``self >= other`` via ``libgdf.gdf_ge_generic``."""
    ge_fn = libgdf.gdf_ge_generic
    return self._compare(other, fn=ge_fn)

@property
def dtype(self):
"""dtype of the Series"""
Expand Down Expand Up @@ -720,6 +820,24 @@ def scale(self):
scaled = cudautils.compute_scale(gpuarr, vmin, vmax)
return Series.from_array(scaled)

# Rounding

def ceil(self):
    """Round every value up to the smallest integral value that is not
    less than it.

    Returns a new Series; implemented with ``libgdf.gdf_ceil_generic``.
    """
    ceil_fn = libgdf.gdf_ceil_generic
    return self._unaryop(ceil_fn)

def floor(self):
    """Round every value down to the largest integral value that is not
    greater than it.

    Returns a new Series; implemented with ``libgdf.gdf_floor_generic``.
    """
    floor_fn = libgdf.gdf_floor_generic
    return self._unaryop(floor_fn)


class BufferSentryError(ValueError):
    """``ValueError`` subclass with no behaviour of its own.

    NOTE(review): the code that raises it is not visible in this hunk;
    confirm the intended use against the rest of the module.
    """
Expand Down
Loading

0 comments on commit 415972c

Please sign in to comment.