Merge pull request #14 from sklam/libgdf

Delegate basic arithmetic and logical binops to libgdf.
rapidsai · Jun 2, 2017 · 415972c · 415972c
2 parents a7d9412 + 3593624
commit 415972c
Show file tree

Hide file tree

Showing 6 changed files with 308 additions and 202 deletions.
diff --git a/conda_environments/testing_py35.yml b/conda_environments/testing_py35.yml
@@ -1,25 +1,15 @@
 name: pycudf_testing_py35
 channels:
 - numba
+- gpuopenanalytics/label/dev
 - defaults
 dependencies:
-- accelerate_cudalib=2.0=0
-- cudatoolkit=8.0=0
-- mkl=2017.0.1=0
-- numpy=1.12.1=py35_0
-- openssl=1.0.2k=1
-- pip=9.0.1=py35_1
-- py=1.4.33=py35_0
-- pytest=3.0.7=py35_0
-- python=3.5.3=1
-- readline=6.2=2
-- setuptools=27.2.0=py35_0
-- sqlite=3.13.0=0
-- tk=8.5.18=0
-- wheel=0.29.0=py35_0
-- xz=5.2.2=1
-- zlib=1.2.8=3
-- llvmlite=0.18
-- numba=0.33
-- pip:
-  - flatbuffers==2015.12.22.1
+- pytest=3.0.7
+- python=3.5.3
+- setuptools=27.2.0
+- accelerate_cudalib=2.0
+- cudatoolkit=8.0
+- llvmlite>=0.18
+- numpy=1.12.1
+- numba>=0.33
+- libgdf_cffi>=0.1.0a1.dev
diff --git a/pygdf/_gdf.py b/pygdf/_gdf.py
@@ -0,0 +1,51 @@
+"""
+This module provides bindings to the libgdf library.
+"""
+import numpy as np
+
+from libgdf_cffi import ffi, libgdf
+
+
+def columnview(size, data, mask=None, dtype=None):
+    """
+    Make a column view.
+
+    Allocates a ``gdf_column`` through cffi and fills it in by calling
+    ``libgdf.gdf_column_view`` with the device pointers of *data* and
+    *mask*, the element count *size* and the libgdf dtype matching *dtype*.
+
+    *data* and *mask* must either be ``None`` (forwarded as ``ffi.NULL``)
+    or expose ``to_gpu_array()``.  When *dtype* is omitted it is taken from
+    ``data.dtype``, so *data* must not be ``None`` in that case.
+
+    Returns the new ``gdf_column*`` cffi object.
+    """
+    def unwrap(buffer):
+        # libgdf receives raw device addresses; a missing buffer is NULL.
+        if buffer is None:
+            return ffi.NULL
+        devary = buffer.to_gpu_array()
+        return ffi.cast('void*', devary.device_ctypes_pointer.value)
+
+    # Compare against None instead of relying on truthiness: an explicitly
+    # supplied ``np.dtype`` instance may evaluate as falsy (a non-structured
+    # dtype has ``len() == 0``) and would otherwise be silently discarded
+    # in favour of ``data.dtype``.
+    if dtype is None:
+        dtype = data.dtype
+    colview = ffi.new('gdf_column*')
+    libgdf.gdf_column_view(colview, unwrap(data), unwrap(mask), size,
+                           np_to_gdf_dtype(dtype))
+
+    return colview
+
+
+def apply_binaryop(binop, lhs, rhs, out):
+    """Invoke the binary operator *binop* on the cffi views of *lhs* and
+    *rhs*, writing the result into the cffi view of *out*.
+
+    Each operand must carry a ``_cffi_view`` attribute.  Nothing is
+    returned; whatever *binop* returns is discarded.
+    """
+    lhs_view = lhs._cffi_view
+    rhs_view = rhs._cffi_view
+    out_view = out._cffi_view
+    binop(lhs_view, rhs_view, out_view)
+
+
+def apply_unaryop(unaop, inp, out):
+    """Invoke the unary operator *unaop* on the cffi view of *inp*, writing
+    the result into the cffi view of *out*.
+
+    Both operands must carry a ``_cffi_view`` attribute.  Nothing is
+    returned; whatever *unaop* returns is discarded.
+    """
+    inp_view = inp._cffi_view
+    out_view = out._cffi_view
+    unaop(inp_view, out_view)
+
+
+def np_to_gdf_dtype(dtype):
+    """Translate a numpy dtype into the matching libgdf dtype constant.
+
+    *dtype* may be anything ``np.dtype`` accepts.  ``np.bool_`` shares
+    ``GDF_INT8`` with ``np.int8``.  A dtype that is absent from the table
+    raises ``KeyError``.
+    """
+    gdf_dtypes = {
+        np.float64: libgdf.GDF_FLOAT64,
+        np.float32: libgdf.GDF_FLOAT32,
+        np.int64:   libgdf.GDF_INT64,
+        np.int32:   libgdf.GDF_INT32,
+        np.int8:    libgdf.GDF_INT8,
+        np.bool_:   libgdf.GDF_INT8,
+    }
+    # Normalise to the numpy scalar type so dtype objects, strings and
+    # scalar classes all resolve to the same key.
+    scalar_type = np.dtype(dtype).type
+    return gdf_dtypes[scalar_type]
+
diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
@@ -6,7 +6,9 @@
 
 from numba import cuda
 
-from . import cudautils, utils
+from libgdf_cffi import libgdf
+from . import cudautils, utils, _gdf
+
 
 
 class DataFrame(object):
@@ -499,6 +501,10 @@ def __init__(self, size, dtype, buffer=None, mask=None, null_count=None):
                 raise ValueError('null_count must be provided')
             null_count = 0
         self._null_count = null_count
+        # make cffi view for libgdf
+        libgdf.gdf_column_view
+        self._cffi_view = _gdf.columnview(size=self._size, data=self._data,
+                                          mask=self._mask)
 
     def __len__(self):
         """Returns the size of the ``Series`` including null values.
@@ -532,6 +538,100 @@ def __getitem__(self, arg):
         else:
             raise NotImplementedError(type(arg))
 
+    def __bool__(self):
+        """Always raise TypeError when converting a Series
+        into a boolean.
+
+        The truth value of a whole column is ambiguous, so the conversion
+        is refused rather than falling back to ``__len__``.
+        """
+        raise TypeError("can't compute boolean for {!r}".format(type(self)))
+
+    # Python 2 looks up ``__nonzero__`` for truth testing; alias it so the
+    # conversion is refused there as well (this class already provides the
+    # Python 2 ``__div__`` spelling for division).
+    __nonzero__ = __bool__
+
+    def _call_binop(self, other, fn, out_dtype):
+        """
+        Run the libgdf binary operator *fn* over *self* and *other* and
+        return a freshly allocated Series of dtype *out_dtype* holding the
+        result.
+        """
+        # The output buffer is sized from *self* alone.
+        # NOTE(review): nothing here checks that *other* has the same
+        # length -- confirm whether libgdf validates that.
+        result_buffer = cuda.device_array(shape=len(self), dtype=out_dtype)
+        result = Series.from_array(result_buffer)
+        _gdf.apply_binaryop(fn, self, other, result)
+        return result
+
+    def _binaryop(self, other, fn):
+        """
+        Apply the libgdf binary operator *fn* to *self* and *other*.
+
+        When *other* is a Series the result is a new Series whose dtype is
+        that of *self*.  Any other operand type yields ``NotImplemented``
+        so Python can try the reflected operation.
+        """
+        if not isinstance(other, Series):
+            return NotImplemented
+        return self._call_binop(other, fn, self.dtype)
+
+    def _call_unaop(self, fn, out_dtype):
+        """
+        Run the libgdf unary operator *fn* over *self* and return a freshly
+        allocated Series of dtype *out_dtype* holding the result.
+        """
+        # One output slot per element of *self*.
+        result_buffer = cuda.device_array(shape=len(self), dtype=out_dtype)
+        result = Series.from_array(result_buffer)
+        _gdf.apply_unaryop(fn, self, result)
+        return result
+
+    def _unaryop(self, fn):
+        """
+        Apply the libgdf unary operator *fn* to *self* and return the
+        resulting Series, which keeps the dtype of *self*.
+        """
+        out_dtype = self.dtype
+        return self._call_unaop(fn, out_dtype)
+
+    def __add__(self, other):
+        """Implement ``self + other`` through ``libgdf.gdf_add_generic``."""
+        return self._binaryop(other, libgdf.gdf_add_generic)
+
+    def __sub__(self, other):
+        """Implement ``self - other`` through ``libgdf.gdf_sub_generic``."""
+        return self._binaryop(other, libgdf.gdf_sub_generic)
+
+    def __mul__(self, other):
+        """Implement ``self * other`` through ``libgdf.gdf_mul_generic``."""
+        return self._binaryop(other, libgdf.gdf_mul_generic)
+
+    def __floordiv__(self, other):
+        """Implement ``self // other`` through
+        ``libgdf.gdf_floordiv_generic``.
+        """
+        return self._binaryop(other, libgdf.gdf_floordiv_generic)
+
+    def __truediv__(self, other):
+        """Implement ``self / other`` through ``libgdf.gdf_div_generic``."""
+        return self._binaryop(other, libgdf.gdf_div_generic)
+
+    # Python 2 spelling of the ``/`` operator; shares the implementation.
+    __div__ = __truediv__
+
+    def _compare(self, other, fn):
+        """
+        Apply the libgdf comparison operator *fn* to *self* and *other*.
+
+        When *other* is a Series the result is a new Series of dtype
+        `np.bool_`.  Any other operand type yields ``NotImplemented``.
+        """
+        if not isinstance(other, Series):
+            return NotImplemented
+        return self._call_binop(other, fn, np.bool_)
+
+    def __eq__(self, other):
+        """Implement ``self == other`` through ``libgdf.gdf_eq_generic``."""
+        return self._compare(other, libgdf.gdf_eq_generic)
+
+    def __ne__(self, other):
+        """Implement ``self != other`` through ``libgdf.gdf_ne_generic``."""
+        return self._compare(other, libgdf.gdf_ne_generic)
+
+    def __lt__(self, other):
+        """Implement ``self < other`` through ``libgdf.gdf_lt_generic``."""
+        return self._compare(other, libgdf.gdf_lt_generic)
+
+    def __le__(self, other):
+        """Implement ``self <= other`` through ``libgdf.gdf_le_generic``."""
+        return self._compare(other, libgdf.gdf_le_generic)
+
+    def __gt__(self, other):
+        """Implement ``self > other`` through ``libgdf.gdf_gt_generic``."""
+        return self._compare(other, libgdf.gdf_gt_generic)
+
+    def __ge__(self, other):
+        """Implement ``self >= other`` through ``libgdf.gdf_ge_generic``."""
+        return self._compare(other, libgdf.gdf_ge_generic)
+
     @property
     def dtype(self):
         """dtype of the Series"""
@@ -720,6 +820,24 @@ def scale(self):
         scaled = cudautils.compute_scale(gpuarr, vmin, vmax)
         return Series.from_array(scaled)
 
+    # Rounding
+
+    def ceil(self):
+        """Round every value up to the smallest integral value that is not
+        less than it.
+
+        The work is delegated to ``libgdf.gdf_ceil_generic``; the result is
+        a new Series with the same dtype as this one.
+        """
+        rounder = libgdf.gdf_ceil_generic
+        return self._unaryop(rounder)
+
+    def floor(self):
+        """Round every value down to the largest integral value that is not
+        greater than it.
+
+        The work is delegated to ``libgdf.gdf_floor_generic``; the result
+        is a new Series with the same dtype as this one.
+        """
+        rounder = libgdf.gdf_floor_generic
+        return self._unaryop(rounder)
+
 
 class BufferSentryError(ValueError):
     pass