Merge branch '10070' of https://github.com/galipremsagar/cudf into 10070

rapidsai · Jan 18, 2022 · e7cdab2 · e7cdab2
2 parents a5f496c + 012300d
commit e7cdab2
Show file tree

Hide file tree

Showing 40 changed files with 848 additions and 511 deletions.
diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp
@@ -171,8 +171,9 @@ std::vector<metadata::stripe_source_mapping> aggregate_orc_metadata::select_stri
 
       // Coalesce stripe info at the source file later since that makes downstream processing much
       // easier in impl::read
-      for (const size_t& stripe_idx : user_specified_stripes[src_file_idx]) {
-        CUDF_EXPECTS(stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size(),
+      for (const auto& stripe_idx : user_specified_stripes[src_file_idx]) {
+        CUDF_EXPECTS(stripe_idx < static_cast<decltype(stripe_idx)>(
+                                    per_file_metadata[src_file_idx].ff.stripes.size()),
                      "Invalid stripe index");
         stripe_infos.push_back(
           std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr));

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
 from cudf.utils.gpu_utils import validate_setup
 
 validate_setup()
@@ -51,6 +52,7 @@
     CategoricalDtype,
     Decimal64Dtype,
     Decimal32Dtype,
+    Decimal128Dtype,
     IntervalDtype,
     ListDtype,
     StructDtype,

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import cupy as cp
 import numpy as np
@@ -8,12 +8,7 @@ import rmm
 
 import cudf
 import cudf._lib as libcudfxx
-from cudf.api.types import (
-    is_categorical_dtype,
-    is_decimal_dtype,
-    is_list_dtype,
-    is_struct_dtype,
-)
+from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
 from cudf.core.buffer import Buffer
 
 from cpython.buffer cimport PyObject_CheckBuffer

diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t, int64_t
 from libcpp cimport bool
@@ -59,6 +59,9 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil:
         fixed_point_scalar(int64_t value,
                            scale_type scale,
                            bool is_valid) except +
+        fixed_point_scalar(data_type value,
+                           scale_type scale,
+                           bool is_valid) except +
         int64_t value() except +
         # TODO: Figure out how to add an int32 overload of value()
 

diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t, uint32_t
 
@@ -79,6 +79,7 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
         DURATION_NANOSECONDS   "cudf::type_id::DURATION_NANOSECONDS"
         DECIMAL32              "cudf::type_id::DECIMAL32"
         DECIMAL64              "cudf::type_id::DECIMAL64"
+        DECIMAL128             "cudf::type_id::DECIMAL128"
 
     ctypedef enum hash_id "cudf::hash_id":
         HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
@@ -102,3 +103,7 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
         HIGHER "cudf::interpolation::HIGHER"
         MIDPOINT "cudf::interpolation::MIDPOINT"
         NEAREST "cudf::interpolation::NEAREST"
+
+    # Hack to let Cython compile code that uses the `__int128_t` symbol;
+    # see https://stackoverflow.com/a/27609033
+    ctypedef int int128 "__int128_t"
diff --git a/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd b/python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd
@@ -1,12 +1,17 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
 from libc.stdint cimport int32_t, int64_t
 
+from cudf._lib.cpp.types cimport int128
+
 
 cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil:
     # cython type stub to help resolve to numeric::decimal64
     ctypedef int64_t decimal64
     # cython type stub to help resolve to numeric::decimal32
     ctypedef int64_t decimal32
+    # cython type stub to help resolve to numeric::decimal128
+    ctypedef int128 decimal128
 
     cdef cppclass scale_type:
         scale_type(int32_t)
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import cudf
 
@@ -249,7 +249,6 @@ cdef orc_reader_options make_orc_reader_options(
         .timestamp_type(data_type(timestamp_type))
         .use_index(use_index)
         .decimal_cols_as_float(c_decimal_cols_as_float)
-        .decimal128(False)
         .build()
     )
 

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
@@ -1,4 +1,5 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
 import decimal
 
 import numpy as np
@@ -45,7 +46,12 @@ from cudf._lib.cpp.scalar.scalar cimport (
     struct_scalar,
     timestamp_scalar,
 )
-from cudf._lib.cpp.wrappers.decimals cimport decimal32, decimal64, scale_type
+from cudf._lib.cpp.wrappers.decimals cimport (
+    decimal32,
+    decimal64,
+    decimal128,
+    scale_type,
+)
 from cudf._lib.cpp.wrappers.durations cimport (
     duration_ms,
     duration_ns,
@@ -88,7 +94,7 @@ cdef class DeviceScalar:
         # IMPORTANT: this should only ever be called from __init__
         valid = not _is_null_host_scalar(value)
 
-        if isinstance(dtype, (cudf.Decimal64Dtype, cudf.Decimal32Dtype)):
+        if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
             _set_decimal_from_scalar(
                 self.c_value, value, dtype, valid)
         elif isinstance(dtype, cudf.ListDtype):
@@ -118,7 +124,7 @@ cdef class DeviceScalar:
             )
 
     def _to_host_scalar(self):
-        if isinstance(self.dtype, (cudf.Decimal64Dtype, cudf.Decimal32Dtype)):
+        if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
             result = _get_py_decimal_from_fixed_point(self.c_value)
         elif cudf.api.types.is_struct_dtype(self.dtype):
             result = _get_py_dict_from_struct(self.c_value)
@@ -181,6 +187,7 @@ cdef class DeviceScalar:
 
         s.c_value = move(ptr)
         cdtype = s.get_raw_ptr()[0].type()
+
         if cdtype.id() == libcudf_types.DECIMAL64 and dtype is None:
             raise TypeError(
                 "Must pass a dtype when constructing from a fixed-point scalar"
@@ -322,6 +329,12 @@ cdef _set_decimal_from_scalar(unique_ptr[scalar]& s,
                 <int32_t>np.int32(value), scale_type(-dtype.scale), valid
             )
         )
+    elif isinstance(dtype, cudf.Decimal128Dtype):
+        s.reset(
+            new fixed_point_scalar[decimal128](
+                <libcudf_types.int128>value, scale_type(-dtype.scale), valid
+            )
+        )
     else:
         raise ValueError(f"dtype not supported: {dtype}")
 
@@ -463,6 +476,10 @@ cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s):
         rep_val = int((<fixed_point_scalar[decimal32]*>s_ptr)[0].value())
         scale = int((<fixed_point_scalar[decimal32]*>s_ptr)[0].type().scale())
         return decimal.Decimal(rep_val).scaleb(scale)
+    elif cdtype.id() == libcudf_types.DECIMAL128:
+        rep_val = int((<fixed_point_scalar[decimal128]*>s_ptr)[0].value())
+        scale = int((<fixed_point_scalar[decimal128]*>s_ptr)[0].type().scale())
+        return decimal.Decimal(rep_val).scaleb(scale)
     else:
         raise ValueError("Could not convert cudf::scalar to numpy scalar")
 

diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -1,7 +1,9 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 import numpy as np
 
+import cudf
+
 from cudf._lib.column cimport Column
 
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
@@ -17,7 +19,13 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
     is_fixed_point as cpp_is_fixed_point,
     to_fixed_point as cpp_to_fixed_point,
 )
-from cudf._lib.cpp.types cimport DECIMAL64, data_type, type_id
+from cudf._lib.cpp.types cimport (
+    DECIMAL32,
+    DECIMAL64,
+    DECIMAL128,
+    data_type,
+    type_id,
+)
 from cudf._lib.types cimport underlying_type_t_type_id
 
 
@@ -60,7 +68,15 @@ def to_decimal(Column input_col, object out_type):
     cdef column_view input_column_view = input_col.view()
     cdef unique_ptr[column] c_result
     cdef int scale = out_type.scale
-    cdef data_type c_out_type = data_type(DECIMAL64, -scale)
+    cdef data_type c_out_type
+    if isinstance(out_type, cudf.Decimal32Dtype):
+        c_out_type = data_type(DECIMAL32, -scale)
+    elif isinstance(out_type, cudf.Decimal64Dtype):
+        c_out_type = data_type(DECIMAL64, -scale)
+    elif isinstance(out_type, cudf.Decimal128Dtype):
+        c_out_type = data_type(DECIMAL128, -scale)
+    else:
+        raise TypeError("should be a decimal dtype")
     with nogil:
         c_result = move(
             cpp_to_fixed_point(

diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from enum import IntEnum
 
@@ -66,6 +66,7 @@ class TypeId(IntEnum):
     )
     DECIMAL32 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL32
     DECIMAL64 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL64
+    DECIMAL128 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL128
 
 
 SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = {
@@ -206,6 +207,11 @@ cdef dtype_from_column_view(column_view cv):
             precision=cudf.Decimal32Dtype.MAX_PRECISION,
             scale=-cv.type().scale()
         )
+    elif tid == libcudf_types.type_id.DECIMAL128:
+        return cudf.Decimal128Dtype(
+            precision=cudf.Decimal128Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
     else:
         return LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
             <underlying_type_t_type_id>(tid)
@@ -216,6 +222,8 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
         tid = libcudf_types.type_id.LIST
     elif cudf.api.types.is_struct_dtype(dtype):
         tid = libcudf_types.type_id.STRUCT
+    elif cudf.api.types.is_decimal128_dtype(dtype):
+        tid = libcudf_types.type_id.DECIMAL128
     elif cudf.api.types.is_decimal64_dtype(dtype):
         tid = libcudf_types.type_id.DECIMAL64
     elif cudf.api.types.is_decimal32_dtype(dtype):
@@ -232,6 +240,7 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
 
 cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *:
     return tid in (
+        libcudf_types.type_id.DECIMAL128,
         libcudf_types.type_id.DECIMAL64,
-        libcudf_types.type_id.DECIMAL32
+        libcudf_types.type_id.DECIMAL32,
     )
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
 """Define common type operations."""
 
 from __future__ import annotations
@@ -20,6 +21,7 @@
     is_categorical_dtype,
     is_decimal32_dtype,
     is_decimal64_dtype,
+    is_decimal128_dtype,
     is_decimal_dtype,
     is_interval_dtype,
     is_list_dtype,
@@ -41,19 +43,23 @@ def is_numeric_dtype(obj):
         Whether or not the array or dtype is of a numeric dtype.
     """
     if isclass(obj):
-        if issubclass(obj, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)):
+        if issubclass(obj, cudf.core.dtypes.DecimalDtype):
             return True
         if issubclass(obj, _BaseDtype):
             return False
     else:
-        if isinstance(obj, cudf.Decimal32Dtype) or isinstance(
-            getattr(obj, "dtype", None), cudf.Decimal32Dtype
+        if isinstance(obj, cudf.Decimal128Dtype) or isinstance(
+            getattr(obj, "dtype", None), cudf.Decimal128Dtype
         ):
             return True
         if isinstance(obj, cudf.Decimal64Dtype) or isinstance(
             getattr(obj, "dtype", None), cudf.Decimal64Dtype
         ):
             return True
+        if isinstance(obj, cudf.Decimal32Dtype) or isinstance(
+            getattr(obj, "dtype", None), cudf.Decimal32Dtype
+        ):
+            return True
         if isinstance(obj, _BaseDtype) or isinstance(
             getattr(obj, "dtype", None), _BaseDtype
         ):

diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
 """
 isort: skip_file
 """
@@ -31,5 +32,7 @@
 from cudf.core.column.decimal import (  # noqa: F401
     Decimal32Column,
     Decimal64Column,
+    Decimal128Column,
+    DecimalBaseColumn,
 )
 from cudf.core.column.interval import IntervalColumn  # noqa: F401