Skip to content

Commit

Permalink
Merge branch '10070' of https://github.com/galipremsagar/cudf into 10070
Browse files: browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Jan 18, 2022
2 parents a5f496c + 012300d commit e7cdab2
Show file tree
Hide file tree
Showing 40 changed files with 848 additions and 511 deletions.
5 changes: 3 additions & 2 deletions cpp/src/io/orc/aggregate_orc_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,9 @@ std::vector<metadata::stripe_source_mapping> aggregate_orc_metadata::select_stri

// Coalesce stripe info at the source file later since that makes downstream processing much
// easier in impl::read
for (const size_t& stripe_idx : user_specified_stripes[src_file_idx]) {
CUDF_EXPECTS(stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size(),
for (const auto& stripe_idx : user_specified_stripes[src_file_idx]) {
CUDF_EXPECTS(stripe_idx < static_cast<decltype(stripe_idx)>(
per_file_metadata[src_file_idx].ff.stripes.size()),
"Invalid stripe index");
stripe_infos.push_back(
std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr));
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

from cudf.utils.gpu_utils import validate_setup

validate_setup()
Expand Down Expand Up @@ -51,6 +52,7 @@
CategoricalDtype,
Decimal64Dtype,
Decimal32Dtype,
Decimal128Dtype,
IntervalDtype,
ListDtype,
StructDtype,
Expand Down
9 changes: 2 additions & 7 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import cupy as cp
import numpy as np
Expand All @@ -8,12 +8,7 @@ import rmm

import cudf
import cudf._lib as libcudfxx
from cudf.api.types import (
is_categorical_dtype,
is_decimal_dtype,
is_list_dtype,
is_struct_dtype,
)
from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
from cudf.core.buffer import Buffer

from cpython.buffer cimport PyObject_CheckBuffer
Expand Down
5 changes: 4 additions & 1 deletion python/cudf/cudf/_lib/cpp/scalar/scalar.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport int32_t, int64_t
from libcpp cimport bool
Expand Down Expand Up @@ -59,6 +59,9 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil:
fixed_point_scalar(int64_t value,
scale_type scale,
bool is_valid) except +
fixed_point_scalar(data_type value,
scale_type scale,
bool is_valid) except +
int64_t value() except +
# TODO: Figure out how to add an int32 overload of value()

Expand Down
7 changes: 6 additions & 1 deletion python/cudf/cudf/_lib/cpp/types.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libc.stdint cimport int32_t, uint32_t

Expand Down Expand Up @@ -79,6 +79,7 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
DURATION_NANOSECONDS "cudf::type_id::DURATION_NANOSECONDS"
DECIMAL32 "cudf::type_id::DECIMAL32"
DECIMAL64 "cudf::type_id::DECIMAL64"
DECIMAL128 "cudf::type_id::DECIMAL128"

ctypedef enum hash_id "cudf::hash_id":
HASH_IDENTITY "cudf::hash_id::HASH_IDENTITY"
Expand All @@ -102,3 +103,7 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
HIGHER "cudf::interpolation::HIGHER"
MIDPOINT "cudf::interpolation::MIDPOINT"
NEAREST "cudf::interpolation::NEAREST"

# Hack: alias the compiler's __int128_t so that Cython can compile code that refers to it
# https://stackoverflow.com/a/27609033
ctypedef int int128 "__int128_t"
7 changes: 6 additions & 1 deletion python/cudf/cudf/_lib/cpp/wrappers/decimals.pxd
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

from libc.stdint cimport int32_t, int64_t

from cudf._lib.cpp.types cimport int128


cdef extern from "cudf/fixed_point/fixed_point.hpp" namespace "numeric" nogil:
# cython type stub to help resolve to numeric::decimal64
ctypedef int64_t decimal64
# cython type stub to help resolve to numeric::decimal32
ctypedef int64_t decimal32
# cython type stub to help resolve to numeric::decimal128
ctypedef int128 decimal128

cdef cppclass scale_type:
scale_type(int32_t)
3 changes: 1 addition & 2 deletions python/cudf/cudf/_lib/orc.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import cudf

Expand Down Expand Up @@ -249,7 +249,6 @@ cdef orc_reader_options make_orc_reader_options(
.timestamp_type(data_type(timestamp_type))
.use_index(use_index)
.decimal_cols_as_float(c_decimal_cols_as_float)
.decimal128(False)
.build()
)

Expand Down
25 changes: 21 additions & 4 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import decimal

import numpy as np
Expand Down Expand Up @@ -45,7 +46,12 @@ from cudf._lib.cpp.scalar.scalar cimport (
struct_scalar,
timestamp_scalar,
)
from cudf._lib.cpp.wrappers.decimals cimport decimal32, decimal64, scale_type
from cudf._lib.cpp.wrappers.decimals cimport (
decimal32,
decimal64,
decimal128,
scale_type,
)
from cudf._lib.cpp.wrappers.durations cimport (
duration_ms,
duration_ns,
Expand Down Expand Up @@ -88,7 +94,7 @@ cdef class DeviceScalar:
# IMPORTANT: this should only ever be called from __init__
valid = not _is_null_host_scalar(value)

if isinstance(dtype, (cudf.Decimal64Dtype, cudf.Decimal32Dtype)):
if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
_set_decimal_from_scalar(
self.c_value, value, dtype, valid)
elif isinstance(dtype, cudf.ListDtype):
Expand Down Expand Up @@ -118,7 +124,7 @@ cdef class DeviceScalar:
)

def _to_host_scalar(self):
if isinstance(self.dtype, (cudf.Decimal64Dtype, cudf.Decimal32Dtype)):
if isinstance(self.dtype, cudf.core.dtypes.DecimalDtype):
result = _get_py_decimal_from_fixed_point(self.c_value)
elif cudf.api.types.is_struct_dtype(self.dtype):
result = _get_py_dict_from_struct(self.c_value)
Expand Down Expand Up @@ -181,6 +187,7 @@ cdef class DeviceScalar:

s.c_value = move(ptr)
cdtype = s.get_raw_ptr()[0].type()

if cdtype.id() == libcudf_types.DECIMAL64 and dtype is None:
raise TypeError(
"Must pass a dtype when constructing from a fixed-point scalar"
Expand Down Expand Up @@ -322,6 +329,12 @@ cdef _set_decimal_from_scalar(unique_ptr[scalar]& s,
<int32_t>np.int32(value), scale_type(-dtype.scale), valid
)
)
elif isinstance(dtype, cudf.Decimal128Dtype):
s.reset(
new fixed_point_scalar[decimal128](
<libcudf_types.int128>value, scale_type(-dtype.scale), valid
)
)
else:
raise ValueError(f"dtype not supported: {dtype}")

Expand Down Expand Up @@ -463,6 +476,10 @@ cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s):
rep_val = int((<fixed_point_scalar[decimal32]*>s_ptr)[0].value())
scale = int((<fixed_point_scalar[decimal32]*>s_ptr)[0].type().scale())
return decimal.Decimal(rep_val).scaleb(scale)
elif cdtype.id() == libcudf_types.DECIMAL128:
rep_val = int((<fixed_point_scalar[decimal128]*>s_ptr)[0].value())
scale = int((<fixed_point_scalar[decimal128]*>s_ptr)[0].type().scale())
return decimal.Decimal(rep_val).scaleb(scale)
else:
raise ValueError("Could not convert cudf::scalar to numpy scalar")

Expand Down
22 changes: 19 additions & 3 deletions python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

import numpy as np

import cudf

from cudf._lib.column cimport Column

from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
Expand All @@ -17,7 +19,13 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
is_fixed_point as cpp_is_fixed_point,
to_fixed_point as cpp_to_fixed_point,
)
from cudf._lib.cpp.types cimport DECIMAL64, data_type, type_id
from cudf._lib.cpp.types cimport (
DECIMAL32,
DECIMAL64,
DECIMAL128,
data_type,
type_id,
)
from cudf._lib.types cimport underlying_type_t_type_id


Expand Down Expand Up @@ -60,7 +68,15 @@ def to_decimal(Column input_col, object out_type):
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
cdef int scale = out_type.scale
cdef data_type c_out_type = data_type(DECIMAL64, -scale)
cdef data_type c_out_type
if isinstance(out_type, cudf.Decimal32Dtype):
c_out_type = data_type(DECIMAL32, -scale)
elif isinstance(out_type, cudf.Decimal64Dtype):
c_out_type = data_type(DECIMAL64, -scale)
elif isinstance(out_type, cudf.Decimal128Dtype):
c_out_type = data_type(DECIMAL128, -scale)
else:
raise TypeError("should be a decimal dtype")
with nogil:
c_result = move(
cpp_to_fixed_point(
Expand Down
13 changes: 11 additions & 2 deletions python/cudf/cudf/_lib/types.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from enum import IntEnum

Expand Down Expand Up @@ -66,6 +66,7 @@ class TypeId(IntEnum):
)
DECIMAL32 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL32
DECIMAL64 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL64
DECIMAL128 = <underlying_type_t_type_id> libcudf_types.type_id.DECIMAL128


SUPPORTED_NUMPY_TO_LIBCUDF_TYPES = {
Expand Down Expand Up @@ -206,6 +207,11 @@ cdef dtype_from_column_view(column_view cv):
precision=cudf.Decimal32Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
elif tid == libcudf_types.type_id.DECIMAL128:
return cudf.Decimal128Dtype(
precision=cudf.Decimal128Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
else:
return LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(tid)
Expand All @@ -216,6 +222,8 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
tid = libcudf_types.type_id.LIST
elif cudf.api.types.is_struct_dtype(dtype):
tid = libcudf_types.type_id.STRUCT
elif cudf.api.types.is_decimal128_dtype(dtype):
tid = libcudf_types.type_id.DECIMAL128
elif cudf.api.types.is_decimal64_dtype(dtype):
tid = libcudf_types.type_id.DECIMAL64
elif cudf.api.types.is_decimal32_dtype(dtype):
Expand All @@ -232,6 +240,7 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:

cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *:
return tid in (
libcudf_types.type_id.DECIMAL128,
libcudf_types.type_id.DECIMAL64,
libcudf_types.type_id.DECIMAL32
libcudf_types.type_id.DECIMAL32,
)
14 changes: 10 additions & 4 deletions python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

"""Define common type operations."""

from __future__ import annotations
Expand All @@ -20,6 +21,7 @@
is_categorical_dtype,
is_decimal32_dtype,
is_decimal64_dtype,
is_decimal128_dtype,
is_decimal_dtype,
is_interval_dtype,
is_list_dtype,
Expand All @@ -41,19 +43,23 @@ def is_numeric_dtype(obj):
Whether or not the array or dtype is of a numeric dtype.
"""
if isclass(obj):
if issubclass(obj, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)):
if issubclass(obj, cudf.core.dtypes.DecimalDtype):
return True
if issubclass(obj, _BaseDtype):
return False
else:
if isinstance(obj, cudf.Decimal32Dtype) or isinstance(
getattr(obj, "dtype", None), cudf.Decimal32Dtype
if isinstance(obj, cudf.Decimal128Dtype) or isinstance(
getattr(obj, "dtype", None), cudf.Decimal128Dtype
):
return True
if isinstance(obj, cudf.Decimal64Dtype) or isinstance(
getattr(obj, "dtype", None), cudf.Decimal64Dtype
):
return True
if isinstance(obj, cudf.Decimal32Dtype) or isinstance(
getattr(obj, "dtype", None), cudf.Decimal32Dtype
):
return True
if isinstance(obj, _BaseDtype) or isinstance(
getattr(obj, "dtype", None), _BaseDtype
):
Expand Down
5 changes: 4 additions & 1 deletion python/cudf/cudf/core/column/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

"""
isort: skip_file
"""
Expand Down Expand Up @@ -31,5 +32,7 @@
from cudf.core.column.decimal import ( # noqa: F401
Decimal32Column,
Decimal64Column,
Decimal128Column,
DecimalBaseColumn,
)
from cudf.core.column.interval import IntervalColumn # noqa: F401
Loading

0 comments on commit e7cdab2

Please sign in to comment.