Skip to content

Commit

Permalink
Merge branch 'rapidsai:branch-24.08' into pq-large-strings-zstd-comp
Browse files Browse the repository at this point in the history
  • Loading branch information
mhaseeb123 authored Jun 13, 2024
2 parents acfcd8a + 31d909b commit 85299e4
Show file tree
Hide file tree
Showing 26 changed files with 441 additions and 76 deletions.
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========
datetime
========

.. automodule:: cudf._lib.pylibcudf.datetime
:members:
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This page provides API documentation for pylibcudf.
column_factories
concatenate
copying
datetime
filling
gpumemoryview
groupby
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ set(cython_sources
column_factories.pyx
concatenate.pyx
copying.pyx
datetime.pyx
filling.pyx
gpumemoryview.pyx
groupby.pyx
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ from . cimport (
column_factories,
concatenate,
copying,
datetime,
filling,
groupby,
join,
Expand Down Expand Up @@ -40,9 +41,10 @@ __all__ = [
"Table",
"aggregation",
"binaryop",
"column_factories",
"concatenate",
"copying",
"column_factories",
"datetime",
"filling",
"gpumemoryview",
"groupby",
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
column_factories,
concatenate,
copying,
datetime,
filling,
groupby,
interop,
Expand Down Expand Up @@ -39,9 +40,10 @@
"TypeId",
"aggregation",
"binaryop",
"column_factories",
"concatenate",
"copying",
"column_factories",
"datetime",
"filling",
"gpumemoryview",
"groupby",
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/datetime.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from .column cimport Column


# Declaration of extract_year; the parameter is named ``values`` so that it
# matches the definition and docstring in datetime.pyx.
cpdef Column extract_year(
    Column values
)
33 changes: 33 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/datetime.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.datetime cimport (
extract_year as cpp_extract_year,
)

from .column cimport Column


cpdef Column extract_year(
    Column values
):
    """
    Extract the year from a datetime column.

    Parameters
    ----------
    values : Column
        The column to extract the year from.

    Returns
    -------
    Column
        Column with the extracted years.
    """
    cdef unique_ptr[column] result

    # Call into libcudf with the GIL released.
    with nogil:
        result = move(cpp_extract_year(values.view()))
    # Hand ownership of the libcudf result to a pylibcudf Column.
    return Column.from_libcudf(move(result))
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd round.pyx
set(cython_sources aggregation.pyx binaryop.pyx copying.pyx reduce.pyx replace.pyx round.pyx
stream_compaction.pyx types.pyx unary.pyx
)

Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,11 @@ def __cuda_array_interface__(self) -> abc.Mapping[str, Any]:
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
return _array_ufunc(self, ufunc, method, inputs, kwargs)

def __invert__(self):
    """Reject the ``~`` operator for this column.

    Raises
    ------
    TypeError
        Always; the message names the column's dtype type.
    """
    raise TypeError(
        f"Operation `~` not supported on {self.dtype.type.__name__}"
    )

def searchsorted(
self,
value,
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,14 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase:
unaryop = pylibcudf.unary.UnaryOperator[unaryop]
return libcudf.unary.unary_operation(self, unaryop)

def __invert__(self):
    """Apply ``~``: bitwise invert for integer kinds, logical NOT for bools.

    Any other dtype kind is delegated to the parent class implementation.
    """
    kind = self.dtype.kind
    if kind in "ui":
        # Signed and unsigned integer kinds get a bitwise inversion.
        return self.unary_operator("invert")
    if kind == "b":
        # Boolean columns get a logical negation.
        return self.unary_operator("not")
    return super().__invert__()

def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
int_float_dtype_mapping = {
np.int8: np.float32,
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8072,11 +8072,11 @@ def from_pandas(obj, nan_as_null=no_default):
return cudf.Index.from_pandas(obj, nan_as_null=nan_as_null)
elif isinstance(obj, pd.CategoricalDtype):
return cudf.CategoricalDtype.from_pandas(obj)
elif isinstance(obj, pd.IntervalDtype):
return cudf.IntervalDtype.from_pandas(obj)
else:
raise TypeError(
"from_pandas only accepts Pandas Dataframes, Series, "
"Index, RangeIndex and MultiIndex objects. "
"Got %s" % type(obj)
f"from_pandas unsupported for object of type {type(obj).__name__}"
)


Expand Down
61 changes: 2 additions & 59 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._typing import Dtype
from cudf.api.types import is_bool_dtype, is_dtype_equal, is_scalar
from cudf.api.types import is_dtype_equal, is_scalar
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import (
ColumnBase,
Expand Down Expand Up @@ -1455,51 +1455,6 @@ def _get_sorted_inds(
stable=True,
)

@_cudf_nvtx_annotate
def _is_sorted(self, ascending=None, null_position=None):
    """
    Report whether the Frame's columns are sorted as requested.

    The index is not taken into account.

    Parameters
    ----------
    ascending : None or list-like of booleans
        Expected sort order for each column. A list-like must have one
        entry per column. None means every column is expected to be
        ascending.
    null_position : None or list-like of booleans
        Expected placement of nulls relative to other elements for each
        column. A list-like must have one entry per column. None means
        nulls are expected before other elements.

    Returns
    -------
    bool
        True if the columns are sorted as described by ``ascending`` and
        ``null_position``, False otherwise.

    Raises
    ------
    TypeError
        If ``ascending`` or ``null_position`` is neither None nor
        list-like.

    Notes
    -----
    NOTE(review): the True/False-to-order mapping of both flags is applied
    inside ``libcudf.sort.is_sorted``; confirm it there before relying on
    a particular boolean meaning.
    """
    if not (ascending is None or cudf.api.types.is_list_like(ascending)):
        raise TypeError(
            f"Expected a list-like or None for `ascending`, got "
            f"{type(ascending)}"
        )
    if not (
        null_position is None
        or cudf.api.types.is_list_like(null_position)
    ):
        raise TypeError(
            f"Expected a list-like or None for `null_position`, got "
            f"{type(null_position)}"
        )
    columns = [*self._columns]
    return libcudf.sort.is_sorted(
        columns, ascending=ascending, null_position=null_position
    )

@_cudf_nvtx_annotate
def _split(self, splits):
"""Split a frame with split points in ``splits``. Returns a list of
Expand Down Expand Up @@ -1920,7 +1875,7 @@ def __invert__(self):
"""Bitwise invert (~) for integral dtypes, logical NOT for bools."""
return self._from_data_like_self(
self._data._from_columns_like_self(
(_apply_inverse_column(col) for col in self._data.columns)
(~col for col in self._data.columns)
)
)

Expand Down Expand Up @@ -1970,15 +1925,3 @@ def __dask_tokenize__(self):
str(dict(self._dtypes)),
normalize_token(self.to_pandas()),
]


def _apply_inverse_column(col: ColumnBase) -> ColumnBase:
    """Invert one column: bitwise ``~`` for integers, logical NOT for bools.

    Raises
    ------
    TypeError
        If the column's dtype is neither an integer nor a boolean dtype.
    """
    if np.issubdtype(col.dtype, np.integer):
        return col.unary_operator("invert")
    if is_bool_dtype(col.dtype):
        return col.unary_operator("not")
    raise TypeError(
        f"Operation `~` not supported on {col.dtype.type.__name__}"
    )
49 changes: 47 additions & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1636,9 +1636,54 @@ def is_unique(self):
def dtype(self):
return np.dtype("O")

@_cudf_nvtx_annotate
def _is_sorted(self, ascending=None, null_position=None) -> bool:
    """
    Report whether the MultiIndex's columns are sorted as requested.

    Parameters
    ----------
    ascending : None or list-like of booleans
        Expected sort order for each column. A list-like must have one
        entry per column. None means every column is expected to be
        ascending.
    null_position : None or list-like of booleans
        Expected placement of nulls relative to other elements for each
        column. A list-like must have one entry per column. None means
        nulls are expected before other elements.

    Returns
    -------
    bool
        True if the columns are sorted as described by ``ascending`` and
        ``null_position``, False otherwise.

    Raises
    ------
    TypeError
        If ``ascending`` or ``null_position`` is neither None nor
        list-like.

    Notes
    -----
    NOTE(review): the True/False-to-order mapping of both flags is applied
    inside ``libcudf.sort.is_sorted``; confirm it there before relying on
    a particular boolean meaning.
    """
    if not (ascending is None or cudf.api.types.is_list_like(ascending)):
        raise TypeError(
            f"Expected a list-like or None for `ascending`, got "
            f"{type(ascending)}"
        )
    if not (
        null_position is None
        or cudf.api.types.is_list_like(null_position)
    ):
        raise TypeError(
            f"Expected a list-like or None for `null_position`, got "
            f"{type(null_position)}"
        )
    columns = [*self._columns]
    return libcudf.sort.is_sorted(
        columns, ascending=ascending, null_position=null_position
    )

@cached_property # type: ignore
@_cudf_nvtx_annotate
def is_monotonic_increasing(self):
def is_monotonic_increasing(self) -> bool:
"""
Return if the index is monotonic increasing
(only equal or increasing) values.
Expand All @@ -1647,7 +1692,7 @@ def is_monotonic_increasing(self):

@cached_property # type: ignore
@_cudf_nvtx_annotate
def is_monotonic_decreasing(self):
def is_monotonic_decreasing(self) -> bool:
"""
Return if the index is monotonic decreasing
(only equal or decreasing) values.
Expand Down
41 changes: 41 additions & 0 deletions python/cudf/cudf/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import os
import warnings

import rmm.mr

from .fast_slow_proxy import is_proxy_object
from .magics import load_ipython_extension
from .profiler import Profiler
Expand All @@ -20,6 +25,42 @@ def install():
global LOADED
LOADED = loader is not None

# Optionally configure the RMM device memory resource from the
# CUDF_PANDAS_RMM_MODE environment variable. Supported values are
# "cuda", "pool", "async", "managed" and "managed_pool"; any other value
# raises ValueError.
if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
    current_mr = rmm.mr.get_current_device_resource()
    if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
        # A non-default resource is already installed: honour the warning
        # text and leave that resource untouched instead of replacing it.
        warnings.warn(
            f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
            UserWarning,
        )
    else:
        # Size pools at 80% of the currently free device memory, rounded
        # to a multiple of 256 bytes.
        free_memory, _ = rmm.mr.available_device_memory()
        free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)

        if rmm_mode == "cuda":
            mr = rmm.mr.CudaMemoryResource()
            rmm.mr.set_current_device_resource(mr)
        elif rmm_mode == "pool":
            rmm.mr.set_current_device_resource(
                rmm.mr.PoolMemoryResource(
                    rmm.mr.get_current_device_resource(),
                    initial_pool_size=free_memory,
                )
            )
        elif rmm_mode == "async":
            mr = rmm.mr.CudaAsyncMemoryResource(
                initial_pool_size=free_memory
            )
            rmm.mr.set_current_device_resource(mr)
        elif rmm_mode == "managed":
            mr = rmm.mr.ManagedMemoryResource()
            rmm.mr.set_current_device_resource(mr)
        elif rmm_mode == "managed_pool":
            mr = rmm.mr.PoolMemoryResource(
                rmm.mr.ManagedMemoryResource(),
                initial_pool_size=free_memory,
            )
            rmm.mr.set_current_device_resource(mr)
        else:
            raise ValueError(f"Unsupported rmm mode: {rmm_mode}")


def pytest_load_initial_conftests(early_config, parser, args):
# We need to install ourselves before conftest.py import (which
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,8 @@ def interp_opt(request):
)
def sorted_opt(request):
return request.param


@pytest.fixture(scope="session", params=[False, True])
def has_nulls(request):
    """Session-scoped fixture parametrized over ``False`` and ``True``.

    Returns the current parameter unchanged.
    NOTE(review): presumably signals whether test inputs should contain
    nulls; confirm against the tests that request this fixture.
    """
    return request.param
Loading

0 comments on commit 85299e4

Please sign in to comment.