Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into impr-enum-class-fieldtype
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Jan 9, 2024
2 parents 0f085d5 + 3a1601d commit 91c90b6
Show file tree
Hide file tree
Showing 18 changed files with 184 additions and 130 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ jobs:
build_type: pull-request
script: ci/test_wheel_cudf.sh
wheel-build-dask-cudf:
needs: wheel-tests-cudf
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
6 changes: 3 additions & 3 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -23,7 +23,7 @@ pyproject_file="${package_dir}/pyproject.toml"

sed -i "s/^name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
echo "${version}" > VERSION
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name}/_version.py"
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name//-/_}/_version.py"

# For nightlies we want to ensure that we're pulling in alphas as well. The
# easiest way to do so is to augment the spec with a constraint containing a
Expand All @@ -34,7 +34,7 @@ if ! rapids-is-release-build; then
alpha_spec=',>=0.0.0a0'
fi

if [[ ${package_name} == "dask_cudf" ]]; then
if [[ ${package_name} == "dask-cudf" ]]; then
sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/dask-cuda==(.*)\"/dask-cuda==\1${alpha_spec}\"/g" ${pyproject_file}
sed -r -i "s/rapids-dask-dependency==(.*)\"/rapids-dask-dependency==\1${alpha_spec}\"/g" ${pyproject_file}
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/dask_cudf"

./ci/build_wheel.sh dask_cudf ${package_dir}
./ci/build_wheel.sh dask-cudf ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/dist
1 change: 0 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ dependencies:
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.2.*
- make
- mimesis>=4.1.0
- moto>=4.0.8
- msgpack-python
- myst-nb
Expand Down
1 change: 0 additions & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ dependencies:
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.2.*
- make
- mimesis>=4.1.0
- moto>=4.0.8
- msgpack-python
- myst-nb
Expand Down
14 changes: 9 additions & 5 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,6 @@ dependencies:
- cramjam
- fastavro>=0.22.9
- hypothesis
- mimesis>=4.1.0
- pytest-benchmark
- pytest-cases
- python-snappy>=0.6.0
Expand Down Expand Up @@ -755,7 +754,12 @@ dependencies:
- ipython
- openpyxl
notebook_cuda_version:
common:
- output_types: conda
packages:
- cuda-version=12.0
specific:
- output_types: conda
matrices:
- matrix: {cuda: "12.0"}
packages:
- cuda-version=12.0
- matrix: {cuda: "11.8"}
packages:
- cuda-version=11.8
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -1836,7 +1836,7 @@ def __array_function__(self, func, types, args, kwargs):
return NotImplemented

@classmethod
def from_pandas(cls, index, nan_as_null=no_default):
def from_pandas(cls, index: pd.Index, nan_as_null=no_default):
"""
Convert from a Pandas Index.
Expand Down
22 changes: 20 additions & 2 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -99,9 +99,14 @@ def _has_nulls(self):

@_cudf_nvtx_annotate
def serialize(self):
# TODO: See if self._data can be serialized outright
header = {
"type-serialized": pickle.dumps(type(self)),
"column_names": pickle.dumps(tuple(self._data.names)),
"column_rangeindex": pickle.dumps(self._data.rangeindex),
"column_multiindex": pickle.dumps(self._data.multiindex),
"column_label_dtype": pickle.dumps(self._data.label_dtype),
"column_level_names": pickle.dumps(self._data._level_names),
}
header["columns"], frames = serialize_columns(self._columns)
return header, frames
Expand All @@ -112,7 +117,20 @@ def deserialize(cls, header, frames):
cls_deserialize = pickle.loads(header["type-serialized"])
column_names = pickle.loads(header["column_names"])
columns = deserialize_columns(header["columns"], frames)
return cls_deserialize._from_data(dict(zip(column_names, columns)))
kwargs = {}
for metadata in [
"rangeindex",
"multiindex",
"label_dtype",
"level_names",
]:
key = f"column_{metadata}"
if key in header:
kwargs[metadata] = pickle.loads(header[key])
col_accessor = ColumnAccessor(
data=dict(zip(column_names, columns)), **kwargs
)
return cls_deserialize._from_data(col_accessor)

@classmethod
@_cudf_nvtx_annotate
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -1601,7 +1601,7 @@ def to_pandas(self, *, nullable: bool = False) -> pd.MultiIndex:

@classmethod
@_cudf_nvtx_annotate
def from_pandas(cls, multiindex, nan_as_null=no_default):
def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default):
"""
Convert from a Pandas MultiIndex
Expand Down
131 changes: 53 additions & 78 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -57,7 +57,6 @@
TimeDeltaColumn,
arange,
as_column,
column,
full,
)
from cudf.core.column.categorical import (
Expand Down Expand Up @@ -202,7 +201,6 @@ def __getitem__(self, arg):

@_cudf_nvtx_annotate
def __setitem__(self, key, value):
from cudf.core.column import column

if isinstance(key, tuple):
key = list(key)
Expand Down Expand Up @@ -264,7 +262,7 @@ def __setitem__(self, key, value):
self._frame._column.dtype, (cudf.ListDtype, cudf.StructDtype)
)
):
value = column.as_column(value)
value = as_column(value)

if (
(
Expand Down Expand Up @@ -568,7 +566,7 @@ def from_masked_array(cls, data, mask, null_count=None):
4 14
dtype: int64
"""
col = column.as_column(data).set_mask(mask)
col = as_column(data).set_mask(mask)
return cls(data=col)

@_cudf_nvtx_annotate
Expand All @@ -593,73 +591,33 @@ def __init__(
"to silence this warning.",
FutureWarning,
)
if isinstance(data, pd.Series):
if name is None:
name = data.name
if isinstance(data.index, pd.MultiIndex):
index = cudf.from_pandas(data.index)
else:
index = as_index(data.index)
elif isinstance(data, pd.Index):
if name is None:
name = data.name
data = as_column(data, nan_as_null=nan_as_null, dtype=dtype)
elif isinstance(data, BaseIndex):
if name is None:
name = data.name
data = data._values
if dtype is not None:
data = data.astype(dtype)
index_from_data = None
name_from_data = None
if data is None:
data = {}

if isinstance(data, (pd.Series, pd.Index, BaseIndex, Series)):
if copy:
data = data.copy(deep=True)
name_from_data = data.name
column = as_column(data, nan_as_null=nan_as_null, dtype=dtype)
if isinstance(data, (pd.Series, Series)):
index_from_data = as_index(data.index)
elif isinstance(data, ColumnAccessor):
raise TypeError(
"Use cudf.Series._from_data for constructing a Series from "
"ColumnAccessor"
)

if isinstance(data, Series):
if index is not None:
data = data.reindex(index)
else:
index = data._index
if name is None:
name = data.name
data = data._column
if copy:
data = data.copy(deep=True)
if dtype is not None:
data = data.astype(dtype)

if isinstance(data, dict):
elif isinstance(data, dict):
if not data:
current_index = RangeIndex(0)
column = as_column(data, nan_as_null=nan_as_null, dtype=dtype)
index_from_data = RangeIndex(0)
else:
current_index = data.keys()
if index is not None:
series = Series(
list(data.values()),
nan_as_null=nan_as_null,
dtype=dtype,
index=current_index,
)
new_index = as_index(index)
if not series.index.equals(new_index):
series = series.reindex(new_index)
data = series._column
index = series._index
else:
data = column.as_column(
column = as_column(
list(data.values()), nan_as_null=nan_as_null, dtype=dtype
)
index = current_index
if data is None:
if index is not None:
data = column.column_empty(
row_count=len(index), dtype=None, masked=True
)
else:
data = {}

if not isinstance(data, ColumnBase):
index_from_data = as_index(list(data.keys()))
else:
# Using `getattr_static` to check if
# `data` is on device memory and perform
# a deep copy later. This is different
Expand All @@ -677,25 +635,42 @@ def __init__(
)
is property
)
data = column.as_column(
column = as_column(
data,
nan_as_null=nan_as_null,
dtype=dtype,
length=len(index) if index is not None else None,
)
if copy and has_cai:
data = data.copy(deep=True)
else:
if dtype is not None:
data = data.astype(dtype)
column = column.copy(deep=True)

if index is not None and not isinstance(index, BaseIndex):
index = as_index(index)
assert isinstance(column, ColumnBase)

if dtype is not None:
column = column.astype(dtype)

assert isinstance(data, ColumnBase)
if name_from_data is not None and name is None:
name = name_from_data

super().__init__({name: data})
self._index = RangeIndex(len(data)) if index is None else index
if index is not None:
index = as_index(index)

if index_from_data is not None:
first_index = index_from_data
second_index = index
elif index is None:
first_index = RangeIndex(len(column))
second_index = None
else:
first_index = index
second_index = None

super().__init__({name: column}, index=first_index)
if second_index is not None:
# TODO: Is there a better way to do this?
reindexed = self.reindex(index=second_index, copy=False)
self._data = reindexed._data
self._index = second_index
self._check_data_index_length_match()

@classmethod
Expand All @@ -717,7 +692,7 @@ def __contains__(self, item):

@classmethod
@_cudf_nvtx_annotate
def from_pandas(cls, s, nan_as_null=no_default):
def from_pandas(cls, s: pd.Series, nan_as_null=no_default):
"""
Convert from a Pandas Series.
Expand Down Expand Up @@ -760,7 +735,7 @@ def from_pandas(cls, s, nan_as_null=no_default):
False if cudf.get_option("mode.pandas_compatible") else None
)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
warnings.simplefilter("ignore", FutureWarning)
result = cls(s, nan_as_null=nan_as_null)
return result

Expand Down Expand Up @@ -5250,16 +5225,16 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
b = b.reindex(a.index)
index = as_index(a.index)

a_col = column.as_column(a)
a_col = as_column(a)
a_array = cupy.asarray(a_col.data_array_view(mode="read"))

b_col = column.as_column(b)
b_col = as_column(b)
b_array = cupy.asarray(b_col.data_array_view(mode="read"))

result = cupy.isclose(
a=a_array, b=b_array, rtol=rtol, atol=atol, equal_nan=equal_nan
)
result_col = column.as_column(result)
result_col = as_column(result)

if a_col.null_count and b_col.null_count:
a_nulls = a_col.isnull()
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/tools/numeric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

import warnings

Expand Down Expand Up @@ -161,7 +161,7 @@ def to_numeric(arg, errors="raise", downcast=None):
break

if isinstance(arg, (cudf.Series, pd.Series)):
return cudf.Series(col)
return cudf.Series(col, index=arg.index, name=arg.name)
else:
if col.has_nulls():
# To match pandas, always return a floating type filled with nan.
Expand Down
Loading

0 comments on commit 91c90b6

Please sign in to comment.