Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.02' into cccl-2.2.0
Browse files: browse the repository at this point in the history
  • Loading branch information
bdice committed Dec 11, 2023
2 parents 1f4cefe + 759a1c8 commit 218992e
Show file tree
Hide file tree
Showing 13 changed files with 33 additions and 244 deletions.
3 changes: 2 additions & 1 deletion cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2137,7 +2137,8 @@ stripe_dictionaries build_dictionaries(orc_table_view& orc_table,
}
}
}
stripe_dicts.host_to_device_async(stream);
// Synchronize to ensure the copy is complete before we clear `map_slots`
stripe_dicts.host_to_device_sync(stream);

gpu::collect_map_entries(stripe_dicts, stream);
gpu::get_dictionary_indices(stripe_dicts, orc_table.d_columns, stream);
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,9 @@ void __device__ calculate_frag_size(frag_init_state_s* const s, int t)
}
}
}

auto const total_len = block_reduce(reduce_storage).Sum(len);
__syncthreads();
auto const total_len = block_reduce(reduce_storage).Sum(len);
auto const total_valid = block_reduce(reduce_storage).Sum(num_valid);

if (t == 0) {
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ ignore_missing_imports = true
follow_imports = "skip"
exclude = [
"cudf/_lib/",
"cudf/cudf/benchmarks/",
"cudf/cudf/tests/",
"cudf/cudf/utils/metadata/orc_column_statistics_pb2.py",
"custreamz/custreamz/tests/",
Expand Down
32 changes: 0 additions & 32 deletions python/cudf/cudf/benchmarks/README.md

This file was deleted.

86 changes: 0 additions & 86 deletions python/cudf/cudf/benchmarks/bench_cudf_io.py

This file was deleted.

20 changes: 0 additions & 20 deletions python/cudf/cudf/benchmarks/conftest.py

This file was deleted.

91 changes: 0 additions & 91 deletions python/cudf/cudf/benchmarks/get_datasets.py

This file was deleted.

5 changes: 2 additions & 3 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@
from cudf import _lib as libcudf
from cudf._lib.transform import bools_to_mask
from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
from cudf.api.types import is_interval_dtype
from cudf.core.buffer import Buffer
from cudf.core.column import column
from cudf.core.column.methods import ColumnMethods
from cudf.core.dtypes import CategoricalDtype
from cudf.core.dtypes import CategoricalDtype, IntervalDtype
from cudf.utils.dtypes import (
is_mixed_with_object_dtype,
min_signed_type,
Expand Down Expand Up @@ -997,7 +996,7 @@ def to_pandas(
.fillna(_DEFAULT_CATEGORICAL_VALUE)
.values_host
)
if is_interval_dtype(col.categories.dtype):
if isinstance(col.categories.dtype, IntervalDtype):
# leaving out dropna because it temporarily changes an interval
# index into a struct and throws off results.
# TODO: work on interval index dropna
Expand Down
7 changes: 6 additions & 1 deletion python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,12 @@ def is_interval_dtype(obj):
or (
isinstance(obj, str) and obj == cudf.core.dtypes.IntervalDtype.name
)
or (hasattr(obj, "dtype") and is_interval_dtype(obj.dtype))
or (
isinstance(
getattr(obj, "dtype", None),
(pd.IntervalDtype, cudf.core.dtypes.IntervalDtype),
)
)
)


Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
_is_non_decimal_numeric_dtype,
is_dtype_equal,
is_integer,
is_interval_dtype,
is_list_like,
is_scalar,
is_signed_integer_dtype,
Expand Down Expand Up @@ -3192,7 +3191,9 @@ def __init__(

if isinstance(data, IntervalColumn):
data = data
elif isinstance(data, pd.Series) and (is_interval_dtype(data.dtype)):
elif isinstance(data, pd.Series) and isinstance(
data.dtype, pd.IntervalDtype
):
data = column.as_column(data, data.dtype)
elif isinstance(data, (pd.Interval, pd.IntervalIndex)):
data = column.as_column(
Expand Down
10 changes: 7 additions & 3 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2661,13 +2661,17 @@ def _reindex(
)
for name in names
}
if column_names is None:
level_names = self._data.level_names
elif isinstance(column_names, pd.Index):
level_names = tuple(column_names.names)
else:
level_names = None
result = self.__class__._from_data(
data=cudf.core.column_accessor.ColumnAccessor(
cols,
multiindex=self._data.multiindex,
level_names=tuple(column_names.names)
if isinstance(column_names, pd.Index)
else None,
level_names=level_names,
),
index=index,
)
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from cudf.api.types import (
is_categorical_dtype,
is_decimal_dtype,
is_interval_dtype,
is_list_dtype,
is_numeric_dtype,
is_string_dtype,
Expand All @@ -30,7 +29,7 @@ def dtype_can_compare_equal_to_other(dtype):
or is_list_dtype(dtype)
or is_struct_dtype(dtype)
or is_decimal_dtype(dtype)
or is_interval_dtype(dtype)
or isinstance(dtype, cudf.IntervalDtype)
)


Expand Down Expand Up @@ -235,7 +234,7 @@ def assert_column_equal(
)
or (
is_numeric_dtype(left)
and not dtype_can_compare_equal_to_other(right)
and not dtype_can_compare_equal_to_other(right.dtype)
)
):
try:
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10754,6 +10754,15 @@ def test_dataframe_series_dot():
assert_eq(expected, actual)


def test_dataframe_reindex_keep_colname():
    """Check that a row-only reindex keeps the name of the columns Index.

    The frame is built with a columns Index named "foo"; reindexing the
    rows to ``[0, 1]`` is compared against a frame that carries the same
    named columns Index and a null in the newly added row.
    """
    # Source frame: one row, one column, columns Index named "foo".
    source = cudf.DataFrame([1], columns=cudf.Index([1], name="foo"))
    reindexed = source.reindex(index=[0, 1])

    # Build the expectation from fresh objects so nothing is shared with
    # the frame under test.
    wanted = cudf.DataFrame(
        [1, None],
        columns=cudf.Index([1], name="foo"),
        index=[0, 1],
    )

    assert_eq(reindexed, wanted)


def test_dataframe_duplicate_index_reindex():
gdf = cudf.DataFrame({"a": [0, 1, 2, 3]}, index=[0, 0, 1, 1])
pdf = gdf.to_pandas()
Expand Down

0 comments on commit 218992e

Please sign in to comment.