Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.02' into spdlog_1.12…
Browse files Browse the repository at this point in the history
…_fmt_10
  • Loading branch information
bdice committed Nov 22, 2023
2 parents 7c648d3 + fcc8950 commit e29057e
Show file tree
Hide file tree
Showing 12 changed files with 191 additions and 121 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/parquet/decode_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ __device__ size_type gpuDecodeTotalPageStringSize(page_state_s* s, int t)
} else if ((s->col.data_type & 7) == BYTE_ARRAY) {
str_len = gpuInitStringDescriptors<true, unused_state_buf>(s, nullptr, target_pos, t);
}
if (!t) { *(int32_t volatile*)&s->dict_pos = target_pos; }
if (!t) { s->dict_pos = target_pos; }
return str_len;
}

Expand Down
31 changes: 11 additions & 20 deletions cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@ constexpr int rolling_buf_size = decode_block_size * 2;
* @param[in] dstv Pointer to row output data (string descriptor or 32-bit hash)
*/
template <typename state_buf>
inline __device__ void gpuOutputString(volatile page_state_s* s,
volatile state_buf* sb,
int src_pos,
void* dstv)
inline __device__ void gpuOutputString(page_state_s* s, state_buf* sb, int src_pos, void* dstv)
{
auto [ptr, len] = gpuGetStringData(s, sb, src_pos);
// make sure to only hash `BYTE_ARRAY` when specified with the output type size
Expand All @@ -69,7 +66,7 @@ inline __device__ void gpuOutputString(volatile page_state_s* s,
* @param[in] dst Pointer to row output data
*/
template <typename state_buf>
inline __device__ void gpuOutputBoolean(volatile state_buf* sb, int src_pos, uint8_t* dst)
inline __device__ void gpuOutputBoolean(state_buf* sb, int src_pos, uint8_t* dst)
{
*dst = sb->dict_idx[rolling_index<state_buf::dict_buf_size>(src_pos)];
}
Expand Down Expand Up @@ -143,8 +140,8 @@ inline __device__ void gpuStoreOutput(uint2* dst,
* @param[out] dst Pointer to row output data
*/
template <typename state_buf>
inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s,
volatile state_buf* sb,
inline __device__ void gpuOutputInt96Timestamp(page_state_s* s,
state_buf* sb,
int src_pos,
int64_t* dst)
{
Expand Down Expand Up @@ -218,8 +215,8 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s,
* @param[in] dst Pointer to row output data
*/
template <typename state_buf>
inline __device__ void gpuOutputInt64Timestamp(volatile page_state_s* s,
volatile state_buf* sb,
inline __device__ void gpuOutputInt64Timestamp(page_state_s* s,
state_buf* sb,
int src_pos,
int64_t* dst)
{
Expand Down Expand Up @@ -301,10 +298,7 @@ __device__ void gpuOutputByteArrayAsInt(char const* ptr, int32_t len, T* dst)
* @param[in] dst Pointer to row output data
*/
template <typename T, typename state_buf>
__device__ void gpuOutputFixedLenByteArrayAsInt(volatile page_state_s* s,
volatile state_buf* sb,
int src_pos,
T* dst)
__device__ void gpuOutputFixedLenByteArrayAsInt(page_state_s* s, state_buf* sb, int src_pos, T* dst)
{
uint32_t const dtype_len_in = s->dtype_len_in;
uint8_t const* data = s->dict_base ? s->dict_base : s->data_start;
Expand Down Expand Up @@ -338,10 +332,7 @@ __device__ void gpuOutputFixedLenByteArrayAsInt(volatile page_state_s* s,
* @param[in] dst Pointer to row output data
*/
template <typename T, typename state_buf>
inline __device__ void gpuOutputFast(volatile page_state_s* s,
volatile state_buf* sb,
int src_pos,
T* dst)
inline __device__ void gpuOutputFast(page_state_s* s, state_buf* sb, int src_pos, T* dst)
{
uint8_t const* dict;
uint32_t dict_pos, dict_size = s->dict_size;
Expand Down Expand Up @@ -371,7 +362,7 @@ inline __device__ void gpuOutputFast(volatile page_state_s* s,
*/
template <typename state_buf>
static __device__ void gpuOutputGeneric(
volatile page_state_s* s, volatile state_buf* sb, int src_pos, uint8_t* dst8, int len)
page_state_s* s, state_buf* sb, int src_pos, uint8_t* dst8, int len)
{
uint8_t const* dict;
uint32_t dict_pos, dict_size = s->dict_size;
Expand Down Expand Up @@ -512,7 +503,7 @@ __global__ void __launch_bounds__(decode_block_size)
(s->col.data_type & 7) == FIXED_LEN_BYTE_ARRAY) {
gpuInitStringDescriptors<false>(s, sb, src_target_pos, t & 0x1f);
}
if (t == 32) { *(volatile int32_t*)&s->dict_pos = src_target_pos; }
if (t == 32) { s->dict_pos = src_target_pos; }
} else {
// WARP1..WARP3: Decode values
int const dtype = s->col.data_type & 7;
Expand Down Expand Up @@ -601,7 +592,7 @@ __global__ void __launch_bounds__(decode_block_size)
}
}

if (t == out_thread0) { *(volatile int32_t*)&s->src_pos = target_pos; }
if (t == out_thread0) { s->src_pos = target_pos; }
}
__syncthreads();
}
Expand Down
29 changes: 13 additions & 16 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,15 @@ struct page_state_s {
// points to either nesting_decode_cache above when possible, or to the global source otherwise
PageNestingDecodeInfo* nesting_info{};

inline __device__ void set_error_code(decode_error err) volatile
inline __device__ void set_error_code(decode_error err)
{
cuda::atomic_ref<int32_t, cuda::thread_scope_block> ref{const_cast<int&>(error)};
cuda::atomic_ref<int32_t, cuda::thread_scope_block> ref{error};
ref.fetch_or(static_cast<int32_t>(err), cuda::std::memory_order_relaxed);
}

inline __device__ void reset_error_code() volatile
inline __device__ void reset_error_code()
{
cuda::atomic_ref<int32_t, cuda::thread_scope_block> ref{const_cast<int&>(error)};
cuda::atomic_ref<int32_t, cuda::thread_scope_block> ref{error};
ref.store(0, cuda::std::memory_order_release);
}
};
Expand Down Expand Up @@ -185,8 +185,8 @@ inline __device__ bool is_page_contained(page_state_s* const s, size_t start_row
* @return A pair containing a pointer to the string and its length
*/
template <typename state_buf>
inline __device__ cuda::std::pair<char const*, size_t> gpuGetStringData(page_state_s volatile* s,
state_buf volatile* sb,
inline __device__ cuda::std::pair<char const*, size_t> gpuGetStringData(page_state_s* s,
state_buf* sb,
int src_pos)
{
char const* ptr = nullptr;
Expand Down Expand Up @@ -232,8 +232,10 @@ inline __device__ cuda::std::pair<char const*, size_t> gpuGetStringData(page_sta
* additional values.
*/
template <bool sizes_only, typename state_buf>
__device__ cuda::std::pair<int, int> gpuDecodeDictionaryIndices(
page_state_s volatile* s, [[maybe_unused]] state_buf volatile* sb, int target_pos, int t)
__device__ cuda::std::pair<int, int> gpuDecodeDictionaryIndices(page_state_s* s,
[[maybe_unused]] state_buf* sb,
int target_pos,
int t)
{
uint8_t const* end = s->data_end;
int dict_bits = s->dict_bits;
Expand Down Expand Up @@ -349,10 +351,7 @@ __device__ cuda::std::pair<int, int> gpuDecodeDictionaryIndices(
* @return The new output position
*/
template <typename state_buf>
inline __device__ int gpuDecodeRleBooleans(page_state_s volatile* s,
state_buf volatile* sb,
int target_pos,
int t)
inline __device__ int gpuDecodeRleBooleans(page_state_s* s, state_buf* sb, int target_pos, int t)
{
uint8_t const* end = s->data_end;
int64_t pos = s->dict_pos;
Expand Down Expand Up @@ -420,10 +419,8 @@ inline __device__ int gpuDecodeRleBooleans(page_state_s volatile* s,
* @return Total length of strings processed
*/
template <bool sizes_only, typename state_buf>
__device__ size_type gpuInitStringDescriptors(page_state_s volatile* s,
[[maybe_unused]] state_buf volatile* sb,
int target_pos,
int t)
__device__ size_type
gpuInitStringDescriptors(page_state_s* s, [[maybe_unused]] state_buf* sb, int target_pos, int t)
{
int pos = s->dict_pos;
int total_len = 0;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct page_enc_state_s {
uint32_t rle_rpt_count;
uint32_t page_start_val;
uint32_t chunk_start_val;
volatile uint32_t rpt_map[num_encode_warps];
uint32_t rpt_map[num_encode_warps];
EncPage page;
EncColumnChunk ck;
parquet_column_device_view col;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/page_string_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ __global__ void __launch_bounds__(decode_block_size)
} else {
gpuInitStringDescriptors<false>(s, sb, src_target_pos, lane_id);
}
if (t == 32) { *(volatile int32_t*)&s->dict_pos = src_target_pos; }
if (t == 32) { s->dict_pos = src_target_pos; }
} else {
int const me = t - out_thread0;

Expand Down Expand Up @@ -934,7 +934,7 @@ __global__ void __launch_bounds__(decode_block_size)
}
}

if (t == out_thread0) { *(volatile int32_t*)&s->src_pos = target_pos; }
if (t == out_thread0) { s->src_pos = target_pos; }
}
__syncthreads();
}
Expand Down
13 changes: 5 additions & 8 deletions docs/dask_cudf/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,14 @@ data reading facilities, followed by calling
:members:
from_cudf,
from_dask_dataframe,
from_delayed,
read_csv,
read_json,
read_orc,
to_orc,
read_text,
read_parquet

.. warning::

FIXME: where should the following live?

.. autofunction:: dask_cudf.concat

.. autofunction:: dask_cudf.from_delayed

Grouping
========

Expand Down Expand Up @@ -77,3 +70,7 @@ identical. The full API is provided below.
:members:
:inherited-members:
:show-inheritance:

.. automodule:: dask_cudf
:members:
concat
14 changes: 13 additions & 1 deletion python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from cudf.core import column

if TYPE_CHECKING:
from cudf._typing import Dtype
from cudf.core.column import ColumnBase


Expand Down Expand Up @@ -99,6 +100,9 @@ class ColumnAccessor(abc.MutableMapping):
rangeindex : bool, optional
Whether the keys should be returned as a RangeIndex
in `to_pandas_index` (default=False).
label_dtype : Dtype, optional
What dtype should be returned in `to_pandas_index`
(default=None).
"""

_data: "Dict[Any, ColumnBase]"
Expand All @@ -111,8 +115,10 @@ def __init__(
multiindex: bool = False,
level_names=None,
rangeindex: bool = False,
label_dtype: Dtype | None = None,
):
self.rangeindex = rangeindex
self.label_dtype = label_dtype
if data is None:
data = {}
# TODO: we should validate the keys of `data`
Expand All @@ -123,6 +129,7 @@ def __init__(
self.multiindex = multiindex
self._level_names = level_names
self.rangeindex = data.rangeindex
self.label_dtype = data.label_dtype
else:
# This code path is performance-critical for copies and should be
# modified with care.
Expand Down Expand Up @@ -292,7 +299,12 @@ def to_pandas_index(self) -> pd.Index:
self.names[0], self.names[-1] + diff, diff
)
return pd.RangeIndex(new_range, name=self.name)
result = pd.Index(self.names, name=self.name, tupleize_cols=False)
result = pd.Index(
self.names,
name=self.name,
tupleize_cols=False,
dtype=self.label_dtype,
)
return result

def insert(
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,7 @@ def __init__(
rangeindex = isinstance(
columns, (range, pd.RangeIndex, cudf.RangeIndex)
)
label_dtype = getattr(columns, "dtype", None)
self._data = ColumnAccessor(
{
k: column.column_empty(
Expand All @@ -745,6 +746,7 @@ def __init__(
if isinstance(columns, pd.Index)
else None,
rangeindex=rangeindex,
label_dtype=label_dtype,
)
elif isinstance(data, ColumnAccessor):
raise TypeError(
Expand Down Expand Up @@ -995,12 +997,15 @@ def _init_from_list_like(self, data, index=None, columns=None):
self._data.rangeindex = isinstance(
columns, (range, pd.RangeIndex, cudf.RangeIndex)
)
self._data.label_dtype = getattr(columns, "dtype", None)

@_cudf_nvtx_annotate
def _init_from_dict_like(
self, data, index=None, columns=None, nan_as_null=None
):
label_dtype = None
if columns is not None:
label_dtype = getattr(columns, "dtype", None)
# remove all entries in data that are not in columns,
# inserting new empty columns for entries in columns that
# are not in data
Expand Down Expand Up @@ -1069,6 +1074,7 @@ def _init_from_dict_like(
if isinstance(columns, pd.Index)
else self._data._level_names
)
self._data.label_dtype = label_dtype

@classmethod
def _from_data(
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4566,6 +4566,17 @@ def test_dataframe_columns_returns_rangeindex_single_col():
assert_eq(result, expected)


@pytest.mark.parametrize("dtype", ["int64", "datetime64[ns]", "int8"])
@pytest.mark.parametrize("idx_data", [[], [1, 2]])
@pytest.mark.parametrize("data", [None, [], {}])
def test_dataframe_columns_empty_data_preserves_dtype(dtype, idx_data, data):
result = cudf.DataFrame(
data, columns=cudf.Index(idx_data, dtype=dtype)
).columns
expected = pd.Index(idx_data, dtype=dtype)
assert_eq(result, expected)


@pytest.mark.parametrize(
"data",
[
Expand Down
Loading

0 comments on commit e29057e

Please sign in to comment.