Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-23.04' into sunset-string…
Browse files Browse the repository at this point in the history
…sudf-package
  • Loading branch information
vyasr committed Feb 21, 2023
2 parents 473ac7a + a308b24 commit 2945dc8
Show file tree
Hide file tree
Showing 19 changed files with 776 additions and 586 deletions.
8 changes: 5 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,11 @@ if buildAll || hasArg libcudf; then
LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
Expand Down
28 changes: 27 additions & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -14,3 +14,29 @@ rapids-logger "Begin cpp build"
rapids-mamba-retry mambabuild conda/recipes/libcudf

rapids-upload-conda-to-s3 cpp

echo "++++++++++++++++++++++++++++++++++++++++++++"

# This script runs with `set -u`, so expand RAPIDS_ARTIFACTS_DIR with an empty
# default: when it is unset the artifact steps below are skipped instead of
# aborting the script with an "unbound variable" error after the build and
# conda upload have already succeeded.
ARTIFACTS_DIR="${RAPIDS_ARTIFACTS_DIR:-}"

# List the artifacts directory for debugging purposes.
if [[ -n "${ARTIFACTS_DIR}" && -d "${ARTIFACTS_DIR}" ]]; then
  ls -l "${ARTIFACTS_DIR}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

# Upload the raw ninja log when it is present in the artifacts directory.
FILE="${ARTIFACTS_DIR}/ninja.log"
if [[ -n "${ARTIFACTS_DIR}" && -f "${FILE}" ]]; then
  echo -e "\x1B[33;1m\x1B[48;5;240m Ninja log for this build available at the following link \x1B[0m"
  # Upload name is built from the CUDA major version and the machine architecture.
  UPLOAD_NAME="cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).ninja.log"
  rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"

# Upload the HTML build metrics report when it is present in the artifacts directory.
FILE="${ARTIFACTS_DIR}/ninja_log.html"
if [[ -n "${ARTIFACTS_DIR}" && -f "${FILE}" ]]; then
  echo -e "\x1B[33;1m\x1B[48;5;240m Build Metrics Report for this build available at the following link \x1B[0m"
  UPLOAD_NAME="cpp_cuda${RAPIDS_CUDA_VERSION%%.*}_$(arch).BuildMetricsReport.html"
  rapids-upload-to-s3 "${UPLOAD_NAME}" "${FILE}"
fi

echo "++++++++++++++++++++++++++++++++++++++++++++"
16 changes: 0 additions & 16 deletions ci/test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,5 @@ for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
fi
done

if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then
rapids-logger "Memcheck gtests with rmm_mode=cuda"
export GTEST_CUDF_RMM_MODE=cuda
COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
for gt in "$CONDA_PREFIX"/bin/gtests/{libcudf,libcudf_kafka}/* ; do
test_name=$(basename ${gt})
if [[ "$test_name" == "ERROR_TEST" ]]; then
continue
fi
echo "Running gtest $test_name"
${COMPUTE_SANITIZER_CMD} ${gt} | tee "${RAPIDS_TESTS_DIR}${test_name}.cs.log"
done
unset GTEST_CUDF_RMM_MODE
# TODO: test-results/*.cs.log are processed in CI
fi

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ build:
- SCCACHE_IDLE_TIMEOUT
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- RAPIDS_ARTIFACTS_DIR

requirements:
build:
Expand Down
14 changes: 7 additions & 7 deletions cpp/benchmarks/string/split.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,12 +57,12 @@ static void BM_split(benchmark::State& state, split_type rt)

static void generate_bench_args(benchmark::internal::Benchmark* b)
{
int const min_rows = 1 << 12;
int const max_rows = 1 << 24;
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
int constexpr min_rows = 1 << 12;
int constexpr max_rows = 1 << 24;
int constexpr row_mult = 8;
int constexpr min_rowlen = 1 << 5;
int constexpr max_rowlen = 1 << 13;
int constexpr len_mult = 2;
for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
// avoid generating combinations that exceed the cudf column limit
Expand Down
10 changes: 10 additions & 0 deletions cpp/include/cudf/io/datasource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,21 @@ class datasource {
/**
* @brief Creates a source from a host memory buffer.
*
* @deprecated Since 23.04
*
* @param[in] buffer Host buffer object
* @return Constructed datasource object
*/
static std::unique_ptr<datasource> create(host_buffer const& buffer);

/**
* @brief Creates a source from a host memory buffer.
*
* @param[in] buffer Host buffer object
* @return Constructed datasource object
*/
static std::unique_ptr<datasource> create(cudf::host_span<std::byte const> buffer);

/**
* @brief Creates a source from a device memory buffer.
*
Expand Down
75 changes: 71 additions & 4 deletions cpp/include/cudf/io/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ struct table_with_metadata {
/**
* @brief Non-owning view of a host memory buffer
*
* @deprecated Since 23.04
*
* Used to describe buffer input in `source_info` objects.
*/
struct host_buffer {
Expand All @@ -166,6 +168,22 @@ struct host_buffer {
host_buffer(const char* data, size_t size) : data(data), size(size) {}
};

/**
 * @brief Checks whether a type can reasonably be handled through a pointer to bytes.
 *
 * Top-level `const`/`volatile` qualifiers are ignored. The accepted types are `int8_t`, `char`,
 * `uint8_t`, `unsigned char`, and `std::byte`.
 *
 * @tparam T The representation type
 * @return `true` if `T`, stripped of cv-qualifiers, is one of the byte-like types listed above
 */
template <typename T>
constexpr inline auto is_byte_like_type()
{
  using bare_T = std::remove_cv_t<T>;
  return std::disjunction_v<std::is_same<bare_T, int8_t>,
                            std::is_same<bare_T, char>,
                            std::is_same<bare_T, uint8_t>,
                            std::is_same<bare_T, unsigned char>,
                            std::is_same<bare_T, std::byte>>;
}

/**
* @brief Source information for read interfaces
*/
Expand All @@ -191,21 +209,70 @@ struct source_info {
/**
* @brief Construct a new source info object for multiple buffers in host memory
*
* @deprecated Since 23.04
*
* @param host_buffers Input buffers in host memory
*/
explicit source_info(std::vector<host_buffer> const& host_buffers)
: _type(io_type::HOST_BUFFER), _host_buffers(host_buffers)
explicit source_info(std::vector<host_buffer> const& host_buffers) : _type(io_type::HOST_BUFFER)
{
_host_buffers.reserve(host_buffers.size());
std::transform(host_buffers.begin(),
host_buffers.end(),
std::back_inserter(_host_buffers),
[](auto const hb) {
return cudf::host_span<std::byte const>{
reinterpret_cast<std::byte const*>(hb.data), hb.size};
});
}

/**
* @brief Construct a new source info object for a single buffer
*
* @deprecated Since 23.04
*
* @param host_data Input buffer in host memory
* @param size Size of the buffer
*/
explicit source_info(const char* host_data, size_t size)
: _type(io_type::HOST_BUFFER), _host_buffers({{host_data, size}})
: _type(io_type::HOST_BUFFER),
_host_buffers(
{cudf::host_span<std::byte const>(reinterpret_cast<std::byte const*>(host_data), size)})
{
}

/**
 * @brief Construct a new source info object for multiple buffers in host memory
 *
 * Each input span is recorded as a span of `std::byte const` built from the input span's data
 * pointer and size.
 *
 * @tparam T Element type of the input spans; must satisfy `is_byte_like_type`
 * @param host_buffers Input buffers in host memory
 */
template <typename T, CUDF_ENABLE_IF(is_byte_like_type<std::remove_cv_t<T>>())>
explicit source_info(cudf::host_span<cudf::host_span<T>> const host_buffers)
  : _type(io_type::HOST_BUFFER)
{
  if constexpr (std::is_same_v<std::remove_cv_t<T>, std::byte>) {
    // Elements are already spans of `std::byte`; take them over directly.
    _host_buffers.assign(host_buffers.begin(), host_buffers.end());
  } else {
    // Other byte-like element types are re-viewed as `std::byte const`.
    _host_buffers.reserve(host_buffers.size());
    for (auto const& buffer : host_buffers) {
      _host_buffers.push_back(cudf::host_span<std::byte const>{
        reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()});
    }
  }
}

/**
 * @brief Construct a new source info object for a single buffer
 *
 * The span's data pointer is reinterpreted as `std::byte const*` and stored, together with the
 * span's size, as the only entry in the list of host buffers.
 *
 * @tparam T Element type of the input span; must satisfy `is_byte_like_type`
 * @param host_data Input buffer in host memory
 */
template <typename T, CUDF_ENABLE_IF(is_byte_like_type<std::remove_cv_t<T>>())>
explicit source_info(cudf::host_span<T> host_data)
: _type(io_type::HOST_BUFFER),
_host_buffers{cudf::host_span<std::byte const>(
reinterpret_cast<std::byte const*>(host_data.data()), host_data.size())}
{
}

Expand Down Expand Up @@ -289,7 +356,7 @@ struct source_info {
private:
io_type _type = io_type::FILEPATH;
std::vector<std::string> _filepaths;
std::vector<host_buffer> _host_buffers;
std::vector<cudf::host_span<std::byte const>> _host_buffers;
std::vector<cudf::device_span<std::byte const>> _device_buffers;
std::vector<cudf::io::datasource*> _user_sources;
};
Expand Down
8 changes: 7 additions & 1 deletion cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,16 @@ std::unique_ptr<datasource> datasource::create(const std::string& filepath,
}

// Builds a datasource from a `host_buffer` by re-viewing its `data`/`size` pair as a span of
// `std::byte const` and forwarding to the span-based `create` overload.
std::unique_ptr<datasource> datasource::create(host_buffer const& buffer)
{
  auto const* const bytes = reinterpret_cast<std::byte const*>(buffer.data);
  return create(cudf::host_span<std::byte const>{bytes, buffer.size});
}

std::unique_ptr<datasource> datasource::create(cudf::host_span<std::byte const> buffer)
{
// Use Arrow IO buffer class for zero-copy reads of host memory
return std::make_unique<arrow_io_source>(std::make_shared<arrow::io::BufferReader>(
reinterpret_cast<const uint8_t*>(buffer.data), buffer.size));
reinterpret_cast<const uint8_t*>(buffer.data()), buffer.size()));
}

std::unique_ptr<datasource> datasource::create(cudf::device_span<std::byte const> buffer)
Expand Down
Loading

0 comments on commit 2945dc8

Please sign in to comment.