Statistics cleanup #7439

Merged · 18 commits · Mar 6, 2021
32 changes: 4 additions & 28 deletions cpp/include/cudf/detail/utilities/device_operators.cuh
@@ -14,8 +14,7 @@
* limitations under the License.
*/

#ifndef DEVICE_OPERATORS_CUH
#define DEVICE_OPERATORS_CUH
#pragma once

/**
* @brief definition of the device operators
@@ -24,6 +23,7 @@

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
@@ -84,16 +84,6 @@ struct DeviceCount {
}
};

/**
* @brief string value for sentinel which is used in min, max reduction
* operators
* This sentinel string value is the highest possible valid UTF-8 encoded
* character. This serves as identity value for maximum operator on string
* values. Also, this char pointer serves as valid device pointer of identity
* value for minimum operator on string values.
*/
static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};

/* @brief binary `min` operator */
struct DeviceMin {
template <typename T>
@@ -123,13 +113,7 @@ struct DeviceMin {
typename std::enable_if_t<std::is_same<T, cudf::string_view>::value>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE static constexpr T identity()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, max_string_sentinel));
#endif
return T(psentinel, 4);
return string_view::max();
}

template <typename T, typename std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
@@ -167,13 +151,7 @@ struct DeviceMax {
typename std::enable_if_t<std::is_same<T, cudf::string_view>::value>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE static constexpr T identity()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, max_string_sentinel));
#endif
return T(psentinel, 0);
return string_view::min();
}

template <typename T, typename std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
@@ -242,5 +220,3 @@ struct DeviceLeadLag {
};

} // namespace cudf

#endif
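
With this change, the string identities for `DeviceMin` and `DeviceMax` no longer reach into a file-local `__constant__` sentinel; they simply delegate to `string_view::max()` and `string_view::min()`. A standalone sketch of that delegation pattern (illustrative stand-in types only, not cudf code):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Stand-in for cudf::string_view: a string type that knows its own extreme values.
struct MiniString {
  std::string data;
  static MiniString min() { return {""}; }                   // lowest possible value
  static MiniString max() { return {"\xF7\xBF\xBF\xBF"}; }   // highest valid UTF-8 char
  bool operator<(MiniString const& rhs) const { return data < rhs.data; }
};

// Stand-in for DeviceMin: its identity() simply delegates to the type's max().
struct MiniDeviceMin {
  template <typename T>
  static T identity() { return T::max(); }  // identity of `min` is the largest value
  template <typename T>
  T operator()(T const& lhs, T const& rhs) const { return std::min(lhs, rhs); }
};

// Usage: fold a sequence starting from the identity value.
inline MiniString min_reduce(std::vector<MiniString> const& strings)
{
  auto result = MiniDeviceMin::identity<MiniString>();
  for (auto const& s : strings) { result = MiniDeviceMin{}(result, s); }
  return result;
}
```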
5 changes: 1 addition & 4 deletions cpp/include/cudf/detail/utilities/int_fastdiv.h
@@ -14,8 +14,7 @@
* limitations under the License.
*/

#ifndef _INT_FASTDIV_KJGIUHFG
#define _INT_FASTDIV_KJGIUHFG
#pragma once

class int_fastdiv {
public:
@@ -172,5 +171,3 @@ __host__ __device__ __forceinline__ int operator%(const unsigned char n, const i
{
return ((int)n) % divisor;
}

#endif
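
The headers touched by this PR make the same mechanical change: the old macro include guard is dropped in favor of `#pragma once`. For illustration, the two equivalent forms of guarding `int_fastdiv.h` look like this (sketch only):

```cpp
// Before: classic macro include guard
#ifndef _INT_FASTDIV_KJGIUHFG
#define _INT_FASTDIV_KJGIUHFG
// ... header contents ...
#endif  // _INT_FASTDIV_KJGIUHFG

// After: single directive, no macro name to keep unique across the codebase
#pragma once
// ... header contents ...
```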
5 changes: 3 additions & 2 deletions cpp/include/cudf/strings/string.cuh
@@ -41,7 +41,7 @@ namespace string {
* @param d_str String to check.
* @return true if string has valid integer characters
*/
__device__ bool is_integer(string_view const& d_str)
inline __device__ bool is_integer(string_view const& d_str)
{
if (d_str.empty()) return false;
auto begin = d_str.begin();
@@ -71,7 +71,7 @@ __device__ bool is_integer(string_view const& d_str)
* @param d_str String to check.
* @return true if string has valid float characters
*/
__device__ bool is_float(string_view const& d_str)
inline __device__ bool is_float(string_view const& d_str)
{
if (d_str.empty()) return false;
// strings allowed by the converter
@@ -105,6 +105,7 @@ __device__ bool is_float(string_view const& d_str)
}
return result;
}

/** @} */ // end of group
} // namespace string
} // namespace strings
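
The only substantive edit in this header is the `inline` added to `is_integer` and `is_float`. Because both functions are defined in a header, every translation unit that includes it emits a definition; marking them `inline` keeps those duplicate definitions legal instead of risking multiple-definition errors at (device) link time. A minimal sketch of the same rule, using a plain host function for brevity (hypothetical header name):

```cpp
// my_checks.hpp -- a header included from several .cpp/.cu files
#pragma once

// OK: `inline` allows identical definitions of this function in every includer.
inline bool is_ascii_digit(char c) { return c >= '0' && c <= '9'; }

// Not OK in a header: without `inline`, each translation unit would emit an
// external definition and the linker could report a duplicate symbol.
// bool is_ascii_digit_bad(char c) { return c >= '0' && c <= '9'; }
```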
42 changes: 42 additions & 0 deletions cpp/include/cudf/strings/string_view.cuh
@@ -17,6 +17,7 @@
#pragma once

#include <cudf/strings/string_view.hpp>
#include <cudf/utilities/error.hpp>

#include <thrust/count.h>
#include <thrust/find.h>
@@ -43,9 +44,50 @@ __device__ inline size_type characters_in_string(const char* str, size_type byte
return thrust::count_if(
thrust::seq, ptr, ptr + bytes, [](uint8_t chr) { return is_begin_utf8_char(chr); });
}

/**
 * @brief String value for the sentinel used in the min/max reduction operators
 *
 * This sentinel string value is the highest possible valid UTF-8 encoded
 * character, so it serves as the identity value for the minimum operator on
 * string values. Also, this char pointer serves as a valid device pointer for
 * the identity value (an empty string) of the maximum operator on string values.
 */
static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};
} // namespace detail
} // namespace strings

/**
* @brief Return minimum value associated with the string type
*
 * This function needs to be host callable because it is called by the
 * host-callable function DeviceMax::identity<string_view>()
*
* @return An empty string
*/
CUDA_HOST_DEVICE_CALLABLE string_view string_view::min() { return string_view(); }

/**
* @brief Return maximum value associated with the string type
*
 * This function needs to be host callable because it is called by the
 * host-callable function DeviceMin::identity<string_view>()
*
* @return A string value which represents the highest possible valid UTF-8 encoded
* character.
*/
CUDA_HOST_DEVICE_CALLABLE string_view string_view::max()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &cudf::strings::detail::max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, cudf::strings::detail::max_string_sentinel));
#endif
return string_view(psentinel, 4);
}

__device__ inline size_type string_view::length() const
{
if (_length == UNKNOWN_STRING_LENGTH)
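
`string_view::min()` and `string_view::max()` are defined here, next to the sentinel they reference. Note how `max()` resolves the sentinel's address differently on host and device: device code addresses the `__constant__` symbol directly, while host code must ask the CUDA runtime for the symbol's device address. A stripped-down sketch of that dispatch pattern (illustrative, not the cudf implementation; error checking omitted):

```cpp
#include <cuda_runtime.h>

__constant__ int device_sentinel = 42;  // stand-in for max_string_sentinel

__host__ __device__ inline const int* sentinel_address()
{
#if defined(__CUDA_ARCH__)
  // Compiling for device: the __constant__ symbol is directly addressable.
  return &device_sentinel;
#else
  // Compiling for host: resolve the symbol's device address via the runtime.
  const int* ptr = nullptr;
  cudaGetSymbolAddress((void**)&ptr, device_sentinel);
  return ptr;
#endif
}
```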
21 changes: 21 additions & 0 deletions cpp/include/cudf/strings/string_view.hpp
@@ -290,6 +290,27 @@ class string_view {
*/
CUDA_DEVICE_CALLABLE string_view substr(size_type start, size_type length) const;

/**
* @brief Return minimum value associated with the string type
*
 * This function needs to be host callable because it is called by the
 * host-callable function DeviceMax::identity<string_view>()
*
* @return An empty string
*/
CUDA_HOST_DEVICE_CALLABLE static string_view min();

/**
* @brief Return maximum value associated with the string type
*
 * This function needs to be host callable because it is called by the
 * host-callable function DeviceMin::identity<string_view>()
*
* @return A string value which represents the highest possible valid UTF-8 encoded
* character.
*/
CUDA_HOST_DEVICE_CALLABLE static string_view max();

/**
* @brief Default constructor represents an empty string.
*/
2 changes: 1 addition & 1 deletion cpp/src/io/orc/stats_enc.cu
@@ -51,7 +51,7 @@ __global__ void __launch_bounds__(init_threads_per_block)
uint32_t t = threadIdx.x;
statistics_group *group = &group_g[threadIdx.y];
if (chunk_id < num_rowgroups and t == 0) {
uint32_t num_rows = cols[col_id].num_rows;
uint32_t num_rows = cols[col_id].leaf_column->size();
group->col = &cols[col_id];
group->start_row = chunk_id * row_index_stride;
group->num_rows = min(num_rows - min(chunk_id * row_index_stride, num_rows), row_index_stride);
15 changes: 13 additions & 2 deletions cpp/src/io/orc/writer_impl.cu
@@ -21,8 +21,11 @@

#include "writer_impl.hpp"

#include <io/utilities/column_utils.cuh>

#include <cudf/null_mask.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
@@ -39,6 +42,7 @@ namespace detail {
namespace orc {
using namespace cudf::io::orc;
using namespace cudf::io;
using cudf::io::orc::gpu::nvstrdesc_s;

struct row_group_index_info {
int32_t pos = -1; // Position
@@ -775,7 +779,9 @@ std::vector<StripeInformation> writer::impl::gather_stripes(
}

std::vector<std::vector<uint8_t>> writer::impl::gather_statistic_blobs(
host_span<orc_column_view const> columns, host_span<stripe_rowgroups const> stripe_bounds)
const table_device_view &table,
host_span<orc_column_view const> columns,
host_span<stripe_rowgroups const> stripe_bounds)
{
auto const num_rowgroups = stripes_size(stripe_bounds);
size_t num_stat_blobs = (1 + stripe_bounds.size()) * columns.size();
@@ -833,6 +839,10 @@ std::vector<std::vector<uint8_t>> writer::impl::gather_statistic_blobs(
}
stat_desc.host_to_device(stream);
stat_merge.host_to_device(stream);

rmm::device_uvector<column_device_view> leaf_column_views =
create_leaf_column_device_views<stats_column_desc>(stat_desc, table, stream);

gpu::orc_init_statistics_groups(stat_groups.data(),
stat_desc.device_ptr(),
columns.size(),
@@ -1106,10 +1116,11 @@ void writer::impl::write(table_view const &table)
auto stripes =
gather_stripes(num_rows, num_index_streams, stripe_bounds, &enc_data.streams, &strm_descs);

auto device_columns = table_device_view::create(table);
// Gather column statistics
std::vector<std::vector<uint8_t>> column_stats;
if (enable_statistics_ && num_columns > 0 && num_rows > 0) {
column_stats = gather_statistic_blobs(orc_columns, stripe_bounds);
column_stats = gather_statistic_blobs(*device_columns, orc_columns, stripe_bounds);
}

// Allocate intermediate output stream buffer
6 changes: 5 additions & 1 deletion cpp/src/io/orc/writer_impl.hpp
@@ -26,6 +26,7 @@
#include <cudf/io/detail/orc.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/error.hpp>

#include <thrust/iterator/counting_iterator.h>
@@ -271,13 +272,16 @@ class writer::impl {
* @brief Returns per-stripe and per-file column statistics encoded
* in ORC protobuf format.
*
* @param table Table information to be written
* @param columns List of columns
* @param stripe_bounds List of stripe boundaries
*
* @return The statistic blobs
*/
std::vector<std::vector<uint8_t>> gather_statistic_blobs(
host_span<orc_column_view const> columns, host_span<stripe_rowgroups const> stripe_bounds);
const table_device_view& table,
host_span<orc_column_view const> columns,
host_span<stripe_rowgroups const> stripe_bounds);

/**
* @brief Writes the specified column's row index stream.
35 changes: 0 additions & 35 deletions cpp/src/io/parquet/page_enc.cu
@@ -1961,41 +1961,6 @@ void InitPageFragments(PageFragment *frag,
frag, col_desc, num_fragments, num_columns, fragment_size, num_rows);
}

/**
* @copydoc void init_column_device_views(EncColumnDesc *col_desc,
* column_device_view *leaf_column_views,
* const table_device_view &parent_table_device_view,
* rmm::cuda_stream_view stream)
*/
void init_column_device_views(EncColumnDesc *col_desc,
column_device_view *leaf_column_views,
const table_device_view &parent_column_table_device_view,
rmm::cuda_stream_view stream)
{
cudf::detail::device_single_thread(
[col_desc,
parent_col_view = parent_column_table_device_view,
leaf_column_views] __device__() mutable {
for (size_type i = 0; i < parent_col_view.num_columns(); ++i) {
column_device_view col = parent_col_view.column(i);
if (col.type().id() == type_id::LIST) {
col_desc[i].parent_column = parent_col_view.begin() + i;
} else {
col_desc[i].parent_column = nullptr;
}
// traverse till leaf column
while (col.type().id() == type_id::LIST) {
col = col.child(lists_column_view::child_column_index);
}
// Store leaf_column to device storage
column_device_view *leaf_col_ptr = leaf_column_views + i;
*leaf_col_ptr = col;
col_desc[i].leaf_column = leaf_col_ptr;
}
},
stream);
}

/**
* @brief Launches kernel for initializing fragment statistics groups
*
16 changes: 0 additions & 16 deletions cpp/src/io/parquet/parquet_gpu.hpp
@@ -229,9 +229,6 @@ struct EncColumnDesc : stats_column_desc {
size_type const *level_offsets; //!< Offset array for per-row pre-calculated rep/def level values
uint8_t const *rep_values; //!< Pre-calculated repetition level values
uint8_t const *def_values; //!< Pre-calculated definition level values

column_device_view *leaf_column; //!< Pointer to leaf column
column_device_view *parent_column; //!< Pointer to parent column. Is nullptr if not list type.
};

constexpr int max_page_fragment_size = 5000; //!< Max number of rows in a page fragment
@@ -448,19 +445,6 @@ void InitPageFragments(PageFragment *frag,
uint32_t num_rows,
rmm::cuda_stream_view stream);

/**
* @brief Set column_device_view pointers in column description array
*
* @param[out] col_desc Column description array [column_id]
* @param[out] leaf_column_views Device array to store leaf columns
* @param[in] parent_table_device_view Table device view containing parent columns
* @param[in] stream CUDA stream to use, default 0
*/
void init_column_device_views(EncColumnDesc *col_desc,
column_device_view *leaf_column_views,
const table_device_view &parent_table_device_view,
rmm::cuda_stream_view stream);

/**
* @brief Launches kernel for initializing fragment statistics groups
*
15 changes: 3 additions & 12 deletions cpp/src/io/parquet/writer_impl.cu
@@ -22,6 +22,7 @@
#include "writer_impl.hpp"

#include <io/parquet/compact_protocol_writer.hpp>
#include <io/utilities/column_utils.cuh>

#include <cudf/column/column_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
@@ -495,17 +496,6 @@ class parquet_column_view {
uint8_t _decimal_precision = 0;
};

rmm::device_uvector<column_device_view> writer::impl::create_leaf_column_device_views(
hostdevice_vector<gpu::EncColumnDesc> &col_desc,
const table_device_view &parent_table_device_view)
{
rmm::device_uvector<column_device_view> leaf_column_views(parent_table_device_view.num_columns(),
stream);
gpu::init_column_device_views(
col_desc.device_ptr(), leaf_column_views.data(), parent_table_device_view, stream);
return leaf_column_views;
}

void writer::impl::init_page_fragments(hostdevice_vector<gpu::PageFragment> &frag,
hostdevice_vector<gpu::EncColumnDesc> &col_desc,
uint32_t num_columns,
Expand Down Expand Up @@ -919,7 +909,8 @@ void writer::impl::write(table_view const &table)
if (fragments.size() != 0) {
// Move column info to device
col_desc.host_to_device(stream);
leaf_column_views = create_leaf_column_device_views(col_desc, *parent_column_table_device_view);
leaf_column_views = create_leaf_column_device_views<gpu::EncColumnDesc>(
col_desc, *parent_column_table_device_view, stream);

init_page_fragments(fragments, col_desc, num_columns, num_fragments, num_rows, fragment_size);
}
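
The leaf-column setup removed from `page_enc.cu` above now lives in a shared helper, `create_leaf_column_device_views`, in `io/utilities/column_utils.cuh`; because it is templated on the column-descriptor type, the ORC writer instantiates it with `stats_column_desc` and the Parquet writer with `gpu::EncColumnDesc`. A simplified host-side sketch of the descend-to-leaf logic such a helper performs, with stand-in types (not the actual cudf implementation):

```cpp
#include <cstddef>
#include <vector>

// Stand-ins for cudf's column_device_view and the writers' column descriptors.
struct column_stub {
  bool is_list = false;
  column_stub* child = nullptr;  // child column when is_list is true
};

struct column_desc_stub {
  column_stub* parent_column = nullptr;  // set only for nested (list) columns
  column_stub* leaf_column   = nullptr;  // innermost non-list column
};

// Templated so ORC and Parquet descriptor types can share one implementation.
template <typename ColumnDesc>
void set_leaf_columns(std::vector<ColumnDesc>& descs, std::vector<column_stub>& parents)
{
  for (std::size_t i = 0; i < parents.size(); ++i) {
    column_stub* col       = &parents[i];
    descs[i].parent_column = col->is_list ? col : nullptr;
    while (col->is_list) { col = col->child; }  // descend lists to the leaf
    descs[i].leaf_column = col;
  }
}
```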