Skip to content

Commit

Permalink
Merge branch 'branch-0.19' into fixed-point-to-strings
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Mar 4, 2021
2 parents 6a582ee + 4d0c160 commit 9204a7b
Show file tree
Hide file tree
Showing 25 changed files with 294 additions and 256 deletions.
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ target_link_libraries(cudf_datagen
GTest::gmock_main
GTest::gtest_main
benchmark::benchmark
benchmark::benchmark_main
Threads::Threads
cudf)

Expand All @@ -52,7 +51,8 @@ function(ConfigureBench CMAKE_BENCH_NAME)
add_executable(${CMAKE_BENCH_NAME} ${ARGN})
set_target_properties(${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/gbenchmarks>")
target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen)
target_link_libraries(${CMAKE_BENCH_NAME}
PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main)
endfunction()

###################################################################################################
Expand Down
18 changes: 17 additions & 1 deletion cpp/cmake/thirdparty/CUDF_GetRMM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,26 @@
# limitations under the License.
#=============================================================================

function(find_and_configure_rmm VERSION)
# Clears `${var}` when the matching `CUDF_${var}` variable evaluates to true.
# Despite the "save" in the name, no value is recorded here: the variable is simply removed
# from the caller's scope and from the cache.
# NOTE(review): presumably this keeps cuDF's own setting from leaking into a dependency that is
# configured right afterwards -- confirm against the call sites.
#
# var: Name of the variable to clear (invoked in this file with BUILD_TESTS and BUILD_BENCHMARKS).
function(cudf_save_if_enabled var)
if(CUDF_${var})
# Remove the normal variable from the scope of whoever called this function.
unset(${var} PARENT_SCOPE)
# Remove the cache entry of the same name as well.
unset(${var} CACHE)
endif()
endfunction()

# Counterpart of cudf_save_if_enabled(): when `CUDF_${var}` evaluates to true, writes `${var}`
# back as an INTERNAL cache entry. The value written is always ON; it is not a previously
# recorded value.
#
# var: Name of the variable to set (invoked in this file with BUILD_TESTS and BUILD_BENCHMARKS).
function(cudf_restore_if_enabled var)
if(CUDF_${var})
# FORCE overwrites any cache entry of the same name that already exists.
set(${var} ON CACHE INTERNAL "" FORCE)
endif()
endfunction()

function(find_and_configure_rmm VERSION)
# Consumers have two options for local source builds:
# 1. Pass `-D CPM_rmm_SOURCE=/path/to/rmm` to build a local RMM source tree
# 2. Pass `-D CMAKE_PREFIX_PATH=/path/to/rmm/build` to use an existing local
# RMM build directory as the install location for find_package(rmm)
cudf_save_if_enabled(BUILD_TESTS)
cudf_save_if_enabled(BUILD_BENCHMARKS)

CPMFindPackage(NAME rmm
VERSION ${VERSION}
Expand All @@ -32,6 +46,8 @@ function(find_and_configure_rmm VERSION)
"CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}"
"DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}"
)
cudf_restore_if_enabled(BUILD_TESTS)
cudf_restore_if_enabled(BUILD_BENCHMARKS)

if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH)
list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}")
Expand Down
42 changes: 22 additions & 20 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ class column_view : public detail::column_view_base {
auto child_end() const noexcept { return _children.cend(); }

private:
friend column_view logical_cast(column_view const& input, data_type type);
friend column_view bit_cast(column_view const& input, data_type type);

std::vector<column_view> _children{}; ///< Based on element type, children
///< may contain additional data
Expand Down Expand Up @@ -550,7 +550,7 @@ class mutable_column_view : public detail::column_view_base {
operator column_view() const;

private:
friend mutable_column_view logical_cast(mutable_column_view const& input, data_type type);
friend mutable_column_view bit_cast(mutable_column_view const& input, data_type type);

std::vector<mutable_column_view> mutable_children;
};
Expand All @@ -564,47 +564,49 @@ class mutable_column_view : public detail::column_view_base {
size_type count_descendants(column_view parent);

/**
* @brief Zero-copy cast between types with the same underlying representation.
* @brief Zero-copy cast between types with the same size and compatible underlying representations.
*
* This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits
* as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that
* have the same width and underlying representation. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch; logically
* casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the
* duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ,
* nor can an INT32 column be logically cast to a FLOAT32 since what the bits represent differs.
* have the same width and compatible representations. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch;
* bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of
* the duration count. A FLOAT32 can also be bit-cast into INT32 and treated as an integer value.
* However, an INT32 column cannot be bit-cast to INT64 as the sizes differ, nor can a string_view
* column be cast into a numeric type column as their data representations are not compatible.
*
* The validity of the conversion can be checked with `cudf::is_logically_castable()`.
* The validity of the conversion can be checked with `cudf::is_bit_castable()`.
*
* @throws cudf::logic_error if the specified cast is not possible, i.e.,
* `is_logically_castable(input.type(), type)` is false.
* `is_bit_castable(input.type(), type)` is false.
*
* @param input The `column_view` to cast from
* @param type The `data_type` to cast to
* @return New `column_view` wrapping the same data as `input` but cast to `type`
*/
column_view logical_cast(column_view const& input, data_type type);
column_view bit_cast(column_view const& input, data_type type);

/**
* @brief Zero-copy cast between types with the same underlying representation.
* @brief Zero-copy cast between types with the same size and compatible underlying representations.
*
* This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits
* as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that
* have the same width and underlying representation. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch; logically
* casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the
* duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ,
* nor can an INT32 column be logically cast to a FLOAT32 since what the bits represent differs.
* have the same width and compatible representations. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch;
* bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of
* the duration count. A FLOAT32 can also be bit-cast into INT32 and treated as an integer value.
* However, an INT32 column cannot be bit-cast to INT64 as the sizes differ, nor can a string_view
* column be cast into a numeric type column as their data representations are not compatible.
*
* The validity of the conversion can be checked with `cudf::is_logically_castable()`.
* The validity of the conversion can be checked with `cudf::is_bit_castable()`.
*
* @throws cudf::logic_error if the specified cast is not possible, i.e.,
* `is_logically_castable(input.type(), type)` is false.
* `is_bit_castable(input.type(), type)` is false.
*
* @param input The `mutable_column_view` to cast from
* @param type The `data_type` to cast to
* @return New `mutable_column_view` wrapping the same data as `input` but cast to `type`
*/
mutable_column_view logical_cast(mutable_column_view const& input, data_type type);
mutable_column_view bit_cast(mutable_column_view const& input, data_type type);

} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/utilities/device_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ struct DeviceCount {
* values. Also, this char pointer serves as valid device pointer of identity
* value for minimum operator on string values.
*/
__constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};
static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};

/* @brief binary `min` operator */
struct DeviceMin {
Expand Down
7 changes: 6 additions & 1 deletion cpp/include/cudf/scalar/scalar_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,13 @@ std::unique_ptr<scalar> make_string_scalar(
* @throws std::bad_alloc if device memory allocation fails
*
* @param type The desired element type
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool.
*/
std::unique_ptr<scalar> make_default_constructed_scalar(data_type type);
std::unique_ptr<scalar> make_default_constructed_scalar(
data_type type,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct scalar using the given value of fixed width type
Expand Down
81 changes: 32 additions & 49 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,74 +591,57 @@ constexpr inline bool is_nested(data_type type)
return cudf::type_dispatcher(type, is_nested_impl{});
}

template <typename FromType, typename ToType>
struct is_logically_castable_impl : std::false_type {
};

// Allow cast to same type
template <typename Type>
struct is_logically_castable_impl<Type, Type> : std::true_type {
};

#ifndef MAP_CASTABLE_TYPES
#define MAP_CASTABLE_TYPES(Type1, Type2) \
template <> \
struct is_logically_castable_impl<Type1, Type2> : std::true_type { \
}; \
template <> \
struct is_logically_castable_impl<Type2, Type1> : std::true_type { \
};
#endif

// Allow cast between timestamp and integer representation
MAP_CASTABLE_TYPES(cudf::timestamp_D, cudf::timestamp_D::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_s, cudf::timestamp_s::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_ms, cudf::timestamp_ms::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_us, cudf::timestamp_us::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_ns, cudf::timestamp_ns::duration::rep);
// Allow cast between durations and integer representation
MAP_CASTABLE_TYPES(cudf::duration_D, cudf::duration_D::rep);
MAP_CASTABLE_TYPES(cudf::duration_s, cudf::duration_s::rep);
MAP_CASTABLE_TYPES(cudf::duration_ms, cudf::duration_ms::rep);
MAP_CASTABLE_TYPES(cudf::duration_us, cudf::duration_us::rep);
MAP_CASTABLE_TYPES(cudf::duration_ns, cudf::duration_ns::rep);
// Allow cast between decimals and integer representation
MAP_CASTABLE_TYPES(numeric::decimal32, numeric::decimal32::rep);
MAP_CASTABLE_TYPES(numeric::decimal64, numeric::decimal64::rep);

template <typename FromType>
struct is_logically_castable_to_impl {
template <typename ToType>
struct is_bit_castable_to_impl {
template <typename ToType, typename std::enable_if_t<is_compound<ToType>()>* = nullptr>
constexpr bool operator()()
{
return false;
}

template <typename ToType, typename std::enable_if_t<not is_compound<ToType>()>* = nullptr>
constexpr bool operator()()
{
return is_logically_castable_impl<FromType, ToType>::value;
if (not cuda::std::is_trivially_copyable_v<FromType> ||
not cuda::std::is_trivially_copyable_v<ToType>) {
return false;
}
constexpr auto from_size = sizeof(cudf::device_storage_type_t<FromType>);
constexpr auto to_size = sizeof(cudf::device_storage_type_t<ToType>);
return from_size == to_size;
}
};

struct is_logically_castable_from_impl {
template <typename FromType>
struct is_bit_castable_from_impl {
template <typename FromType, typename std::enable_if_t<is_compound<FromType>()>* = nullptr>
constexpr bool operator()(data_type)
{
return false;
}

template <typename FromType, typename std::enable_if_t<not is_compound<FromType>()>* = nullptr>
constexpr bool operator()(data_type to)
{
return type_dispatcher(to, is_logically_castable_to_impl<FromType>{});
return cudf::type_dispatcher(to, is_bit_castable_to_impl<FromType>{});
}
};

/**
* @brief Indicates whether `from` is logically castable to `to`.
* @brief Indicates whether `from` is bit-castable to `to`.
*
* Data types that have the same size and underlying representation, e.g. INT32 and TIMESTAMP_DAYS
* which are both represented as 32-bit integers in memory, are eligible for logical cast.
* This casting is based on std::bit_cast. Data types that have the same size and are trivially
* copyable are eligible for this casting.
*
* See `cudf::logical_cast()` which returns a zero-copy `column_view` when casting between
* logically castable types.
* See `cudf::bit_cast()` which returns a zero-copy `column_view` when casting between
* bit-castable types.
*
* @param from The `data_type` to convert from
* @param to The `data_type` to convert to
* @return `true` if the types are logically castable
* @return `true` if the types are castable
*/
constexpr bool is_logically_castable(data_type from, data_type to)
constexpr bool is_bit_castable(data_type from, data_type to)
{
return type_dispatcher(from, is_logically_castable_from_impl{}, to);
return type_dispatcher(from, is_bit_castable_from_impl{}, to);
}

template <typename From, typename To>
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ size_type count_descendants(column_view parent)
return std::accumulate(begin, begin + parent.num_children(), size_type{parent.num_children()});
}

column_view logical_cast(column_view const& input, data_type type)
column_view bit_cast(column_view const& input, data_type type)
{
CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable");
CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable");
return column_view{type,
input._size,
input._data,
Expand All @@ -144,9 +144,9 @@ column_view logical_cast(column_view const& input, data_type type)
input._children};
}

mutable_column_view logical_cast(mutable_column_view const& input, data_type type)
mutable_column_view bit_cast(mutable_column_view const& input, data_type type)
{
CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable");
CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable");
return mutable_column_view{type,
input._size,
const_cast<void*>(input._data),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/copying/get_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct get_element_functor {
stream);

if (!key_index_scalar.is_valid(stream)) {
auto null_result = make_default_constructed_scalar(dict_view.keys().type());
auto null_result = make_default_constructed_scalar(dict_view.keys().type(), stream, mr);
null_result->set_valid(false, stream);
return null_result;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ struct in_place_fill_range_dispatch {
auto unscaled = static_cast<cudf::fixed_point_scalar<T> const&>(value).value();
using RepType = typename T::rep;
auto s = cudf::numeric_scalar<RepType>(unscaled, value.is_valid());
auto view = cudf::logical_cast(destination, s.type());
auto view = cudf::bit_cast(destination, s.type());
in_place_fill<RepType>(view, begin, end, s, stream);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/reductions/minmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ std::pair<std::unique_ptr<scalar>, std::unique_ptr<scalar>> minmax(
if (col.null_count() == col.size()) {
// this handles empty and all-null columns
// return scalars with valid==false
return {make_default_constructed_scalar(col.type()),
make_default_constructed_scalar(col.type())};
return {make_default_constructed_scalar(col.type(), stream, mr),
make_default_constructed_scalar(col.type(), stream, mr)};
}

return type_dispatcher(col.type(), minmax_functor{}, col, stream, mr);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/reductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ std::unique_ptr<scalar> reduce(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
std::unique_ptr<scalar> result = make_default_constructed_scalar(output_dtype);
std::unique_ptr<scalar> result = make_default_constructed_scalar(output_dtype, stream, mr);
result->set_valid(false, stream);

// check if input column is empty
Expand Down
28 changes: 20 additions & 8 deletions cpp/src/scalar/scalar_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,36 +100,48 @@ std::unique_ptr<scalar> make_fixed_width_scalar(data_type type,
namespace {
struct default_scalar_functor {
template <typename T>
std::unique_ptr<cudf::scalar> operator()()
std::unique_ptr<cudf::scalar> operator()(rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
using ScalarType = scalar_type_t<T>;
return std::unique_ptr<scalar>(new ScalarType);
return make_fixed_width_scalar(data_type(type_to_id<T>()), stream, mr);
}
};

template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<dictionary32>()
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<string_view>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
return std::unique_ptr<scalar>(new string_scalar("", false, stream, mr));
}

template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<dictionary32>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("dictionary type not supported");
}

template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<list_view>()
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<list_view>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("list_view type not supported");
}

template <>
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<struct_view>()
std::unique_ptr<cudf::scalar> default_scalar_functor::operator()<struct_view>(
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
CUDF_FAIL("struct_view type not supported");
}

} // namespace

std::unique_ptr<scalar> make_default_constructed_scalar(data_type type)
std::unique_ptr<scalar> make_default_constructed_scalar(data_type type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return type_dispatcher(type, default_scalar_functor{});
return type_dispatcher(type, default_scalar_functor{}, stream, mr);
}

} // namespace cudf
Loading

0 comments on commit 9204a7b

Please sign in to comment.