Skip to content

Commit

Permalink
Merge branch 'branch-22.04' into bug-regex-hex-a
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Feb 8, 2022
2 parents 3ffb8ca + 1bc3727 commit 2456ed7
Show file tree
Hide file tree
Showing 30 changed files with 608 additions and 298 deletions.
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,15 @@ fi

cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
- py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term
+ py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
- py.test -n 6 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term
+ py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term

cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
- py.test -n 6 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term
+ py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term

gpuci_logger "Test notebooks"
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
Expand Down
23 changes: 19 additions & 4 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ add_custom_command(
function(ConfigureBench CMAKE_BENCH_NAME)
add_executable(${CMAKE_BENCH_NAME} ${ARGN})
set_target_properties(
- ${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
- "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
+ ${CMAKE_BENCH_NAME}
+ PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
+ INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
Expand All @@ -69,19 +70,33 @@ function(ConfigureBench CMAKE_BENCH_NAME)
APPEND
COMMENT "Adding ${CMAKE_BENCH_NAME}"
)

+ install(
+ TARGETS ${CMAKE_BENCH_NAME}
+ COMPONENT testing
+ DESTINATION bin/benchmarks/libcudf
+ EXCLUDE_FROM_ALL
+ )
endfunction()

# This function takes in a benchmark name and benchmark source for nvbench benchmarks and handles
# setting all of the associated properties and linking to build the benchmark
function(ConfigureNVBench CMAKE_BENCH_NAME)
add_executable(${CMAKE_BENCH_NAME} ${ARGN})
set_target_properties(
- ${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
- "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
+ ${CMAKE_BENCH_NAME}
+ PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
+ INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::main
)
+ install(
+ TARGETS ${CMAKE_BENCH_NAME}
+ COMPONENT testing
+ DESTINATION bin/benchmarks/libcudf
+ EXCLUDE_FROM_ALL
+ )
endfunction()

# ##################################################################################################
Expand Down
27 changes: 14 additions & 13 deletions cpp/benchmarks/common/generate_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,45 +65,46 @@ T get_distribution_mean(distribution_params<T> const& dist)
}

// Utilities to determine the mean size of an element, given the data profile
- template <typename T>
- std::enable_if_t<cudf::is_fixed_width<T>(), size_t> avg_element_size(data_profile const& profile)
+ template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_width<T>())>
+ size_t non_fixed_width_size(data_profile const& profile)
{
- return sizeof(T);
+ CUDF_FAIL("Should not be called, use `size_of` for this type instead");
}

- template <typename T>
- std::enable_if_t<!cudf::is_fixed_width<T>(), size_t> avg_element_size(data_profile const& profile)
+ template <typename T, CUDF_ENABLE_IF(!cudf::is_fixed_width<T>())>
+ size_t non_fixed_width_size(data_profile const& profile)
{
CUDF_FAIL("not implemented!");
}

template <>
- size_t avg_element_size<cudf::string_view>(data_profile const& profile)
+ size_t non_fixed_width_size<cudf::string_view>(data_profile const& profile)
{
auto const dist = profile.get_distribution_params<cudf::string_view>().length_params;
return get_distribution_mean(dist);
}

template <>
- size_t avg_element_size<cudf::list_view>(data_profile const& profile)
+ size_t non_fixed_width_size<cudf::list_view>(data_profile const& profile)
{
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean = get_distribution_mean(dist_params.length_params);
auto const element_size = cudf::size_of(cudf::data_type{dist_params.element_type});
return element_size * pow(single_level_mean, dist_params.max_depth);
}

- struct avg_element_size_fn {
+ struct non_fixed_width_size_fn {
template <typename T>
size_t operator()(data_profile const& profile)
{
- return avg_element_size<T>(profile);
+ return non_fixed_width_size<T>(profile);
}
};

- size_t avg_element_bytes(data_profile const& profile, cudf::type_id tid)
+ size_t avg_element_size(data_profile const& profile, cudf::data_type dtype)
{
- return cudf::type_dispatcher(cudf::data_type(tid), avg_element_size_fn{}, profile);
+ if (cudf::is_fixed_width(dtype)) { return cudf::size_of(dtype); }
+ return cudf::type_dispatcher(dtype, non_fixed_width_size_fn{}, profile);
}

/**
Expand Down Expand Up @@ -419,7 +420,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
random_value_fn<uint32_t>{profile.get_distribution_params<cudf::string_view>().length_params};
auto valid_dist = std::bernoulli_distribution{1. - profile.get_null_frequency()};

- auto const avg_string_len = avg_element_size<cudf::string_view>(profile);
+ auto const avg_string_len = non_fixed_width_size<cudf::string_view>(profile);
auto const cardinality = std::min(profile.get_cardinality(), num_rows);
string_column_data samples(cardinality, cardinality * avg_string_len);
for (cudf::size_type si = 0; si < cardinality; ++si) {
Expand Down Expand Up @@ -593,7 +594,7 @@ std::unique_ptr<cudf::table> create_random_table(std::vector<cudf::type_id> cons
auto const out_dtype_ids = repeat_dtypes(dtype_ids, num_cols);
size_t const avg_row_bytes =
std::accumulate(out_dtype_ids.begin(), out_dtype_ids.end(), 0ul, [&](size_t sum, auto tid) {
- return sum + avg_element_bytes(profile, tid);
+ return sum + avg_element_size(profile, cudf::data_type(tid));
});
cudf::size_type const num_rows = table_bytes.size / avg_row_bytes;

Expand Down
Loading

0 comments on commit 2456ed7

Please sign in to comment.