Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Feb 9, 2022
2 parents e363967 + 3fe168d commit 171377a
Show file tree
Hide file tree
Showing 112 changed files with 2,292 additions and 1,269 deletions.
449 changes: 224 additions & 225 deletions CHANGELOG.md

Large diffs are not rendered by default.

20 changes: 9 additions & 11 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,9 @@ if buildAll || hasArg libcudf; then
fi

# get the current count before the compile starts
FILES_IN_CCACHE=""
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v ccache)" ]]; then
FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
echo "$FILES_IN_CCACHE"
# zero the ccache statistics
ccache -z
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
# zero the sccache statistics
sccache --zero-stats
fi

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
Expand All @@ -216,11 +213,12 @@ if buildAll || hasArg libcudf; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
# get some ccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
MSG="${MSG}<br/>$FILES_IN_CCACHE"
HIT_RATE=$(ccache -s | grep "cache hit rate")
MSG="${MSG}<br/>${HIT_RATE}"
# get some sccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
CACHE_HITS=$(sccache -s | grep "Cache hits \+ [0-9]\+$" | awk '{ print $NF }')
HIT_RATE=$(echo - | awk "{printf \"%.2f\n\", $CACHE_HITS / $COMPILE_REQUESTS * 100}")
MSG="${MSG}<br/>cache hit rate ${HIT_RATE} %"
fi
MSG="${MSG}<br/>parallel setting: $PARALLEL_LEVEL"
MSG="${MSG}<br/>parallel build time: $compile_total seconds"
Expand Down
6 changes: 6 additions & 0 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
export VERSION_SUFFIX=`date +%y%m%d`
fi

export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"
export CMAKE_CXX_COMPILER_LAUNCHER="sccache"
export CMAKE_C_COMPILER_LAUNCHER="sccache"

################################################################################
# SETUP - Check environment
################################################################################
Expand Down Expand Up @@ -77,6 +81,8 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS
mkdir -p ${CONDA_BLD_DIR}/libcudf/work
cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
gpuci_logger "sccache stats"
sccache --show-stats

# Copy libcudf build metrics results
LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
Expand Down
10 changes: 7 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export DASK_DISTRIBUTED_GIT_TAG='2022.01.0'
# ucx-py version
export UCX_PY_VERSION='0.25.*'

export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"
export CMAKE_CXX_COMPILER_LAUNCHER="sccache"
export CMAKE_C_COMPILER_LAUNCHER="sccache"

################################################################################
# TRAP - Setup trap for removing jitify cache
################################################################################
Expand Down Expand Up @@ -245,15 +249,15 @@ fi

cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term

cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 6 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term

gpuci_logger "Test notebooks"
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
Expand Down
10 changes: 5 additions & 5 deletions ci/utils/nbtestlog2junitxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from enum import Enum


startingPatt = re.compile("^STARTING: ([\w\.\-]+)$")
skippingPatt = re.compile("^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
exitCodePatt = re.compile("^EXIT CODE: (\d+)$")
folderPatt = re.compile("^FOLDER: ([\w\.\-]+)$")
timePatt = re.compile("^real\s+([\d\.ms]+)$")
startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$")
skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$")
folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$")
timePatt = re.compile(r"^real\s+([\d\.ms]+)$")
linePatt = re.compile("^" + ("-" * 80) + "$")


Expand Down
8 changes: 5 additions & 3 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ build:
- PARALLEL_LEVEL
- VERSION_SUFFIX
- PROJECT_FLASH
- CCACHE_DIR
- CCACHE_NOHASHDIR
- CCACHE_COMPILERCHECK
- CMAKE_GENERATOR
- CMAKE_C_COMPILER_LAUNCHER
- CMAKE_CXX_COMPILER_LAUNCHER
- CMAKE_CUDA_COMPILER_LAUNCHER
- SCCACHE_S3_KEY_PREFIX=libcudf-aarch64 # [aarch64]
- SCCACHE_S3_KEY_PREFIX=libcudf-linux64 # [linux64]
- SCCACHE_BUCKET=rapids-sccache
- SCCACHE_REGION=us-west-2
- SCCACHE_IDLE_TIMEOUT=32768
run_exports:
- {{ pin_subpackage("libcudf", max_pin="x.x") }}

Expand Down
23 changes: 19 additions & 4 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ add_custom_command(
function(ConfigureBench CMAKE_BENCH_NAME)
add_executable(${CMAKE_BENCH_NAME} ${ARGN})
set_target_properties(
${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
Expand All @@ -69,19 +70,33 @@ function(ConfigureBench CMAKE_BENCH_NAME)
APPEND
COMMENT "Adding ${CMAKE_BENCH_NAME}"
)

install(
TARGETS ${CMAKE_BENCH_NAME}
COMPONENT testing
DESTINATION bin/benchmarks/libcudf
EXCLUDE_FROM_ALL
)
endfunction()

# This function takes in a benchmark name and benchmark source for nvbench benchmarks and handles
# setting all of the associated properties and linking to build the benchmark
function(ConfigureNVBench CMAKE_BENCH_NAME)
add_executable(${CMAKE_BENCH_NAME} ${ARGN})
set_target_properties(
${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::main
)
install(
TARGETS ${CMAKE_BENCH_NAME}
COMPONENT testing
DESTINATION bin/benchmarks/libcudf
EXCLUDE_FROM_ALL
)
endfunction()

# ##################################################################################################
Expand Down
27 changes: 14 additions & 13 deletions cpp/benchmarks/common/generate_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,45 +65,46 @@ T get_distribution_mean(distribution_params<T> const& dist)
}

// Utilities to determine the mean size of an element, given the data profile
template <typename T>
std::enable_if_t<cudf::is_fixed_width<T>(), size_t> avg_element_size(data_profile const& profile)
template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_width<T>())>
size_t non_fixed_width_size(data_profile const& profile)
{
return sizeof(T);
CUDF_FAIL("Should not be called, use `size_of` for this type instead");
}

template <typename T>
std::enable_if_t<!cudf::is_fixed_width<T>(), size_t> avg_element_size(data_profile const& profile)
template <typename T, CUDF_ENABLE_IF(!cudf::is_fixed_width<T>())>
size_t non_fixed_width_size(data_profile const& profile)
{
CUDF_FAIL("not implemented!");
}

template <>
size_t avg_element_size<cudf::string_view>(data_profile const& profile)
size_t non_fixed_width_size<cudf::string_view>(data_profile const& profile)
{
auto const dist = profile.get_distribution_params<cudf::string_view>().length_params;
return get_distribution_mean(dist);
}

template <>
size_t avg_element_size<cudf::list_view>(data_profile const& profile)
size_t non_fixed_width_size<cudf::list_view>(data_profile const& profile)
{
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean = get_distribution_mean(dist_params.length_params);
auto const element_size = cudf::size_of(cudf::data_type{dist_params.element_type});
return element_size * pow(single_level_mean, dist_params.max_depth);
}

struct avg_element_size_fn {
struct non_fixed_width_size_fn {
template <typename T>
size_t operator()(data_profile const& profile)
{
return avg_element_size<T>(profile);
return non_fixed_width_size<T>(profile);
}
};

size_t avg_element_bytes(data_profile const& profile, cudf::type_id tid)
size_t avg_element_size(data_profile const& profile, cudf::data_type dtype)
{
return cudf::type_dispatcher(cudf::data_type(tid), avg_element_size_fn{}, profile);
if (cudf::is_fixed_width(dtype)) { return cudf::size_of(dtype); }
return cudf::type_dispatcher(dtype, non_fixed_width_size_fn{}, profile);
}

/**
Expand Down Expand Up @@ -419,7 +420,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
random_value_fn<uint32_t>{profile.get_distribution_params<cudf::string_view>().length_params};
auto valid_dist = std::bernoulli_distribution{1. - profile.get_null_frequency()};

auto const avg_string_len = avg_element_size<cudf::string_view>(profile);
auto const avg_string_len = non_fixed_width_size<cudf::string_view>(profile);
auto const cardinality = std::min(profile.get_cardinality(), num_rows);
string_column_data samples(cardinality, cardinality * avg_string_len);
for (cudf::size_type si = 0; si < cardinality; ++si) {
Expand Down Expand Up @@ -593,7 +594,7 @@ std::unique_ptr<cudf::table> create_random_table(std::vector<cudf::type_id> cons
auto const out_dtype_ids = repeat_dtypes(dtype_ids, num_cols);
size_t const avg_row_bytes =
std::accumulate(out_dtype_ids.begin(), out_dtype_ids.end(), 0ul, [&](size_t sum, auto tid) {
return sum + avg_element_bytes(profile, tid);
return sum + avg_element_size(profile, cudf::data_type(tid));
});
cudf::size_type const num_rows = table_bytes.size / avg_row_bytes;

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/binaryop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ enum class binary_operator : int32_t {
PMOD, ///< positive modulo operator
///< If remainder is negative, this returns (remainder + divisor) % divisor
///< else, it returns (dividend % divisor)
PYMOD, ///< operator % but following python's sign rules for negatives
PYMOD, ///< operator % but following Python's sign rules for negatives
POW, ///< lhs ^ rhs
LOG_BASE, ///< logarithm to the base
ATAN2, ///< 2-argument arctangent
Expand Down
Loading

0 comments on commit 171377a

Please sign in to comment.