From 7068aa5aa226a3d5f967e8038a860fd6fd1e70e0 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Thu, 9 Feb 2023 14:46:47 +0100 Subject: [PATCH 1/9] Log nvcc compile time by default This will create a csv file in the cpp/build directory that records the compilation of each translation unit and how long each phase of the nvcc compilation took. There does not seem to be a downside to enabling this, and it will be very helpful to diagnose build issues. To analyze the file, the following python code will help. It requires pandas, matplotlib, and seaborn: ------------------------------------------------------------ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from pathlib import Path from matplotlib import colors df = pd.read_csv("./nvcc_compile_log.csv") df = df.rename(columns=str.strip) df["seconds"] = df["metric"] / 1000 df["file"] = df["source file name"] df["phase"] = df["phase name"].str.strip() def categorize_time(s): if s < 60: return "less than a minute" else: return "more than a minute" dfp = df.query("phase!='nvcc (driver)'").pivot("file", values="seconds", columns="phase") dfp_sum = dfp.sum(axis="columns") df_fraction = dfp.divide(dfp_sum, axis="index") df_fraction["total time"] = dfp_sum df_fraction = df_fraction.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="fraction") dfp["total time"] = dfp_sum df_absolute = dfp.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="seconds") df_fraction["time category"] = dfp["total time"].apply(categorize_time) df_absolute["time category"] = dfp["total time"].apply(categorize_time) palette = { "gcc (preprocessing 4)": colors.hsv_to_rgb((0, 1, 1)), 'cudafe++': colors.hsv_to_rgb((0, 1, .75)), 'gcc (compiling)': colors.hsv_to_rgb((0, 1, .4)), "gcc (preprocessing 1)": colors.hsv_to_rgb((.33, 1, 1)), 'cicc': colors.hsv_to_rgb((.33, 1, 0.75)), 'ptxas': colors.hsv_to_rgb((.33, 1, 0.4)), 'fatbinary': "grey", } sns.displot( 
df_absolute.sort_values("total time"), y="file", hue="phase", hue_order=reversed(["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', "gcc (preprocessing 1)", 'cicc', 'ptxas', 'fatbinary', ]), palette=palette, weights="seconds", multiple="stack", kind="hist", height=20, ) plt.xlabel("seconds"); plt.savefig('absolute_compile_times.png') sns.displot( df_fraction.sort_values('total time'), y="file", hue="phase", hue_order=reversed(["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', "gcc (preprocessing 1)", 'cicc', 'ptxas', 'fatbinary', ]), palette=palette, weights="fraction", multiple="stack", kind="hist", height=15, ) plt.xlabel("fraction"); plt.savefig("relative_compile_times.png") --- cpp/cmake/modules/ConfigureCUDA.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 5e68ca5bc4..1422cb07ac 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -21,6 +21,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) endif() +list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") From c6dff969e935d8edd17b18034c31fff59eb0f28c Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Thu, 9 Feb 2023 15:48:31 +0100 Subject: [PATCH 2/9] Move default location of compile log Hopefully this prevents segfaults in CI. 
--- cpp/cmake/modules/ConfigureCUDA.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 1422cb07ac..e1a8725af8 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -21,7 +21,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) endif() -list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") +list(APPEND RAFT_CUDA_FLAGS "--time=CMakeFiles/nvcc_compile_log.csv") list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") From d175ed6203de923f32cf60322fd68af1c2ff96d4 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Fri, 10 Feb 2023 10:19:36 +0100 Subject: [PATCH 3/9] build.sh: Add --time option to log nvcc compile time The time option is disabled by default. When enabled, writes a log of compilation times to cpp/build/nvcc_compile_log.csv. This is not supported in CI, as it leads to seg faults. --- build.sh | 10 +++++++++- cpp/CMakeLists.txt | 1 + cpp/cmake/modules/ConfigureCUDA.cmake | 5 ++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index b5a72f4205..b75c4e72fe 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests bench clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn -h" +VALIDARGS="clean libraft pylibraft raft-dask docs tests bench clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --time -h" HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -45,6 +45,8 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument) --cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used to speedup the build process. + --time - Enable nvcc compilation time logging into cpp/build/nvcc_compile_log.csv. + Results can be interpreted with cpp/scripts/analyze_nvcc_log.py -h - print this text default action (no args) is to build libraft, tests, pylibraft and raft-dask targets @@ -71,6 +73,7 @@ BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_ CACHE_ARGS="" NVTX=ON +LOG_COMPILE_TIME=OFF CLEAN=0 UNINSTALL=0 DISABLE_DEPRECATION_WARNINGS=ON @@ -297,6 +300,10 @@ fi if hasArg --no-nvtx; then NVTX=OFF fi +if hasArg --time; then + echo "-- Logging compile times to cpp/build/nvcc_compile_log.csv" + LOG_COMPILE_TIME=ON +fi if hasArg --show_depr_warn; then DISABLE_DEPRECATION_WARNINGS=OFF fi @@ -356,6 +363,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DRAFT_COMPILE_LIBRARIES=${COMPILE_LIBRARIES} \ -DRAFT_NVTX=${NVTX} \ + -DCUDA_LOG_COMPILE_TIME=${LOG_COMPILE_TIME} \ -DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \ -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_BENCH=${BUILD_BENCH} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bdaacb4a85..7c618cbd20 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -52,6 +52,7 @@ option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF ) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_log.csv" OFF) option(DETECT_CONDA_ENV 
"Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index e1a8725af8..95dbcfad07 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -21,7 +21,10 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) endif() -list(APPEND RAFT_CUDA_FLAGS "--time=CMakeFiles/nvcc_compile_log.csv") +if(CUDA_LOG_COMPILE_TIME) + list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") +endif() + list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") From 9acd1d09ba3dc108331e5536bbdbb7dd87f486b2 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Fri, 10 Feb 2023 11:10:59 +0100 Subject: [PATCH 4/9] Add script to analyze nvcc compile time log --- cpp/cmake/modules/ConfigureCUDA.cmake | 2 +- cpp/scripts/analyze_nvcc_log.py | 143 ++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100755 cpp/scripts/analyze_nvcc_log.py diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 95dbcfad07..c733d46985 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at diff --git a/cpp/scripts/analyze_nvcc_log.py b/cpp/scripts/analyze_nvcc_log.py new file mode 100755 index 0000000000..5f54fcd9ad --- /dev/null +++ b/cpp/scripts/analyze_nvcc_log.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from pathlib import Path +from matplotlib import colors + +def main(input_path): + input_path = Path(input_path) + print("-- loading data") + df = pd.read_csv(input_path) + + print("-- analyzing data") + # Strip spaces from column names + df = df.rename(columns=str.strip) + df["seconds"] = df["metric"] / 1000 + df["file"] = df["source file name"] + df["phase"] = df["phase name"].str.strip() + + def categorize_time(s): + if s < 60: + return "less than a minute" + else: + return "more than a minute" + + dfp = (df + # Remove nvcc driver entries. They don't contain a source file name + .query("phase!='nvcc (driver)'") + # Make a pivot table containing files as row, phase (preprocessing, + # cicc, etc.) as column and the total times as table entries. NOTE: + # if compiled for multiple archs, the archs will be summed. 
+ .pivot_table(index="file", values="seconds", columns="phase", aggfunc='sum')) + + dfp_sum = dfp.sum(axis="columns") + + df_fraction = dfp.divide(dfp_sum, axis="index") + df_fraction["total time"] = dfp_sum + df_fraction = df_fraction.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="fraction") + + dfp["total time"] = dfp_sum + df_absolute = dfp.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="seconds") + + df_fraction["time category"] = dfp["total time"].apply(categorize_time) + df_absolute["time category"] = dfp["total time"].apply(categorize_time) + + # host: light red to dark red (preprocessing, cudafe, gcc (compiling)) + # device: ligt green to dark green (preprocessing, cicc, ptxas) + palette = { + "gcc (preprocessing 4)": colors.hsv_to_rgb((0, 1, 1)), + 'cudafe++': colors.hsv_to_rgb((0, 1, .75)), + 'gcc (compiling)': colors.hsv_to_rgb((0, 1, .4)), + "gcc (preprocessing 1)": colors.hsv_to_rgb((.33, 1, 1)), + 'cicc': colors.hsv_to_rgb((.33, 1, 0.75)), + 'ptxas': colors.hsv_to_rgb((.33, 1, 0.4)), + 'fatbinary': "grey", + } + + print("-- Ten longest translation units:") + colwidth = pd.get_option('display.max_colwidth') - 1 + dfp = dfp.reset_index() + dfp["file"] = dfp["file"].apply(lambda s: s[-colwidth:]) + print(dfp.sort_values("total time", ascending=False).reset_index().loc[:10]) + + print("-- Plotting absolute compile times") + abs_out_path = f"{input_path}.absolute.compile_times.png" + sns.displot( + df_absolute.sort_values("total time").reset_index(), + y="file", + hue="phase", + hue_order=reversed( + ["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', + "gcc (preprocessing 1)", 'cicc', 'ptxas', + 'fatbinary', + ]), + palette=palette, + weights="seconds", + multiple="stack", + kind="hist", + height=20, + ) + plt.xlabel("seconds"); + plt.savefig(abs_out_path) + print(f"-- Wrote absolute compile time plot to {abs_out_path}") + + print("-- Plotting relative compile times") + rel_out_path = 
f"{input_path}.relative.compile_times.png" + sns.displot( + df_fraction.sort_values('total time').reset_index(), + y="file", + hue="phase", + hue_order=reversed(["gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)', + "gcc (preprocessing 1)", 'cicc', 'ptxas', + 'fatbinary', + ]), + palette=palette, + weights="fraction", + multiple="stack", + kind="hist", + height=15, + ) + plt.xlabel("fraction"); + plt.savefig(rel_out_path) + print(f"-- Wrote relative compile time plot to {rel_out_path}") + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("""NVCC log analyzer + + Analyzes nvcc logs and outputs a figure with highest ranking translation + units. + + Usage: + python analyze_nvcc_log.py + cpp/scripts/analyze_nvcc_log.py + + Generate the nvcc log file by adding: + + list(APPEND RAFT_CUDA_FLAGS "--time=CMakeFiles/nvcc_compile_log.csv") + + to cpp/cmake/modules/ConfigureCUDA.cmake. + """) + sys.exit(1) + input_path = Path(sys.argv[1]) + if not input_path.exists(): + print(f"Path {input_path} does not exist.") + else: + main(input_path) From 2aec639b5e67f3a8c7f6f0d6e5faa9538d405a31 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Tue, 21 Mar 2023 18:12:29 +0100 Subject: [PATCH 5/9] Test nvcc log per source file in CI --- cpp/CMakeLists.txt | 15 +++++++++++++++ cpp/cmake/modules/ConfigureCUDA.cmake | 4 ---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7c618cbd20..a88dbba249 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -463,6 +463,21 @@ if(RAFT_COMPILE_LIBRARY) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") + # For each source file in raft_lib generate a CSV file in cpp/build with filename + # nvcc_log_[...].csv if(CUDA_LOG_COMPILE_TIME) + get_target_property(sources raft_lib SOURCES) + foreach(source IN LISTS sources) + cmake_path(IS_ABSOLUTE source is_abs) + if(is_abs) + 
cmake_path( + RELATIVE_PATH source BASE_DIRECTORY ${PROJECT_SOURCE_DIR} + ) # convert to relative path if not already one + endif() + string(MAKE_C_IDENTIFIER "nvcc_log_${source}" filename) # convert to valid filename + set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS "--time=${filename}.csv") + endforeach() + # endif() + endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index c733d46985..90cdfbc67c 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -21,10 +21,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) endif() -if(CUDA_LOG_COMPILE_TIME) - list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") -endif() - list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") From 713e7b89c0d5f372d6ada0da47ac28128c7899e9 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Tue, 21 Mar 2023 18:47:18 +0100 Subject: [PATCH 6/9] Try to fix segmentation faults (again) Perhaps the file system outside the CMakeFiles/ directory is not writable. 
--- cpp/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a88dbba249..e3606c8051 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -474,7 +474,9 @@ if(RAFT_COMPILE_LIBRARY) ) # convert to relative path if not already one endif() string(MAKE_C_IDENTIFIER "nvcc_log_${source}" filename) # convert to valid filename - set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS "--time=${filename}.csv") + set_source_files_properties( + ${source} PROPERTIES COMPILE_FLAGS "--time=CMakeFiles/${filename}.csv" + ) endforeach() # endif() From 1aa43839caf31e9255b175f9af668346a454fe89 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 29 Mar 2023 16:37:38 +0200 Subject: [PATCH 7/9] Revert "Try to fix segmentation faults (again)" This reverts commit 713e7b89c0d5f372d6ada0da47ac28128c7899e9. --- cpp/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e3606c8051..a88dbba249 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -474,9 +474,7 @@ if(RAFT_COMPILE_LIBRARY) ) # convert to relative path if not already one endif() string(MAKE_C_IDENTIFIER "nvcc_log_${source}" filename) # convert to valid filename - set_source_files_properties( - ${source} PROPERTIES COMPILE_FLAGS "--time=CMakeFiles/${filename}.csv" - ) + set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS "--time=${filename}.csv") endforeach() # endif() From 1872afd770f34006d2344f6193331ca81c59daa6 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 29 Mar 2023 16:37:48 +0200 Subject: [PATCH 8/9] Revert "Test nvcc log per source file in CI" This reverts commit 2aec639b5e67f3a8c7f6f0d6e5faa9538d405a31. 
--- cpp/CMakeLists.txt | 15 --------------- cpp/cmake/modules/ConfigureCUDA.cmake | 4 ++++ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a88dbba249..7c618cbd20 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -463,21 +463,6 @@ if(RAFT_COMPILE_LIBRARY) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") - # For each source file in raft_lib generate a CSV file in cpp/build with filename - # nvcc_log_[...].csv if(CUDA_LOG_COMPILE_TIME) - get_target_property(sources raft_lib SOURCES) - foreach(source IN LISTS sources) - cmake_path(IS_ABSOLUTE source is_abs) - if(is_abs) - cmake_path( - RELATIVE_PATH source BASE_DIRECTORY ${PROJECT_SOURCE_DIR} - ) # convert to relative path if not already one - endif() - string(MAKE_C_IDENTIFIER "nvcc_log_${source}" filename) # convert to valid filename - set_source_files_properties(${source} PROPERTIES COMPILE_FLAGS "--time=${filename}.csv") - endforeach() - # endif() - endif() if(TARGET raft_lib AND (NOT TARGET raft::raft_lib)) diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 90cdfbc67c..c733d46985 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -21,6 +21,10 @@ if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) endif() +if(CUDA_LOG_COMPILE_TIME) + list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv") +endif() + list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") From 1b716efdcc286c0c41abe58351e6ad63dfbabe03 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 29 Mar 2023 16:55:56 +0200 Subject: [PATCH 9/9] Implement 
review feedback --- cpp/scripts/analyze_nvcc_log.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cpp/scripts/analyze_nvcc_log.py b/cpp/scripts/analyze_nvcc_log.py index 5f54fcd9ad..d06e05d265 100755 --- a/cpp/scripts/analyze_nvcc_log.py +++ b/cpp/scripts/analyze_nvcc_log.py @@ -33,12 +33,6 @@ def main(input_path): df["file"] = df["source file name"] df["phase"] = df["phase name"].str.strip() - def categorize_time(s): - if s < 60: - return "less than a minute" - else: - return "more than a minute" - dfp = (df # Remove nvcc driver entries. They don't contain a source file name .query("phase!='nvcc (driver)'") @@ -56,9 +50,6 @@ def categorize_time(s): dfp["total time"] = dfp_sum df_absolute = dfp.melt(ignore_index=False, id_vars="total time", var_name="phase", value_name="seconds") - df_fraction["time category"] = dfp["total time"].apply(categorize_time) - df_absolute["time category"] = dfp["total time"].apply(categorize_time) - # host: light red to dark red (preprocessing, cudafe, gcc (compiling)) # device: ligt green to dark green (preprocessing, cicc, ptxas) palette = {