Skip to content

Commit

Permalink
build.sh: Add option to log nvcc compile times (#1262)
Browse files Browse the repository at this point in the history
Add `--time` option to `build.sh` that enables compile time logging of `nvcc`. 

Also, add a script `cpp/scripts/analyze_nvcc_log.py` to find the translation units that take the longest time. 

Output looks like: 

```
$ cpp/scripts/analyze_nvcc_log.py cpp/build/nvcc_compile_log.csv
-- loading data
-- analyzing data
-- Ten longest translation units:
phase  index                                               file        cicc   cudafe++  fatbinary  gcc (compiling)  gcc (preprocessing 1)  gcc (preprocessing 4)        ptxas   total time
0         10  ions/detail/canberra_double_double_double_int.cu    42.431063  10.601856   0.020979         6.747153               3.721194               2.093567  1618.390375  1684.006186
1         11  zations/detail/canberra_float_float_float_int.cu    36.928960   9.804138   0.011537         6.796088               3.481156               1.790703  1584.262875  1643.075457
2         85  ors/specializations/refine_d_uint64_t_uint8_t.cu   602.935531  14.980877   0.529673        36.300566               6.270717               2.889723   933.622969  1597.530056
3         84  bors/specializations/refine_d_uint64_t_int8_t.cu   606.513281  16.243960   0.729282        39.981113               5.608029               3.028493   897.241469  1569.345628
4         53  stance/neighbors/ivfpq_search_int8_t_uint64_t.cu   841.049750   8.233967   1.025554        24.248578               4.069022               1.747108   631.193734  1511.567713
5         52  istance/neighbors/ivfpq_search_float_uint64_t.cu   837.241437   8.145278   1.042313        24.400606               3.433528               1.882623   627.786672  1503.932457
6         54  tance/neighbors/ivfpq_search_uint8_t_uint64_t.cu   846.706656   8.371286   1.025517        24.094691               3.432749               1.645345   618.319234  1503.595479
7         76  izations/detail/ivfpq_search_uint8_t_uint64_t.cu   698.726266   7.086368   1.050021        39.727723               3.259101               1.333935   406.509937  1157.693351
8         74  alizations/detail/ivfpq_search_float_uint64_t.cu   706.702516   6.905794   1.049731        39.923895               2.814361               2.057154   395.604000  1155.057450
9         75  lizations/detail/ivfpq_search_int8_t_uint64_t.cu   689.390281   6.483386   1.025864        39.865668               3.121696               1.297788   409.099562  1150.284245
10        83  hbors/specializations/refine_d_uint64_t_float.cu   334.705594  15.466444   0.680270        36.551977               5.405133               2.947568   715.708781  1111.465767
-- Plotting absolute compile times
-- Wrote absolute compile time plot to cpp/build/nvcc_compile_log.csv.absolute.compile_times.png
-- Plotting relative compile times
-- Wrote relative compile time plot to cpp/build/nvcc_compile_log.csv.relative.compile_times.png
```

Authors:
  - Allard Hendriksen (https://github.com/ahendriksen)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1262
  • Loading branch information
ahendriksen authored Mar 29, 2023
1 parent c2cb779 commit e963f5a
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 2 deletions.
10 changes: 9 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ARGS=$*
# scripts, and that this script resides in the repo dir!
# Absolute path of the directory containing this script. Expansions are quoted
# so a checkout path containing spaces or glob characters is handled, and
# `&&` prevents `pwd` from reporting the caller's directory if `cd` fails.
REPODIR=$(cd "$(dirname "$0")" && pwd)

# Space-separated list of the targets and flags this script recognizes.
# The second assignment replaces the first and additionally lists --time.
# NOTE(review): presumably command-line arguments are validated against this
# list elsewhere in the script — confirm.
VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn -h"
VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --time -h"
HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<tool>] [--limit-tests=<targets>] [--limit-bench-prims=<targets>] [--limit-bench-ann=<targets>]
where <target> is:
clean - remove all existing build artifacts and configuration (start over)
Expand Down Expand Up @@ -48,6 +48,8 @@ HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<to
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
--cache-tool=<tool> - pass the build cache tool (eg: ccache, sccache, distcc) that will be used
to speedup the build process.
--time - Enable nvcc compilation time logging into cpp/build/nvcc_compile_log.csv.
Results can be interpreted with cpp/scripts/analyze_nvcc_log.py
-h - print this text
default action (no args) is to build libraft, tests, pylibraft and raft-dask targets
Expand Down Expand Up @@ -75,6 +77,7 @@ BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_

# Default values for build settings.
CACHE_ARGS=""
# NVTX is on by default; the --no-nvtx flag sets it to OFF.
NVTX=ON
# nvcc compile-time logging is off by default; the --time flag sets it to ON.
LOG_COMPILE_TIME=OFF
CLEAN=0
UNINSTALL=0
# Deprecation warnings are disabled by default; --show_depr_warn sets this to OFF.
DISABLE_DEPRECATION_WARNINGS=ON
Expand Down Expand Up @@ -322,6 +325,10 @@ fi
# --no-nvtx: turn NVTX off (the value is passed to CMake as RAFT_NVTX).
if hasArg --no-nvtx; then
NVTX=OFF
fi
# --time: announce the log location and turn on nvcc compile-time logging
# (the value is passed to CMake as CUDA_LOG_COMPILE_TIME).
if hasArg --time; then
echo "-- Logging compile times to cpp/build/nvcc_compile_log.csv"
LOG_COMPILE_TIME=ON
fi
# --show_depr_warn: stop disabling deprecation warnings
# (the value is passed to CMake as DISABLE_DEPRECATION_WARNINGS).
if hasArg --show_depr_warn; then
DISABLE_DEPRECATION_WARNINGS=OFF
fi
Expand Down Expand Up @@ -379,6 +386,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DRAFT_COMPILE_LIBRARY=${COMPILE_LIBRARY} \
-DRAFT_NVTX=${NVTX} \
-DCUDA_LOG_COMPILE_TIME=${LOG_COMPILE_TIME} \
-DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
)
# User-facing build switches; each defaults to the value given as the last argument.
option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF)
# build.sh sets CUDA_LOG_COMPILE_TIME=ON when invoked with --time.
option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_log.csv" OFF)
option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON)
option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON)
option(DISABLE_OPENMP "Disable OpenMP" OFF)
Expand Down
6 changes: 5 additions & 1 deletion cpp/cmake/modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -21,6 +21,10 @@ if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
endif()

# When CUDA_LOG_COMPILE_TIME is enabled, pass nvcc's --time flag so that it
# records compile times in nvcc_compile_log.csv.
# NOTE(review): the file name is relative, so where the log ends up depends on
# the directory nvcc is invoked from; build.sh reports
# cpp/build/nvcc_compile_log.csv — confirm for other generators/build layouts.
if(CUDA_LOG_COMPILE_TIME)
list(APPEND RAFT_CUDA_FLAGS "--time=nvcc_compile_log.csv")
endif()

# Enable nvcc's experimental extended-lambda and relaxed-constexpr support.
list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
# Define CUDA_API_PER_THREAD_DEFAULT_STREAM for both the C++ and the CUDA flags
# so that both flag sets carry the same definition.
list(APPEND RAFT_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM")
list(APPEND RAFT_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM")
Expand Down
134 changes: 134 additions & 0 deletions cpp/scripts/analyze_nvcc_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from matplotlib import colors

def _plot_phase_breakdown(data, weights, height, out_path, palette, hue_order):
    """Draw a stacked horizontal histogram of compile phases per file and save it.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format table with "file" and "phase" columns plus the column
        named by `weights`.
    weights : str
        Name of the column that determines bar lengths; also used as the
        x-axis label.
    height : int
        Figure height passed to seaborn.
    out_path : str
        Destination of the rendered image.
    palette : dict
        Maps each phase name to its color.
    hue_order : list of str
        Order of the phases within the plot.
    """
    sns.displot(
        data,
        y="file",
        hue="phase",
        hue_order=hue_order,
        palette=palette,
        weights=weights,
        multiple="stack",
        kind="hist",
        height=height,
    )
    plt.xlabel(weights)
    plt.savefig(out_path)


def main(input_path):
    """Analyze an nvcc `--time` CSV log: print the slowest files and plot them.

    Reads the CSV at `input_path`, sums the time of every compilation phase
    per source file, prints the ten files with the largest total, and writes
    two plots next to the input file:

    * ``<input_path>.absolute.compile_times.png`` - seconds per phase
    * ``<input_path>.relative.compile_times.png`` - fraction of the file's
      total time per phase

    Parameters
    ----------
    input_path : str or pathlib.Path
        Path to the CSV log. It must contain the columns "metric",
        "source file name" and "phase name" (surrounding spaces in column
        names are tolerated).
    """
    input_path = Path(input_path)
    print("-- loading data")
    df = pd.read_csv(input_path)

    print("-- analyzing data")
    # Strip spaces from column names
    df = df.rename(columns=str.strip)
    # NOTE(review): dividing by 1000 assumes "metric" is in milliseconds - confirm.
    df["seconds"] = df["metric"] / 1000
    df["file"] = df["source file name"]
    df["phase"] = df["phase name"].str.strip()

    dfp = (
        df
        # Remove nvcc driver entries. They don't contain a source file name
        .query("phase!='nvcc (driver)'")
        # Make a pivot table containing files as row, phase (preprocessing,
        # cicc, etc.) as column and the total times as table entries. NOTE:
        # if compiled for multiple archs, the archs will be summed.
        .pivot_table(index="file", values="seconds", columns="phase", aggfunc='sum')
    )

    dfp_sum = dfp.sum(axis="columns")

    # Per-file share of each phase, in long format for plotting.
    df_fraction = dfp.divide(dfp_sum, axis="index")
    df_fraction["total time"] = dfp_sum
    df_fraction = df_fraction.melt(
        ignore_index=False, id_vars="total time", var_name="phase", value_name="fraction"
    )

    # Per-file seconds of each phase, in long format for plotting.
    dfp["total time"] = dfp_sum
    df_absolute = dfp.melt(
        ignore_index=False, id_vars="total time", var_name="phase", value_name="seconds"
    )

    # host: light red to dark red (preprocessing, cudafe, gcc (compiling))
    # device: light green to dark green (preprocessing, cicc, ptxas)
    palette = {
        "gcc (preprocessing 4)": colors.hsv_to_rgb((0, 1, 1)),
        'cudafe++': colors.hsv_to_rgb((0, 1, .75)),
        'gcc (compiling)': colors.hsv_to_rgb((0, 1, .4)),
        "gcc (preprocessing 1)": colors.hsv_to_rgb((.33, 1, 1)),
        'cicc': colors.hsv_to_rgb((.33, 1, 0.75)),
        'ptxas': colors.hsv_to_rgb((.33, 1, 0.4)),
        'fatbinary': "grey",
    }
    # The phases are listed host-first and handed to seaborn in reverse. A
    # real list is used (not a one-shot `reversed` iterator) so the order can
    # be iterated more than once.
    phase_order = [
        "gcc (preprocessing 4)", 'cudafe++', 'gcc (compiling)',
        "gcc (preprocessing 1)", 'cicc', 'ptxas',
        'fatbinary',
    ]
    hue_order = phase_order[::-1]

    print("-- Ten longest translation units:")
    # Keep only the tail of each path so the file column is not cut off by
    # pandas' column-width limit.
    colwidth = pd.get_option('display.max_colwidth') - 1
    dfp = dfp.reset_index()
    dfp["file"] = dfp["file"].apply(lambda s: s[-colwidth:])
    # head(10) yields exactly ten rows; the label-based `.loc[:10]` is
    # inclusive of label 10 and would print eleven.
    print(dfp.sort_values("total time", ascending=False).reset_index().head(10))

    print("-- Plotting absolute compile times")
    abs_out_path = f"{input_path}.absolute.compile_times.png"
    _plot_phase_breakdown(
        df_absolute.sort_values("total time").reset_index(),
        weights="seconds",
        height=20,
        out_path=abs_out_path,
        palette=palette,
        hue_order=hue_order,
    )
    print(f"-- Wrote absolute compile time plot to {abs_out_path}")

    print("-- Plotting relative compile times")
    rel_out_path = f"{input_path}.relative.compile_times.png"
    _plot_phase_breakdown(
        df_fraction.sort_values('total time').reset_index(),
        weights="fraction",
        height=15,
        out_path=rel_out_path,
        palette=palette,
        hue_order=hue_order,
    )
    print(f"-- Wrote relative compile time plot to {rel_out_path}")

# Command-line entry point: expects exactly one argument, the path to the nvcc
# compile-time log, and exits with a non-zero status on incorrect usage.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # `print`, not `printf` (which is undefined in Python), and exit
        # afterwards so that the missing argument is never indexed below.
        print("""NVCC log analyzer

Analyzes nvcc logs and outputs a figure with highest ranking translation
units.

Usage:
    python analyze_nvcc_log.py <nvcc_log_file.csv>
    cpp/scripts/analyze_nvcc_log.py <nvcc_log_file.csv>

Generate the nvcc log file (cpp/build/nvcc_compile_log.csv) by building with:
    ./build.sh <targets> --time
or by configuring CMake with -DCUDA_LOG_COMPILE_TIME=ON.
""", file=sys.stderr)
        sys.exit(1)

    input_path = Path(sys.argv[1])
    if not input_path.exists():
        print(f"Path {input_path} does not exist.", file=sys.stderr)
        sys.exit(1)

    main(input_path)

0 comments on commit e963f5a

Please sign in to comment.