diff --git a/build.sh b/build.sh index bee66d819b4..7cbd0fceb5a 100755 --- a/build.sh +++ b/build.sh @@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then # Record build times if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then echo "Formatting build metrics" - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml - MSG="

" + MSG="" # get some sccache stats after the compile if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }') @@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"} echo "Metrics output dir: [$BMR_DIR]" mkdir -p ${BMR_DIR} - python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html + MSG_OUTFILE="$(mktemp)" + echo "$MSG" > "${MSG_OUTFILE}" + python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log fi diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index 9cb8afbff9f..3fe503f749e 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -1,10 +1,11 @@ # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # import argparse import os import sys import xml.etree.ElementTree as ET +from pathlib import Path from xml.dom import minidom parser = argparse.ArgumentParser() @@ -22,52 +23,50 @@ "--msg", type=str, default=None, - help="optional message to include in html output", + help="optional text file to include at the top of the html output", +) +parser.add_argument( + "--cmp_log", + type=str, + default=None, + help="optional baseline ninja_log to compare results", ) args = parser.parse_args() log_file = args.log_file -log_path = os.path.dirname(os.path.abspath(log_file)) - output_fmt = args.fmt +cmp_file = args.cmp_log # build a map of the log entries -entries = {} -with open(log_file) as log: - last = 0 - files = {} - for line in log: - entry = line.split() - if len(entry) > 4: - obj_file = entry[3] - file_size = ( - os.path.getsize(os.path.join(log_path, obj_file)) - if os.path.exists(obj_file) - else 0 - ) - start = int(entry[0]) - end = int(entry[1]) - # logic based on ninjatracing - if end < last: - files = {} - last = end - files.setdefault(entry[4], (entry[3], start, end, file_size)) - - # build entries from files dict - for entry in files.values(): - entries[entry[0]] = (entry[1], entry[2], entry[3]) - -# check file could be loaded and we have entries to report -if len(entries) == 0: - print("Could not parse", log_file) - exit() +def build_log_map(log_file): + entries = {} + log_path = os.path.dirname(os.path.abspath(log_file)) + with open(log_file) as log: + last = 0 + files = {} + for line in log: + entry = line.split() + if len(entry) > 4: + obj_file = entry[3] + file_size = ( + os.path.getsize(os.path.join(log_path, obj_file)) + if os.path.exists(obj_file) + else 0 + ) + start = int(entry[0]) + end = int(entry[1]) + # logic based on ninjatracing + if end < last: + files = {} + last = end + files.setdefault(entry[4], (entry[3], start, end, file_size)) + + # build entries from files dict + for entry in files.values(): + entries[entry[0]] = (entry[1], entry[2], entry[3]) + + return entries -# sort the entries by build-time (descending order) -sorted_list = sorted( - list(entries.keys()), - key=lambda k: entries[k][1] - entries[k][0], - reverse=True, -) # output results in XML format def output_xml(entries, sorted_list, args): @@ -147,14 +146,46 @@ def assign_entries_to_threads(entries): return (results, end_time) -# output chart results in HTML format -def output_html(entries, sorted_list, args): +# format the build-time +def format_build_time(input_time): + build_time = abs(input_time) + build_time_str = str(build_time) + " ms" + if build_time > 120000: # 2 minutes + minutes = int(build_time / 60000) + seconds = int(((build_time / 60000) - minutes) * 60) + build_time_str = "{:d}:{:02d} min".format(minutes, seconds) + elif build_time > 1000: + build_time_str = "{:.3f} s".format(build_time / 1000) + if input_time < 0: + build_time_str = "-" + build_time_str + return build_time_str + + +# format file size +def format_file_size(input_size): + file_size = abs(input_size) + file_size_str = "" + if file_size > 1000000: + file_size_str = "{:.3f} MB".format(file_size / 1000000) + elif file_size > 1000: + file_size_str = "{:.3f} KB".format(file_size / 1000) + elif file_size > 0: + file_size_str = str(file_size) + " bytes" + if input_size < 0: + file_size_str = "-" + file_size_str + return file_size_str + + +# Output chart results in HTML format +# Builds a standalone html file with no javascript or styles +def output_html(entries, sorted_list, cmp_entries, args): print("Build Metrics Report") - # Note: Jenkins does not support javascript nor style defined in the html - # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/ print("") if args.msg is not None: - print("

", args.msg, "

") + msg_file = Path(args.msg) + if msg_file.is_file(): + msg = msg_file.read_text() + print("

", msg, "

") # map entries to threads # the end_time is used to scale all the entries to a fixed output width @@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args): # adjust for the cellspacing prev_end = end + int(end_time / 500) - # format the build-time build_time = end - start - build_time_str = str(build_time) + " ms" - if build_time > 120000: # 2 minutes - minutes = int(build_time / 60000) - seconds = int(((build_time / 60000) - minutes) * 60) - build_time_str = "{:d}:{:02d} min".format(minutes, seconds) - elif build_time > 1000: - build_time_str = "{:.3f} s".format(build_time / 1000) + build_time_str = format_build_time(build_time) # assign color and accumulate legend values color = white @@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args): # done with this entry print("") # update the entry with just the computed output info - entries[name] = (build_time_str, color, entry[2]) + entries[name] = (build_time, color, entry[2]) # add a filler column at the end of each row print("") @@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args): # output detail table in build-time descending order print("") print( - "", - "", - "", - sep="", + "", "", "", sep="" ) + if cmp_entries: + print("", sep="") + print("") + for name in sorted_list: entry = entries[name] - build_time_str = entry[0] + build_time = entry[0] color = entry[1] file_size = entry[2] - # format file size - file_size_str = "" - if file_size > 1000000: - file_size_str = "{:.3f} MB".format(file_size / 1000000) - elif file_size > 1000: - file_size_str = "{:.3f} KB".format(file_size / 1000) - elif file_size > 0: - file_size_str = str(file_size) + " bytes" + build_time_str = format_build_time(build_time) + file_size_str = format_file_size(file_size) # output entry row print("", sep="", end="") print("", sep="", end="") - print("", sep="") + print("", sep="", end="") + # output diff column + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + diff_time_str = format_build_time(diff_time) + diff_color = white + diff_percent = int((diff_time / build_time) * 100) + if build_time > 60000: + if diff_percent > 20: + diff_color = red + diff_time_str = "" + diff_time_str + "" + elif diff_percent < -20: + diff_color = green + diff_time_str = "" + diff_time_str + "" + elif diff_percent > 0: + diff_color = yellow + print( + "", + sep="", + end="", + ) + print("") print("
FileCompile timeSize
FileCompile timeSizet-cmp
", name, "", build_time_str, "", file_size_str, "
", file_size_str, "", + diff_time_str, + "

") @@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args): print("", summary["green"], "") print("time < 1 second") print("", summary["white"], "") - print("") + print("") + + if cmp_entries: + print("") + print("time increase > 20%") + print("time increase > 0") + print("time decrease > 20%") + print( + "time change < 20%% or build time < 1 minute", + ) + print("
") + + print("") # output results in CSV format -def output_csv(entries, sorted_list, args): - print("time,size,file") +def output_csv(entries, sorted_list, cmp_entries, args): + print("time,size,file", end="") + if cmp_entries: + print(",diff", end="") + print() for name in sorted_list: entry = entries[name] build_time = entry[1] - entry[0] file_size = entry[2] - print(build_time, file_size, name, sep=",") + cmp_entry = ( + cmp_entries[name] if cmp_entries and name in cmp_entries else None + ) + print(build_time, file_size, name, sep=",", end="") + if cmp_entry: + diff_time = build_time - (cmp_entry[1] - cmp_entry[0]) + print(",", diff_time, sep="", end="") + print() + + +# parse log file into map +entries = build_log_map(log_file) +if len(entries) == 0: + print("Could not parse", log_file) + exit() + +# sort the entries by build-time (descending order) +sorted_list = sorted( + list(entries.keys()), + key=lambda k: entries[k][1] - entries[k][0], + reverse=True, +) +# load the comparison build log if available +cmp_entries = build_log_map(cmp_file) if cmp_file else None if output_fmt == "xml": output_xml(entries, sorted_list, args) elif output_fmt == "html": - output_html(entries, sorted_list, args) + output_html(entries, sorted_list, cmp_entries, args) else: - output_csv(entries, sorted_list, args) + output_csv(entries, sorted_list, cmp_entries, args)