diff --git a/build.sh b/build.sh
index 45074a6645f..c2eba134c35 100755
--- a/build.sh
+++ b/build.sh
@@ -230,6 +230,7 @@ if buildAll || hasArg libcudf; then
         fi
         echo "$MSG"
         python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
+        cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
     fi

     if [[ ${INSTALL_TARGET} != "" ]]; then
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index f23296038f2..6f19f174da0 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -85,6 +85,7 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
   gpuci_logger "Copying build metrics results"
   mkdir -p "$WORKSPACE/build-metrics"
   cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
+  cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja.log"
 fi

 gpuci_logger "Build conda pkg for libcudf_kafka"
diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py
index bac6697da82..33c369b254f 100755
--- a/cpp/scripts/sort_ninja_log.py
+++ b/cpp/scripts/sort_ninja_log.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 import argparse
 import os
@@ -34,49 +34,63 @@
 # build a map of the log entries
 entries = {}
 with open(log_file, "r") as log:
+    last = 0
+    files = {}
     for line in log:
         entry = line.split()
         if len(entry) > 4:
-            elapsed = int(entry[1]) - int(entry[0])
             obj_file = entry[3]
             file_size = (
                 os.path.getsize(os.path.join(log_path, obj_file))
                 if os.path.exists(obj_file)
                 else 0
             )
-            entries[entry[3]] = (elapsed, file_size)
+            start = int(entry[0])
+            end = int(entry[1])
+            # logic based on ninjatracing
+            if end < last:
+                files = {}
+            last = end
+            files.setdefault(entry[4], (entry[3], start, end, file_size))

-# check file could be loaded
+    # build entries from files dict
+    for entry in files.values():
+        entries[entry[0]] = (entry[1], entry[2], entry[3])
+
+# check file could be loaded and we have entries to report
 if len(entries) == 0:
     print("Could not parse", log_file)
     exit()

-# sort the keys by build time (descending order)
-keys = list(entries.keys())
-sl = sorted(keys, key=lambda k: entries[k][0], reverse=True)
+# sort the entries by build-time (descending order)
+sorted_list = sorted(
+    list(entries.keys()),
+    key=lambda k: entries[k][1] - entries[k][0],
+    reverse=True,
+)

-if output_fmt == "xml":
-    # output results in XML format
+
+# output results in XML format
+def output_xml(entries, sorted_list, args):
     root = ET.Element("testsuites")
     testsuite = ET.Element(
         "testsuite",
         attrib={
             "name": "build-time",
-            "tests": str(len(keys)),
+            "tests": str(len(sorted_list)),
             "failures": str(0),
             "errors": str(0),
         },
     )
     root.append(testsuite)
-    for key in sl:
-        entry = entries[key]
-        elapsed = float(entry[0]) / 1000
+    for name in sorted_list:
+        entry = entries[name]
+        build_time = float(entry[1] - entry[0]) / 1000
         item = ET.Element(
             "testcase",
             attrib={
                 "classname": "BuildTime",
-                "name": key,
-                "time": str(elapsed),
+                "name": name,
+                "time": str(build_time),
             },
         )
         testsuite.append(item)
@@ -85,62 +99,219 @@
     xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent="  ")
     print(xmlstr)

-elif output_fmt == "html":
-    # output results in HTML format
-    print("<html><head><title>Sorted Ninja Build Times</title>")
-    # Note: Jenkins does not support style defined in the html
+
+# utility converts a millisecond value to a column width in pixels
+def time_to_width(value, end):
+    # map a value from (0,end) to (0,1000)
+    r = (float(value) / float(end)) * 1000.0
+    return int(r)
+
+
+# assign each entry to a thread by analyzing the start/end times and
+# slotting them into thread buckets where they fit
+def assign_entries_to_threads(entries):
+    # first sort the entries' keys by end timestamp
+    sorted_keys = sorted(
+        list(entries.keys()), key=lambda k: entries[k][1], reverse=True
+    )
+
+    # build the chart data by assigning entries to threads
+    results = {}
+    threads = []
+    for name in sorted_keys:
+        entry = entries[name]
+
+        # assign this entry by finding the first available thread identified
+        # by the thread's current start time greater than the entry's end time
+        tid = -1
+        for t in range(len(threads)):
+            if threads[t] >= entry[1]:
+                threads[t] = entry[0]
+                tid = t
+                break
+
+        # if no current thread found, create a new one with this entry
+        if tid < 0:
+            threads.append(entry[0])
+            tid = len(threads) - 1
+
+        # add entry name to the array associated with this tid
+        if tid not in results.keys():
+            results[tid] = []
+        results[tid].append(name)
+
+    # first entry has the last end time
+    end_time = entries[sorted_keys[0]][1]
+
+    # return the threaded entries and the last end time
+    return (results, end_time)
+
+
+# output chart results in HTML format
+def output_html(entries, sorted_list, args):
+    print("<html><head><title>Build Metrics Report</title>")
+    # Note: Jenkins does not support javascript nor style defined in the html
     # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
     print("</head><body>")
     if args.msg is not None:
", args.msg, "

") - print("") - print( - "", - "", - "", - sep="", - ) - summary = {"red": 0, "yellow": 0, "green": 0} + + # map entries to threads + # the end_time is used to scale all the entries to a fixed output width + threads, end_time = assign_entries_to_threads(entries) + + # color ranges for build times + summary = {"red": 0, "yellow": 0, "green": 0, "white": 0} red = "bgcolor='#FFBBD0'" yellow = "bgcolor='#FFFF80'" green = "bgcolor='#AAFFBD'" - for key in sl: - result = entries[key] - elapsed = result[0] - color = green - if elapsed > 300000: # 5 minutes - color = red - summary["red"] += 1 - elif elapsed > 120000: # 2 minutes - color = yellow - summary["yellow"] += 1 - else: - summary["green"] += 1 + white = "bgcolor='#FFFFFF'" + + # create the build-time chart + print("
FileCompile time
(ms)
Size
(bytes)
") + for tid in range(len(threads)): + names = threads[tid] + # sort the names for this thread by start time + names = sorted(names, key=lambda k: entries[k][0]) + + # use the last entry's end time as the total row size + # (this is an estimate and does not have to be exact) + last_entry = entries[names[len(names) - 1]] + last_time = time_to_width(last_entry[1], end_time) print( - "", + "") + + # done with the chart + print("
", - key, - "", - result[0], - "", - result[1], - "
", sep="", ) - print("

") + + prev_end = 0 # used for spacing between entries + + # write out each entry for this thread as a column for a single row + for name in names: + entry = entries[name] + start = entry[0] + end = entry[1] + + # this handles minor gaps between end of the + # previous entry and the start of the next + if prev_end > 0 and start > prev_end: + size = time_to_width(start - prev_end, end_time) + print("") + # adjust for the cellspacing + prev_end = end + int(end_time / 500) + + # format the build-time + build_time = end - start + build_time_str = str(build_time) + " ms" + if build_time > 120000: # 2 minutes + minutes = int(build_time / 60000) + seconds = int(((build_time / 60000) - minutes) * 60) + build_time_str = "{:d}:{:02d} min".format(minutes, seconds) + elif build_time > 1000: + build_time_str = "{:.3f} s".format(build_time / 1000) + + # assign color and accumulate legend values + color = white + if build_time > 300000: # 5 minutes + color = red + summary["red"] += 1 + elif build_time > 120000: # 2 minutes + color = yellow + summary["yellow"] += 1 + elif build_time > 1000: # 1 second + color = green + summary["green"] += 1 + else: + summary["white"] += 1 + + # compute the pixel width based on build-time + size = max(time_to_width(build_time, end_time), 2) + # output the column for this entry + print("") + # update the entry with just the computed output info + entries[name] = (build_time_str, color, entry[2]) + + # add a filler column at the end of each row + print("
", end="") + # use a slightly smaller, fixed-width font + print("", end="") + + # add the file-name if it fits, otherwise, truncate the name + file_name = os.path.basename(name) + if len(file_name) + 3 > size / 7: + abbr_size = int(size / 7) - 3 + if abbr_size > 1: + print(file_name[:abbr_size], "...", sep="", end="") + else: + print(file_name, end="") + # done with this entry + print("

") + + # output detail table in build-time descending order + print("") + print( + "", + "", + "", + sep="", + ) + for name in sorted_list: + entry = entries[name] + build_time_str = entry[0] + color = entry[1] + file_size = entry[2] + + # format file size + file_size_str = "" + if file_size > 1000000: + file_size_str = "{:.3f} MB".format(file_size / 1000000) + elif file_size > 1000: + file_size_str = "{:.3f} KB".format(file_size / 1000) + elif file_size > 0: + file_size_str = str(file_size) + " bytes" + + # output entry row + print("", sep="", end="") + print("", sep="", end="") + print("", sep="") + + print("
FileCompile timeSize
", name, "", build_time_str, "", file_size_str, "

") + # include summary table with color legend + print("") print("time > 5 minutes") print("") print("2 minutes < time < 5 minutes") print("") - print("time < 2 minutes") + print("1 second < time < 2 minutes") print("") + print("time < 1 second") + print("") print("
", summary["red"], "
", summary["yellow"], "
", summary["green"], "
", summary["white"], "
") -else: - # output results in CSV format + +# output results in CSV format +def output_csv(entries, sorted_list, args): print("time,size,file") - for key in sl: - result = entries[key] - print(result[0], result[1], key, sep=",") + for name in sorted_list: + entry = entries[name] + build_time = entry[1] - entry[0] + file_size = entry[2] + print(build_time, file_size, name, sep=",") + + +if output_fmt == "xml": + output_xml(entries, sorted_list, args) +elif output_fmt == "html": + output_html(entries, sorted_list, args) +else: + output_csv(entries, sorted_list, args)