Add optional text file support to ninja-log utility (#12823)

Adds support for `sort_ninja_log.py` utility to accept an optional text file to be included at the top of the generated html report. This allows it to be used asynchronously outside the build step and helps enable using it in C++ builds in non-cudf repos. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - AJ Schmidt (https://github.com/ajschmidt8) URL: #12823
rapidsai · Mar 20, 2023 · aff1c9f · aff1c9f
1 parent d171fda
commit aff1c9f
Show file tree

Hide file tree

Showing 2 changed files with 164 additions and 76 deletions.
diff --git a/build.sh b/build.sh
@@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then
     # Record build times
     if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
         echo "Formatting build metrics"
-        python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
-        MSG="<p>"
+        MSG=""
         # get some sccache stats after the compile
         if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
            COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
@@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then
         BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
         echo "Metrics output dir: [$BMR_DIR]"
         mkdir -p ${BMR_DIR}
-        python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
+        MSG_OUTFILE="$(mktemp)"
+        echo "$MSG" > "${MSG_OUTFILE}"
+        python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
         cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
     fi
 

diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py
@@ -1,10 +1,11 @@
 #
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 #
 import argparse
 import os
 import sys
 import xml.etree.ElementTree as ET
+from pathlib import Path
 from xml.dom import minidom
 
 parser = argparse.ArgumentParser()
@@ -22,52 +23,50 @@
     "--msg",
     type=str,
     default=None,
-    help="optional message to include in html output",
+    help="optional text file to include at the top of the html output",
+)
+parser.add_argument(
+    "--cmp_log",
+    type=str,
+    default=None,
+    help="optional baseline ninja_log to compare results",
 )
 args = parser.parse_args()
 
 log_file = args.log_file
-log_path = os.path.dirname(os.path.abspath(log_file))
-
 output_fmt = args.fmt
+cmp_file = args.cmp_log
 
 # build a map of the log entries
-entries = {}
-with open(log_file) as log:
-    last = 0
-    files = {}
-    for line in log:
-        entry = line.split()
-        if len(entry) > 4:
-            obj_file = entry[3]
-            file_size = (
-                os.path.getsize(os.path.join(log_path, obj_file))
-                if os.path.exists(obj_file)
-                else 0
-            )
-            start = int(entry[0])
-            end = int(entry[1])
-            # logic based on ninjatracing
-            if end < last:
-                files = {}
-            last = end
-            files.setdefault(entry[4], (entry[3], start, end, file_size))
-
-    # build entries from files dict
-    for entry in files.values():
-        entries[entry[0]] = (entry[1], entry[2], entry[3])
-
-# check file could be loaded and we have entries to report
-if len(entries) == 0:
-    print("Could not parse", log_file)
-    exit()
+def build_log_map(log_file):
+    entries = {}
+    log_path = os.path.dirname(os.path.abspath(log_file))
+    with open(log_file) as log:
+        last = 0
+        files = {}
+        for line in log:
+            entry = line.split()
+            if len(entry) > 4:
+                obj_file = entry[3]
+                file_size = (
+                    os.path.getsize(os.path.join(log_path, obj_file))
+                    if os.path.exists(obj_file)
+                    else 0
+                )
+                start = int(entry[0])
+                end = int(entry[1])
+                # logic based on ninjatracing
+                if end < last:
+                    files = {}
+                last = end
+                files.setdefault(entry[4], (entry[3], start, end, file_size))
+
+        # build entries from files dict
+        for entry in files.values():
+            entries[entry[0]] = (entry[1], entry[2], entry[3])
+
+    return entries
 
-# sort the entries by build-time (descending order)
-sorted_list = sorted(
-    list(entries.keys()),
-    key=lambda k: entries[k][1] - entries[k][0],
-    reverse=True,
-)
 
 # output results in XML format
 def output_xml(entries, sorted_list, args):
@@ -147,14 +146,46 @@ def assign_entries_to_threads(entries):
     return (results, end_time)
 
 
-# output chart results in HTML format
-def output_html(entries, sorted_list, args):
+# format the build-time
+def format_build_time(input_time):
+    build_time = abs(input_time)
+    build_time_str = str(build_time) + " ms"
+    if build_time > 120000:  # 2 minutes
+        minutes = int(build_time / 60000)
+        seconds = int(((build_time / 60000) - minutes) * 60)
+        build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
+    elif build_time > 1000:
+        build_time_str = "{:.3f} s".format(build_time / 1000)
+    if input_time < 0:
+        build_time_str = "-" + build_time_str
+    return build_time_str
+
+
+# format file size
+def format_file_size(input_size):
+    file_size = abs(input_size)
+    file_size_str = ""
+    if file_size > 1000000:
+        file_size_str = "{:.3f} MB".format(file_size / 1000000)
+    elif file_size > 1000:
+        file_size_str = "{:.3f} KB".format(file_size / 1000)
+    elif file_size > 0:
+        file_size_str = str(file_size) + " bytes"
+    if input_size < 0:
+        file_size_str = "-" + file_size_str
+    return file_size_str
+
+
+# Output chart results in HTML format
+# Builds a standalone html file with no javascript or styles
+def output_html(entries, sorted_list, cmp_entries, args):
     print("<html><head><title>Build Metrics Report</title>")
-    # Note: Jenkins does not support javascript nor style defined in the html
-    # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
     print("</head><body>")
     if args.msg is not None:
-        print("<p>", args.msg, "</p>")
+        msg_file = Path(args.msg)
+        if msg_file.is_file():
+            msg = msg_file.read_text()
+            print("<p>", msg, "</p>")
 
     # map entries to threads
     # the end_time is used to scale all the entries to a fixed output width
@@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args):
             # adjust for the cellspacing
             prev_end = end + int(end_time / 500)
 
-            # format the build-time
             build_time = end - start
-            build_time_str = str(build_time) + " ms"
-            if build_time > 120000:  # 2 minutes
-                minutes = int(build_time / 60000)
-                seconds = int(((build_time / 60000) - minutes) * 60)
-                build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
-            elif build_time > 1000:
-                build_time_str = "{:.3f} s".format(build_time / 1000)
+            build_time_str = format_build_time(build_time)
 
             # assign color and accumulate legend values
             color = white
@@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args):
             # done with this entry
             print("</font></td>")
             # update the entry with just the computed output info
-            entries[name] = (build_time_str, color, entry[2])
+            entries[name] = (build_time, color, entry[2])
 
         # add a filler column at the end of each row
         print("<td width='*'></td></tr></table></td></tr>")
@@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args):
     # output detail table in build-time descending order
     print("<table id='detail' bgcolor='#EEEEEE'>")
     print(
-        "<tr><th>File</th>",
-        "<th>Compile time</th>",
-        "<th>Size</th><tr>",
-        sep="",
+        "<tr><th>File</th>", "<th>Compile time</th>", "<th>Size</th>", sep=""
     )
+    if cmp_entries:
+        print("<th>t-cmp</th>", sep="")
+    print("</tr>")
+
     for name in sorted_list:
         entry = entries[name]
-        build_time_str = entry[0]
+        build_time = entry[0]
         color = entry[1]
         file_size = entry[2]
 
-        # format file size
-        file_size_str = ""
-        if file_size > 1000000:
-            file_size_str = "{:.3f} MB".format(file_size / 1000000)
-        elif file_size > 1000:
-            file_size_str = "{:.3f} KB".format(file_size / 1000)
-        elif file_size > 0:
-            file_size_str = str(file_size) + " bytes"
+        build_time_str = format_build_time(build_time)
+        file_size_str = format_file_size(file_size)
 
         # output entry row
         print("<tr ", color, "><td>", name, "</td>", sep="", end="")
         print("<td align='right'>", build_time_str, "</td>", sep="", end="")
-        print("<td align='right'>", file_size_str, "</td></tr>", sep="")
+        print("<td align='right'>", file_size_str, "</td>", sep="", end="")
+        # output diff column
+        cmp_entry = (
+            cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        )
+        if cmp_entry:
+            diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
+            diff_time_str = format_build_time(diff_time)
+            diff_color = white
+            diff_percent = int((diff_time / build_time) * 100)
+            if build_time > 60000:
+                if diff_percent > 20:
+                    diff_color = red
+                    diff_time_str = "<b>" + diff_time_str + "</b>"
+                elif diff_percent < -20:
+                    diff_color = green
+                    diff_time_str = "<b>" + diff_time_str + "</b>"
+                elif diff_percent > 0:
+                    diff_color = yellow
+            print(
+                "<td align='right' ",
+                diff_color,
+                ">",
+                diff_time_str,
+                "</td>",
+                sep="",
+                end="",
+            )
+        print("</tr>")
 
     print("</table><br/>")
 
@@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args):
     print("<td align='right'>", summary["green"], "</td></tr>")
     print("<tr><td", white, ">time &lt; 1 second</td>")
     print("<td align='right'>", summary["white"], "</td></tr>")
-    print("</table></body></html>")
+    print("</table>")
+
+    if cmp_entries:
+        print("<table id='legend' border='2' bgcolor='#EEEEEE'>")
+        print("<tr><td", red, ">time increase &gt; 20%</td></tr>")
+        print("<tr><td", yellow, ">time increase &gt; 0</td></tr>")
+        print("<tr><td", green, ">time decrease &gt; 20%</td></tr>")
+        print(
+            "<tr><td",
+            white,
+            ">time change &lt; 20%% or build time &lt; 1 minute</td></tr>",
+        )
+        print("</table>")
+
+    print("</body></html>")
 
 
 # output results in CSV format
-def output_csv(entries, sorted_list, args):
-    print("time,size,file")
+def output_csv(entries, sorted_list, cmp_entries, args):
+    print("time,size,file", end="")
+    if cmp_entries:
+        print(",diff", end="")
+    print()
     for name in sorted_list:
         entry = entries[name]
         build_time = entry[1] - entry[0]
         file_size = entry[2]
-        print(build_time, file_size, name, sep=",")
+        cmp_entry = (
+            cmp_entries[name] if cmp_entries and name in cmp_entries else None
+        )
+        print(build_time, file_size, name, sep=",", end="")
+        if cmp_entry:
+            diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
+            print(",", diff_time, sep="", end="")
+        print()
+
+
+# parse log file into map
+entries = build_log_map(log_file)
+if len(entries) == 0:
+    print("Could not parse", log_file)
+    exit()
+
+# sort the entries by build-time (descending order)
+sorted_list = sorted(
+    list(entries.keys()),
+    key=lambda k: entries[k][1] - entries[k][0],
+    reverse=True,
+)
 
+# load the comparison build log if available
+cmp_entries = build_log_map(cmp_file) if cmp_file else None
 
 if output_fmt == "xml":
     output_xml(entries, sorted_list, args)
 elif output_fmt == "html":
-    output_html(entries, sorted_list, args)
+    output_html(entries, sorted_list, cmp_entries, args)
 else:
-    output_csv(entries, sorted_list, args)
+    output_csv(entries, sorted_list, cmp_entries, args)