Skip to content

Commit

Permalink
Add optional text file support to ninja-log utility (#12823)
Browse files Browse the repository at this point in the history
Adds support to the `sort_ninja_log.py` utility for an optional text file whose contents are included at the top of the generated HTML report. This allows the utility to be run asynchronously, outside the build step, and makes it usable for C++ builds in non-cudf repos.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: #12823
  • Loading branch information
davidwendt authored Mar 20, 2023
1 parent d171fda commit aff1c9f
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 76 deletions.
7 changes: 4 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,7 @@ if buildAll || hasArg libcudf; then
# Record build times
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
MSG=""
# get some sccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
Expand All @@ -318,7 +317,9 @@ if buildAll || hasArg libcudf; then
BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIB_BUILD_DIR}"}
echo "Metrics output dir: [$BMR_DIR]"
mkdir -p ${BMR_DIR}
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${BMR_DIR}/ninja_log.html
MSG_OUTFILE="$(mktemp)"
echo "$MSG" > "${MSG_OUTFILE}"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "${MSG_OUTFILE}" > ${BMR_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${BMR_DIR}/ninja.log
fi

Expand Down
233 changes: 160 additions & 73 deletions cpp/scripts/sort_ninja_log.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
import argparse
import os
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from xml.dom import minidom

parser = argparse.ArgumentParser()
Expand All @@ -22,52 +23,50 @@
"--msg",
type=str,
default=None,
help="optional message to include in html output",
help="optional text file to include at the top of the html output",
)
parser.add_argument(
"--cmp_log",
type=str,
default=None,
help="optional baseline ninja_log to compare results",
)
args = parser.parse_args()

log_file = args.log_file
log_path = os.path.dirname(os.path.abspath(log_file))

output_fmt = args.fmt
cmp_file = args.cmp_log

# build a map of the log entries
entries = {}
with open(log_file) as log:
last = 0
files = {}
for line in log:
entry = line.split()
if len(entry) > 4:
obj_file = entry[3]
file_size = (
os.path.getsize(os.path.join(log_path, obj_file))
if os.path.exists(obj_file)
else 0
)
start = int(entry[0])
end = int(entry[1])
# logic based on ninjatracing
if end < last:
files = {}
last = end
files.setdefault(entry[4], (entry[3], start, end, file_size))

# build entries from files dict
for entry in files.values():
entries[entry[0]] = (entry[1], entry[2], entry[3])

# check file could be loaded and we have entries to report
if len(entries) == 0:
print("Could not parse", log_file)
exit()
def build_log_map(log_file):
    """Parse a ninja log file into a map of build entries.

    Lines with more than four whitespace-separated fields are read as
    ``start end <unused> output key``; every other line (such as a header
    line) is skipped.

    Parameters
    ----------
    log_file : str
        Path to the ninja log file. Output file names found in the log are
        resolved relative to the directory containing this file.

    Returns
    -------
    dict
        Maps each output name to a ``(start, end, file_size)`` tuple, where
        ``start`` and ``end`` are the integers read from the log and
        ``file_size`` is the size in bytes of the output file, or 0 when the
        file cannot be found or read.
    """
    entries = {}
    log_path = os.path.dirname(os.path.abspath(log_file))
    with open(log_file) as log:
        last = 0
        files = {}
        for line in log:
            entry = line.split()
            if len(entry) <= 4:
                continue
            obj_file = entry[3]
            # Measure the file where the log says it lives. Probing the bare
            # name would look in the current working directory instead, which
            # reports size 0 for everything when run from another directory.
            try:
                file_size = os.path.getsize(os.path.join(log_path, obj_file))
            except OSError:
                file_size = 0
            start = int(entry[0])
            end = int(entry[1])
            # logic based on ninjatracing: an end time earlier than the
            # previous one means a new build started, so drop older records
            if end < last:
                files = {}
            last = end
            # the first record seen for a given key (fifth field) wins
            files.setdefault(entry[4], (obj_file, start, end, file_size))

    # build entries from files dict, keyed by output name
    for name, start, end, file_size in files.values():
        entries[name] = (start, end, file_size)

    return entries

# sort the entries by build-time (descending order)
sorted_list = sorted(
list(entries.keys()),
key=lambda k: entries[k][1] - entries[k][0],
reverse=True,
)

# output results in XML format
def output_xml(entries, sorted_list, args):
Expand Down Expand Up @@ -147,14 +146,46 @@ def assign_entries_to_threads(entries):
return (results, end_time)


# output chart results in HTML format
def output_html(entries, sorted_list, args):
# format the build-time
def format_build_time(input_time):
    """Format a duration in milliseconds as a short human-readable string.

    Magnitudes above 2 minutes render as ``M:SS min``, magnitudes above
    1 second as ``S.mmm s``, and anything smaller as ``N ms``. Negative
    inputs (for example a time difference) are formatted by magnitude and
    prefixed with ``-``.
    """
    build_time = abs(input_time)
    if build_time > 120000:  # 2 minutes
        # Use integer division/remainder for the seconds. Deriving them from
        # the fractional part of a float minute count truncates low, e.g.
        # 121000 ms would render as "2:00 min" instead of "2:01 min".
        minutes, remainder_ms = divmod(build_time, 60000)
        build_time_str = "{:d}:{:02d} min".format(
            int(minutes), int(remainder_ms // 1000)
        )
    elif build_time > 1000:
        build_time_str = "{:.3f} s".format(build_time / 1000)
    else:
        build_time_str = str(build_time) + " ms"
    if input_time < 0:
        build_time_str = "-" + build_time_str
    return build_time_str


# format file size
def format_file_size(input_size):
    """Render a byte count as a compact string using decimal units.

    Magnitudes above 1,000,000 are shown in MB and above 1,000 in KB, both
    with three decimals; smaller positive magnitudes are shown in bytes.
    A size of zero yields an empty string. Negative inputs are formatted by
    magnitude with a leading ``-``.
    """
    magnitude = abs(input_size)
    sign = "-" if input_size < 0 else ""
    if magnitude > 1000000:
        return sign + "{:.3f} MB".format(magnitude / 1000000)
    if magnitude > 1000:
        return sign + "{:.3f} KB".format(magnitude / 1000)
    if magnitude > 0:
        return sign + str(magnitude) + " bytes"
    # nothing to report for a zero size
    return sign


# Output chart results in HTML format
# Builds a standalone html file with no javascript or styles
def output_html(entries, sorted_list, cmp_entries, args):
print("<html><head><title>Build Metrics Report</title>")
# Note: Jenkins does not support javascript nor style defined in the html
# https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
print("</head><body>")
if args.msg is not None:
print("<p>", args.msg, "</p>")
msg_file = Path(args.msg)
if msg_file.is_file():
msg = msg_file.read_text()
print("<p>", msg, "</p>")

# map entries to threads
# the end_time is used to scale all the entries to a fixed output width
Expand Down Expand Up @@ -201,15 +232,8 @@ def output_html(entries, sorted_list, args):
# adjust for the cellspacing
prev_end = end + int(end_time / 500)

# format the build-time
build_time = end - start
build_time_str = str(build_time) + " ms"
if build_time > 120000: # 2 minutes
minutes = int(build_time / 60000)
seconds = int(((build_time / 60000) - minutes) * 60)
build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
elif build_time > 1000:
build_time_str = "{:.3f} s".format(build_time / 1000)
build_time_str = format_build_time(build_time)

# assign color and accumulate legend values
color = white
Expand Down Expand Up @@ -248,7 +272,7 @@ def output_html(entries, sorted_list, args):
# done with this entry
print("</font></td>")
# update the entry with just the computed output info
entries[name] = (build_time_str, color, entry[2])
entries[name] = (build_time, color, entry[2])

# add a filler column at the end of each row
print("<td width='*'></td></tr></table></td></tr>")
Expand All @@ -259,30 +283,53 @@ def output_html(entries, sorted_list, args):
# output detail table in build-time descending order
print("<table id='detail' bgcolor='#EEEEEE'>")
print(
"<tr><th>File</th>",
"<th>Compile time</th>",
"<th>Size</th><tr>",
sep="",
"<tr><th>File</th>", "<th>Compile time</th>", "<th>Size</th>", sep=""
)
if cmp_entries:
print("<th>t-cmp</th>", sep="")
print("</tr>")

for name in sorted_list:
entry = entries[name]
build_time_str = entry[0]
build_time = entry[0]
color = entry[1]
file_size = entry[2]

# format file size
file_size_str = ""
if file_size > 1000000:
file_size_str = "{:.3f} MB".format(file_size / 1000000)
elif file_size > 1000:
file_size_str = "{:.3f} KB".format(file_size / 1000)
elif file_size > 0:
file_size_str = str(file_size) + " bytes"
build_time_str = format_build_time(build_time)
file_size_str = format_file_size(file_size)

# output entry row
print("<tr ", color, "><td>", name, "</td>", sep="", end="")
print("<td align='right'>", build_time_str, "</td>", sep="", end="")
print("<td align='right'>", file_size_str, "</td></tr>", sep="")
print("<td align='right'>", file_size_str, "</td>", sep="", end="")
# output diff column
cmp_entry = (
cmp_entries[name] if cmp_entries and name in cmp_entries else None
)
if cmp_entry:
diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
diff_time_str = format_build_time(diff_time)
diff_color = white
diff_percent = int((diff_time / build_time) * 100)
if build_time > 60000:
if diff_percent > 20:
diff_color = red
diff_time_str = "<b>" + diff_time_str + "</b>"
elif diff_percent < -20:
diff_color = green
diff_time_str = "<b>" + diff_time_str + "</b>"
elif diff_percent > 0:
diff_color = yellow
print(
"<td align='right' ",
diff_color,
">",
diff_time_str,
"</td>",
sep="",
end="",
)
print("</tr>")

print("</table><br/>")

Expand All @@ -296,22 +343,62 @@ def output_html(entries, sorted_list, args):
print("<td align='right'>", summary["green"], "</td></tr>")
print("<tr><td", white, ">time &lt; 1 second</td>")
print("<td align='right'>", summary["white"], "</td></tr>")
print("</table></body></html>")
print("</table>")

if cmp_entries:
print("<table id='legend' border='2' bgcolor='#EEEEEE'>")
print("<tr><td", red, ">time increase &gt; 20%</td></tr>")
print("<tr><td", yellow, ">time increase &gt; 0</td></tr>")
print("<tr><td", green, ">time decrease &gt; 20%</td></tr>")
print(
"<tr><td",
white,
">time change &lt; 20%% or build time &lt; 1 minute</td></tr>",
)
print("</table>")

print("</body></html>")


# output results in CSV format
def output_csv(entries, sorted_list, args):
print("time,size,file")
def output_csv(entries, sorted_list, cmp_entries, args):
print("time,size,file", end="")
if cmp_entries:
print(",diff", end="")
print()
for name in sorted_list:
entry = entries[name]
build_time = entry[1] - entry[0]
file_size = entry[2]
print(build_time, file_size, name, sep=",")
cmp_entry = (
cmp_entries[name] if cmp_entries and name in cmp_entries else None
)
print(build_time, file_size, name, sep=",", end="")
if cmp_entry:
diff_time = build_time - (cmp_entry[1] - cmp_entry[0])
print(",", diff_time, sep="", end="")
print()


# parse log file into map
entries = build_log_map(log_file)
if len(entries) == 0:
print("Could not parse", log_file)
exit()

# sort the entries by build-time (descending order)
sorted_list = sorted(
list(entries.keys()),
key=lambda k: entries[k][1] - entries[k][0],
reverse=True,
)

# load the comparison build log if available
cmp_entries = build_log_map(cmp_file) if cmp_file else None

if output_fmt == "xml":
output_xml(entries, sorted_list, args)
elif output_fmt == "html":
output_html(entries, sorted_list, args)
output_html(entries, sorted_list, cmp_entries, args)
else:
output_csv(entries, sorted_list, args)
output_csv(entries, sorted_list, cmp_entries, args)

0 comments on commit aff1c9f

Please sign in to comment.