Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timing chart for libcudf build metrics report page #10038

Merged
merged 23 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
06a3e74
Add timing chart to libcudf build page
davidwendt Jan 13, 2022
787166c
attempt force recompile libcudf
davidwendt Jan 13, 2022
561a4a2
use fixed-width font and nowrap
davidwendt Jan 14, 2022
5cac058
cleanup code and add comments
davidwendt Jan 14, 2022
a632820
add more comments
davidwendt Jan 14, 2022
1ccae0a
fix report title
davidwendt Jan 15, 2022
ece7cf0
reduce font size for chart text
davidwendt Jan 15, 2022
de375a4
recompile everything test
davidwendt Jan 18, 2022
1e233bb
Merge branch 'branch-22.02' into build-time-chart
davidwendt Jan 18, 2022
b8dc503
test publish ninja.log as html
davidwendt Jan 19, 2022
bf6278c
copy ninja_log to build-metrics dir
davidwendt Jan 19, 2022
3cd66ce
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 20, 2022
dd5f8af
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 20, 2022
a7702de
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 21, 2022
2997866
undo changes to types.hpp
davidwendt Jan 21, 2022
a4d8922
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 21, 2022
7c73454
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 24, 2022
df31d88
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 25, 2022
8e15d33
format large times as minutes:seconds
davidwendt Jan 25, 2022
c82419b
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 26, 2022
ff326e3
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 27, 2022
88217b3
change ninja_log.html to ninja.log
davidwendt Jan 27, 2022
6e776d1
Merge branch 'branch-22.04' into build-time-chart
davidwendt Jan 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ if buildAll || hasArg libcudf; then
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
Expand Down
1 change: 1 addition & 0 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
gpuci_logger "Copying build metrics results"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja_log.html"
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
fi

gpuci_logger "Build conda pkg for libcudf_kafka"
Expand Down
281 changes: 226 additions & 55 deletions cpp/scripts/sort_ninja_log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
#
import argparse
import os
Expand Down Expand Up @@ -34,49 +34,63 @@
# build a map of the log entries
entries = {}
with open(log_file, "r") as log:
last = 0
files = {}
for line in log:
entry = line.split()
if len(entry) > 4:
elapsed = int(entry[1]) - int(entry[0])
obj_file = entry[3]
file_size = (
os.path.getsize(os.path.join(log_path, obj_file))
if os.path.exists(obj_file)
else 0
)
entries[entry[3]] = (elapsed, file_size)
start = int(entry[0])
end = int(entry[1])
# logic based on ninjatracing
if end < last:
files = {}
last = end
files.setdefault(entry[4], (entry[3], start, end, file_size))

# check file could be loaded
# build entries from files dict
for entry in files.values():
entries[entry[0]] = (entry[1], entry[2], entry[3])

# check file could be loaded and we have entries to report
if len(entries) == 0:
print("Could not parse", log_file)
exit()

# sort the keys by build time (descending order)
keys = list(entries.keys())
sl = sorted(keys, key=lambda k: entries[k][0], reverse=True)
# sort the entries by build-time (descending order)
sorted_list = sorted(
list(entries.keys()),
key=lambda k: entries[k][1] - entries[k][0],
reverse=True,
)

if output_fmt == "xml":
# output results in XML format
# output results in XML format
def output_xml(entries, sorted_list, args):
root = ET.Element("testsuites")
testsuite = ET.Element(
"testsuite",
attrib={
"name": "build-time",
"tests": str(len(keys)),
"tests": str(len(sorted_list)),
"failures": str(0),
"errors": str(0),
},
)
root.append(testsuite)
for key in sl:
entry = entries[key]
elapsed = float(entry[0]) / 1000
for name in sorted_list:
entry = entries[name]
build_time = float(entry[1] - entry[0]) / 1000
item = ET.Element(
"testcase",
attrib={
"classname": "BuildTime",
"name": key,
"time": str(elapsed),
"name": name,
"time": str(build_time),
},
)
testsuite.append(item)
Expand All @@ -85,62 +99,219 @@
xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
print(xmlstr)

elif output_fmt == "html":
# output results in HTML format
print("<html><head><title>Sorted Ninja Build Times</title>")
# Note: Jenkins does not support style defined in the html

def time_to_width(value, end):
    """Convert a millisecond value to a chart column width in pixels.

    Linearly maps ``value`` from the range (0, end) onto (0, 1000) and
    truncates the result to an integer.

    Args:
        value: Millisecond value to scale.
        end: Millisecond value that corresponds to the full width of 1000.

    Returns:
        The scaled width as an int, or 0 when ``end`` is 0.
    """
    # a zero-length timeline has no meaningful scale; avoid dividing by zero
    if end == 0:
        return 0
    r = (float(value) / float(end)) * 1000.0
    return int(r)


def assign_entries_to_threads(entries):
    """Assign each entry to a thread slot based on its start/end times.

    Entries are visited from the latest end time to the earliest. Each one is
    placed on the first thread whose current start time is greater than or
    equal to the entry's end time (so it fits before what is already there);
    if no thread qualifies, a new thread is created for it.

    Args:
        entries: Mapping of entry name to a tuple whose first two items are
            the start and end timestamps.

    Returns:
        A tuple ``(results, end_time)``. ``results`` maps a thread index to
        the list of entry names assigned to that thread, in the order they
        were visited (descending end time). ``end_time`` is the largest end
        timestamp. For empty ``entries`` the result is ``({}, 0)``.
    """
    # nothing to chart; avoids indexing into an empty key list below
    if not entries:
        return ({}, 0)

    # visit the entries by end timestamp, latest first
    sorted_keys = sorted(entries, key=lambda k: entries[k][1], reverse=True)

    results = {}
    # threads[t] is the start time of the most recently placed entry on t
    threads = []
    for name in sorted_keys:
        start, end = entries[name][0], entries[name][1]

        # take the first thread whose current start time is not before
        # this entry's end time
        for tid, thread_start in enumerate(threads):
            if thread_start >= end:
                threads[tid] = start
                break
        else:
            # no existing thread fits, so open a new one with this entry
            tid = len(threads)
            threads.append(start)

        # record the entry name under its thread index
        results.setdefault(tid, []).append(name)

    # the first sorted entry has the last end time
    end_time = entries[sorted_keys[0]][1]

    return (results, end_time)


# output chart results in HTML format
def output_html(entries, sorted_list, args):
print("<html><head><title>Build Metrics Report</title>")
# Note: Jenkins does not support javascript nor style defined in the html
# https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
print("</head><body>")
if args.msg is not None:
print("<p>", args.msg, "</p>")
print("<table>")
print(
"<tr><th>File</th>",
"<th>Compile time<br/>(ms)</th>",
"<th>Size<br/>(bytes)</th><tr>",
sep="",
)
summary = {"red": 0, "yellow": 0, "green": 0}

# map entries to threads
# the end_time is used to scale all the entries to a fixed output width
threads, end_time = assign_entries_to_threads(entries)

# color ranges for build times
summary = {"red": 0, "yellow": 0, "green": 0, "white": 0}
red = "bgcolor='#FFBBD0'"
yellow = "bgcolor='#FFFF80'"
green = "bgcolor='#AAFFBD'"
for key in sl:
result = entries[key]
elapsed = result[0]
color = green
if elapsed > 300000: # 5 minutes
color = red
summary["red"] += 1
elif elapsed > 120000: # 2 minutes
color = yellow
summary["yellow"] += 1
else:
summary["green"] += 1
white = "bgcolor='#FFFFFF'"

# create the build-time chart
print("<table id='chart' width='1000px' bgcolor='#BBBBBB'>")
for tid in range(len(threads)):
names = threads[tid]
# sort the names for this thread by start time
names = sorted(names, key=lambda k: entries[k][0])

# use the last entry's end time as the total row size
# (this is an estimate and does not have to be exact)
last_entry = entries[names[len(names) - 1]]
last_time = time_to_width(last_entry[1], end_time)
print(
"<tr ",
color,
"><td>",
key,
"</td><td align='right'>",
result[0],
"</td><td align='right'>",
result[1],
"</td></tr>",
"<tr><td><table width='",
last_time,
"px' border='0' cellspacing='1' cellpadding='0'><tr>",
sep="",
)
print("</table><br/><table border='2'>")

prev_end = 0 # used for spacing between entries

# write out each entry for this thread as a column for a single row
for name in names:
entry = entries[name]
start = entry[0]
end = entry[1]

# this handles minor gaps between end of the
# previous entry and the start of the next
if prev_end > 0 and start > prev_end:
size = time_to_width(start - prev_end, end_time)
print("<td width='", size, "px'></td>")
# adjust for the cellspacing
prev_end = end + int(end_time / 500)

# format the build-time
build_time = end - start
build_time_str = str(build_time) + " ms"
if build_time > 120000: # 2 minutes
minutes = int(build_time / 60000)
seconds = int(((build_time / 60000) - minutes) * 60)
build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
elif build_time > 1000:
build_time_str = "{:.3f} s".format(build_time / 1000)

# assign color and accumulate legend values
color = white
if build_time > 300000: # 5 minutes
color = red
summary["red"] += 1
elif build_time > 120000: # 2 minutes
color = yellow
summary["yellow"] += 1
elif build_time > 1000: # 1 second
color = green
summary["green"] += 1
else:
summary["white"] += 1

# compute the pixel width based on build-time
size = max(time_to_width(build_time, end_time), 2)
# output the column for this entry
print("<td height='20px' width='", size, "px' ", sep="", end="")
# title text is shown as hover-text by most browsers
print(color, "title='", end="")
print(name, "\n", build_time_str, "' ", sep="", end="")
# centers the name if it fits in the box
print("align='center' nowrap>", end="")
# use a slightly smaller, fixed-width font
print("<font size='-2' face='courier'>", end="")

# add the file-name if it fits, otherwise, truncate the name
file_name = os.path.basename(name)
if len(file_name) + 3 > size / 7:
abbr_size = int(size / 7) - 3
if abbr_size > 1:
print(file_name[:abbr_size], "...", sep="", end="")
else:
print(file_name, end="")
# done with this entry
print("</font></td>")
# update the entry with just the computed output info
entries[name] = (build_time_str, color, entry[2])

# add a filler column at the end of each row
print("<td width='*'></td></tr></table></td></tr>")

# done with the chart
print("</table><br/>")

# output detail table in build-time descending order
print("<table id='detail' bgcolor='#EEEEEE'>")
print(
"<tr><th>File</th>",
"<th>Compile time</th>",
"<th>Size</th><tr>",
sep="",
)
for name in sorted_list:
entry = entries[name]
build_time_str = entry[0]
color = entry[1]
file_size = entry[2]

# format file size
file_size_str = ""
if file_size > 1000000:
file_size_str = "{:.3f} MB".format(file_size / 1000000)
elif file_size > 1000:
file_size_str = "{:.3f} KB".format(file_size / 1000)
elif file_size > 0:
file_size_str = str(file_size) + " bytes"

# output entry row
print("<tr ", color, "><td>", name, "</td>", sep="", end="")
print("<td align='right'>", build_time_str, "</td>", sep="", end="")
print("<td align='right'>", file_size_str, "</td></tr>", sep="")

print("</table><br/>")

# include summary table with color legend
print("<table id='legend' border='2' bgcolor='#EEEEEE'>")
print("<tr><td", red, ">time &gt; 5 minutes</td>")
print("<td align='right'>", summary["red"], "</td></tr>")
print("<tr><td", yellow, ">2 minutes &lt; time &lt; 5 minutes</td>")
print("<td align='right'>", summary["yellow"], "</td></tr>")
print("<tr><td", green, ">time &lt; 2 minutes</td>")
print("<tr><td", green, ">1 second &lt; time &lt; 2 minutes</td>")
print("<td align='right'>", summary["green"], "</td></tr>")
print("<tr><td", white, ">time &lt; 1 second</td>")
print("<td align='right'>", summary["white"], "</td></tr>")
print("</table></body></html>")

else:
# output results in CSV format

# output results in CSV format
def output_csv(entries, sorted_list, args):
print("time,size,file")
for key in sl:
result = entries[key]
print(result[0], result[1], key, sep=",")
for name in sorted_list:
entry = entries[name]
build_time = entry[1] - entry[0]
file_size = entry[2]
print(build_time, file_size, name, sep=",")


if output_fmt == "xml":
output_xml(entries, sorted_list, args)
elif output_fmt == "html":
output_html(entries, sorted_list, args)
else:
output_csv(entries, sorted_list, args)