Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

blocks-import-script: add --markdown flag and remove --csv-output flag #2985

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 59 additions & 18 deletions scripts/block-import-stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,39 @@ def formatBins(df: pd.DataFrame, bins: int):
return df


def write_markdown_output(df_stats, df, baseline_name, contender_name):
    """Print the comparison as markdown tables on standard output.

    Three sections are emitted in order: a per-range comparison table built
    from the rows of ``df_stats``, a summary table of totals computed from
    ``df``, and a short legend explaining the diff columns.

    Args:
        df_stats: Table whose rows are iterated with ``iterrows()``; each row
            must provide ``bps_x``, ``bps_y``, ``tps_x``, ``tps_y``,
            ``time_x``, ``time_y``, ``bpsd``, ``tpsd`` and ``timed``.
        df: Table providing the ``block_number``, ``time_x`` and ``time_y``
            columns used for the totals.
        baseline_name: Path of the baseline input; only its basename is shown.
        contender_name: Path of the contender input; only its basename is
            shown.
    """

    def _summary_row(label, value):
        # One "| Metric | Value |" line of the summary table.
        print(f"| {label} | {value} |")

    # Totals are computed up front, before anything is printed.
    block_span = df.block_number.max() - df.block_number.min()
    baseline_total = df.time_x.sum()
    contender_total = df.time_y.sum()
    total_delta = contender_total - baseline_total

    baseline_label = os.path.basename(baseline_name)
    contender_label = os.path.basename(contender_name)
    print(f"\n## {baseline_label} vs {contender_label}\n")

    for table_line in (
        "| Block Range | BPS Baseline | BPS Contender | TPS Baseline | TPS Contender | Time Baseline | Time Contender | BPS Diff | TPS Diff | Time Diff |",
        "|------------|--------------|---------------|--------------|---------------|---------------|----------------|----------|----------|-----------|",
    ):
        print(table_line)

    for label, stats in df_stats.iterrows():
        cells = [
            str(label),
            f"{stats['bps_x']:.2f}",
            f"{stats['bps_y']:.2f}",
            f"{stats['tps_x']:.2f}",
            f"{stats['tps_y']:.2f}",
            f"{prettySecs(stats['time_x'])}",
            f"{prettySecs(stats['time_y'])}",
            f"{stats['bpsd']:.2%}",
            f"{stats['tpsd']:.2%}",
            f"{stats['timed']:.2%}",
        ]
        print("| " + " | ".join(cells) + " |")

    print("\n## Summary\n")

    print("| Metric | Value |")
    print("|--------|-------|")

    _summary_row("Total Blocks", block_span)
    _summary_row("Baseline Time", prettySecs(baseline_total))
    _summary_row("Contender Time", prettySecs(contender_total))
    _summary_row("Time Difference", prettySecs(total_delta))
    # Relative change of the contender's total time against the baseline's.
    _summary_row("Time Difference %", f"{(total_delta / baseline_total):.2%}")

    for legend_line in (
        "\n## Legend\n",
        "- BPS Diff: Blocks per second difference (+)",
        "- TPS Diff: Transactions per second difference",
        "- Time Diff: Time to process difference (-)",
        "\n(+) = more is better, (-) = less is better",
    ):
        print(legend_line)


def write_csv_output(df_stats, df, csv_path):
"""Write statistics to a CSV file"""
total_blocks = df.block_number.max() - df.block_number.min()
Expand Down Expand Up @@ -88,12 +121,32 @@ def write_csv_output(df_stats, df, csv_path):
csv_writer.writerow(['Time Difference %', f"{(timet/time_xt):.2%}"])


def write_standard_output(df_stats, df, baseline_name, contender_name):
    """Print the comparison as a plain-text table on standard output.

    The output is a header line naming both inputs, ``df_stats`` rendered
    with ``to_string`` using per-column formatters, the totals computed from
    ``df``, and a legend for the diff columns.

    Args:
        df_stats: Table rendered with ``to_string``; formatters are supplied
            for ``bpsd``, ``tpsd``, ``timed``, ``bps_x``, ``bps_y``,
            ``tps_x``, ``tps_y``, ``time_x`` and ``time_y``.
        df: Table providing the ``block_number``, ``time_x`` and ``time_y``
            columns used for the totals.
        baseline_name: Path of the baseline input; only its basename is shown.
        contender_name: Path of the contender input; only its basename is
            shown.
    """
    baseline_label = os.path.basename(baseline_name)
    contender_label = os.path.basename(contender_name)
    print(f"{baseline_label} vs {contender_label}")

    # Column name -> callable turning a cell value into display text.
    as_percent = "{:,.2%}".format
    as_decimal = "{:,.2f}".format
    column_formatters = {}
    for column in ("bpsd", "tpsd", "timed"):
        column_formatters[column] = as_percent
    for column in ("bps_x", "bps_y", "tps_x", "tps_y"):
        column_formatters[column] = as_decimal
    for column in ("time_x", "time_y"):
        column_formatters[column] = prettySecs
    print(df_stats.to_string(formatters=column_formatters))

    block_span = df.block_number.max() - df.block_number.min()
    baseline_total = df.time_x.sum()
    contender_total = df.time_y.sum()
    total_delta = contender_total - baseline_total

    baseline_pretty = prettySecs(baseline_total)
    contender_pretty = prettySecs(contender_total)
    print(f"\nblocks: {block_span}, baseline: {baseline_pretty}, contender: {contender_pretty}")

    delta_pretty = prettySecs(total_delta)
    # Relative change of the contender's total time against the baseline's.
    delta_ratio = total_delta / baseline_total
    print(f"Time (total): {delta_pretty}, {delta_ratio:.2%}")

    print("\nbpsd = blocks per sec diff (+), tpsd = txs per sec diff, timed = time to process diff (-)")
    print("+ = more is better, - = less is better")


def main():
parser = argparse.ArgumentParser()
parser.add_argument("baseline")
parser.add_argument("contender")
parser.add_argument("--plot", action="store_true")
parser.add_argument("--csv-output", type=str, help="Path to output CSV file")
parser.add_argument("--markdown", action="store_true", help="Output in markdown table format")
parser.add_argument(
"--bins",
default=10,
Expand Down Expand Up @@ -122,8 +175,8 @@ def main():
print(f"Contender range: {min(contender.index)} to {max(contender.index)}")
exit(1)

baseline = baseline.loc[baseline.index >= start and baseline.index <= end]
contender = contender.loc[contender.index >= start and contender.index <= end]
baseline = baseline.loc[start:end]
contender = contender.loc[start:end]

# Join the two frames then interpolate - this helps dealing with runs that
# haven't been using the same chunking and/or max-blocks
Expand Down Expand Up @@ -176,23 +229,11 @@ def main():
if args.csv_output:
write_csv_output(stats_df, df, args.csv_output)

print(f"{os.path.basename(args.baseline)} vs {os.path.basename(args.contender)}")
print(stats_df.to_string(
formatters=dict.fromkeys(["bpsd", "tpsd", "timed"], "{:,.2%}".format)
| dict.fromkeys(["bps_x", "bps_y", "tps_x", "tps_y"], "{:,.2f}".format)
| dict.fromkeys(["time_x", "time_y"], prettySecs),
))

total_blocks = df.block_number.max() - df.block_number.min()
time_xt = df.time_x.sum()
time_yt = df.time_y.sum()
timet = time_yt - time_xt

print(f"\nblocks: {total_blocks}, baseline: {prettySecs(time_xt)}, contender: {prettySecs(time_yt)}")
print(f"Time (total): {prettySecs(timet)}, {(timet/time_xt):.2%}")
print("\nbpsd = blocks per sec diff (+), tpsd = txs per sec diff, timed = time to process diff (-)")
print("+ = more is better, - = less is better")
if args.markdown:
write_markdown_output(stats_df, df, args.baseline, args.contender)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to treat markdown in the same way as CSV?
Currently it's CSV (optional) + MD or standard output. I would make it CSV (optional) + MD (optional) + standard output.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently markdown is optional. If we don't pass the --markdown flag, only tab-spaced data is printed to standard output.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the latest commit I've made it more explicit.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CSV is also optional.
I think standard output is good for stdout, but for file formats it is better to save them separately (csv, md, ...).


else:
write_standard_output(stats_df, df, args.baseline, args.contender)

if __name__ == "__main__":
main()
Loading