From 60aba883c8991bdc26d2d93ec018d97a9495acf6 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:04:10 -0500 Subject: [PATCH 1/7] Modify summarize_metrics to return a table of data rather than printing directly --- abm/lib/common.py | 9 ++++++--- abm/lib/history.py | 4 +++- abm/lib/invocation.py | 4 +++- abm/lib/workflow.py | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index dbb2402..57f2d05 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -194,6 +194,7 @@ def find_executable(name): def summarize_metrics(gi, jobs: list): + table = [] header = [ "id", "history_id", @@ -221,8 +222,8 @@ def summarize_metrics(gi, jobs: list): # "swaptotal", # "uname" ] - - print(','.join(header)) + table.append(header) + # print(','.join(header)) for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] @@ -238,7 +239,9 @@ def summarize_metrics(gi, jobs: list): row.append(metrics[key]) else: row.append('') - print(','.join(row), end='\n') + # print(','.join(row), end='\n') + table.append(row) + return table def metrics_to_dict(metrics: list, accept: list): diff --git a/abm/lib/history.py b/abm/lib/history.py index 656fee0..133ab2d 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -368,7 +368,9 @@ def summarize(context: Context, args: list): # job['workflow_id'] = invocation['workflow_id'] # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 0e6807f..e18eccf 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -36,4 +36,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = '' all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 0ba36f7..94a4ab5 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -250,4 +250,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = wid all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) From b27956acafb9d331584536a59111f62d9a82d223 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:55:33 -0500 Subject: [PATCH 2/7] Add --markdown option to summarize commands --- abm/lib/common.py | 8 ++++++++ abm/lib/experiment.py | 24 +++++++++++++++++++----- abm/lib/history.py | 14 +++++++++++--- abm/lib/invocation.py | 5 +++++ abm/lib/menu.yml | 14 +++++++------- abm/lib/workflow.py | 5 +++++ 6 files changed, 55 insertions(+), 15 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 57f2d05..244477b 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -244,6 +244,14 @@ def summarize_metrics(gi, jobs: list): return table +def print_markdown_table(table: list) -> None: + print('| ID | History | Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|') + for row in table[1:]: + line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + print(f'| {line} |') + + def metrics_to_dict(metrics: list, accept: list): result = dict() for m in metrics: diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 9dead51..3e7954b 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -114,31 +114,37 @@ def summarize(context: Context, args: list): :param args[0]: The path to the directory containing metrics filees :return: None """ + markdown = False separator = None input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" for arg in args: if arg in ['-t', '--tsv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('tsv') separator = '\t' elif arg in ['-c', '--csv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return separator = ',' print('csv') elif arg in ['-m', '--model']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('making a model') separator = ',' make_row = make_model_row header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" + elif arg == '--markdown': + if separator is not None or markdown: + print('ERROR: The output format is specified more than once') + return + markdown = True else: # print(f"Input dir {arg}") input_dirs.append(arg) @@ -149,7 +155,11 @@ def summarize(context: Context, args: list): if separator is None: separator = ',' - print(header_row) + if markdown: + print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|") + print("|---|---|---|---|---|---|---|---|") + else: + print(header_row) for input_dir in input_dirs: for file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -162,7 +172,11 @@ def summarize(context: Context, args: list): # print('Ignoring upload tool') continue row = make_row(data) - print(separator.join([str(x) for x in row])) + if markdown: + line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14]) + print(f'| {line} |') + else: + print(separator.join([str(x) for x in row])) except Exception as e: # Silently fail to allow the remainder of the table to be generated. print(f"Unable to process {input_path}") diff --git a/abm/lib/history.py b/abm/lib/history.py index 133ab2d..04e1fcb 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -8,7 +8,7 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics) + print_json, summarize_metrics, print_markdown_table) # # History related functions @@ -339,6 +339,11 @@ def tag(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more history ID values.") return @@ -369,8 +374,11 @@ def summarize(context: Context, args: list): # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index e18eccf..be4368c 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -25,6 +25,11 @@ def doList(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more invocation ID values.") return diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 9aafa1f..c3374bd 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -69,8 +69,8 @@ handler: workflow.inputs - name: [summary, summarize] handler: workflow.summarize - help: generate a CSV with job metrics for all workflow runs - params: ID [ID ...] + help: generate a CSV or markdown table with job metrics for all workflow runs + params: "ID [ID ...] [--markdown]" - name: ['test'] handler: workflow.test help: run some test code @@ -158,8 +158,8 @@ help: show detailed information about a history - name: [summarize, summary, table] handler: history.summarize - params: "ID [ID...]" - help: Generate a CSV table with runtime metrics for all jobs in the history. + params: "ID [ID...] [--markdown]" + help: Generate a CSV or markdown table with runtime metrics for all jobs in the history. - name: [publish, pub] handler: history.publish help: publish the given history @@ -250,7 +250,7 @@ - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize - params: "[-c, --csv, -t, --tsv]" + params: "[-c, --csv, -t, --tsv, --markdown]" - name: [test] help: playground code handler: experiment.test @@ -263,8 +263,8 @@ handler: invocation.doList params: "[-w|--workflow ID] [-h|--history ID]" - name: [summarize] - help: generate a CSV of job metrics for an invocation - params: ID + help: generate a CSV or markdown table of job metrics for an invocation + params: "ID [--markdown]" handler: invocation.summarize - name: [helm] help: execute a helm command diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 94a4ab5..1de403c 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -236,6 +236,11 @@ def rename(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more workflow ID values.") return From 228ac9f13c4d6c2307e59b00141803b4b6965aec Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 09:36:01 -0500 Subject: [PATCH 3/7] Bump to dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 4985c3c..b9270b9 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.8.0-dev.6 +2.9.0-dev.0 From 94f23e030b109b097c7a55f4a75ea707d2482d26 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 10:36:11 -0500 Subject: [PATCH 4/7] Finish summarize --markdown implementations --- abm/lib/common.py | 12 ++++++++---- abm/lib/experiment.py | 2 +- abm/lib/invocation.py | 9 ++++++--- abm/lib/workflow.py | 9 ++++++--- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 244477b..39d611f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -227,12 +227,16 @@ def summarize_metrics(gi, jobs: list): for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] + toolid = job.get('tool_id', 'unknown') + if '/' in toolid: + parts = toolid.split('/') + toolid = f'{parts[-2]}/{parts[-1]}' metrics = metrics_to_dict(job_metrics, header) metrics['id'] = job.get('id', 'unknown') metrics['history_id'] = job.get('history_id', 'unknown') metrics['history_name'] = job.get('history_name', 'unknown') metrics['state'] = job.get('state', 'unknown') - metrics['tool_id'] = job.get('tool_id', 'unknown') + metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') for key in header: if key in metrics: @@ -245,10 +249,10 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|') + print('| ID | History | State |Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|---|') for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) print(f'| {line} |') diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 3e7954b..e533773 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -9,7 +9,7 @@ import benchmark import helm import yaml -from common import Context, load_profiles +from common import Context, load_profiles, print_markdown_table INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index be4368c..c531471 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,4 +1,4 @@ -from common import Context, connect, print_json, summarize_metrics +from common import Context, connect, print_json, summarize_metrics, print_markdown_table def doList(context: Context, args: list): @@ -42,5 +42,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = '' all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 1de403c..54c79a3 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -7,7 +7,7 @@ import planemo import requests import yaml -from common import Context, connect, summarize_metrics +from common import Context, connect, summarize_metrics, print_markdown_table from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -256,5 +256,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = wid all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) From 837b60bd82af0d3d996fe4d65372e6cc7d098b4f Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:08 -0500 Subject: [PATCH 5/7] Bump dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index b9270b9..7299291 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.0 +2.9.0-dev.1 From 7bbafe477e88f31d66d38a385beea08a0f005017 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:58 -0500 Subject: [PATCH 6/7] Add units to markdown table header row. Better formatting for floats --- abm/lib/common.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 39d611f..38ca619 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -249,11 +249,19 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | State |Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|---|') + print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') + print('|---|---|---|---|---|---|') + GB = 1024 * 1024 * 1024 for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) - print(f'| {line} |') + history = row[2] + state = row[3] + tool_id = row[4] + cpu = float(row[7]) / 10**9 + memory = float(row[11]) / GB + runtime = float(row[15]) + # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) + # print(f'| {line} |') + print(f'| {tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') def metrics_to_dict(metrics: list, accept: list): From bfe272b110c98d118ffa3ae3558a3a94fd022d91 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 18:09:08 -0500 Subject: [PATCH 7/7] Better column formatting for markdown tables --- abm/lib/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 38ca619..4eccbb9 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -250,7 +250,7 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') - print('|---|---|---|---|---|---|') + print('|---|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] @@ -261,7 +261,7 @@ def print_markdown_table(table: list) -> None: runtime = float(row[15]) # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) # print(f'| {line} |') - print(f'| {tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') + print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |') def metrics_to_dict(metrics: list, accept: list):