From 60aba883c8991bdc26d2d93ec018d97a9495acf6 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:04:10 -0500 Subject: [PATCH 01/56] Modify summarize_metrics to return a table of data rather than printing directly --- abm/lib/common.py | 9 ++++++--- abm/lib/history.py | 4 +++- abm/lib/invocation.py | 4 +++- abm/lib/workflow.py | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index dbb2402..57f2d05 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -194,6 +194,7 @@ def find_executable(name): def summarize_metrics(gi, jobs: list): + table = [] header = [ "id", "history_id", @@ -221,8 +222,8 @@ def summarize_metrics(gi, jobs: list): # "swaptotal", # "uname" ] - - print(','.join(header)) + table.append(header) + # print(','.join(header)) for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] @@ -238,7 +239,9 @@ def summarize_metrics(gi, jobs: list): row.append(metrics[key]) else: row.append('') - print(','.join(row), end='\n') + # print(','.join(row), end='\n') + table.append(row) + return table def metrics_to_dict(metrics: list, accept: list): diff --git a/abm/lib/history.py b/abm/lib/history.py index 656fee0..133ab2d 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -368,7 +368,9 @@ def summarize(context: Context, args: list): # job['workflow_id'] = invocation['workflow_id'] # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 0e6807f..e18eccf 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -36,4 +36,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = '' all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, 
all_jobs) + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 0ba36f7..94a4ab5 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -250,4 +250,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = wid all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) From b27956acafb9d331584536a59111f62d9a82d223 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:55:33 -0500 Subject: [PATCH 02/56] Add --markdown option to summarize commands --- abm/lib/common.py | 8 ++++++++ abm/lib/experiment.py | 24 +++++++++++++++++++----- abm/lib/history.py | 14 +++++++++++--- abm/lib/invocation.py | 5 +++++ abm/lib/menu.yml | 14 +++++++------- abm/lib/workflow.py | 5 +++++ 6 files changed, 55 insertions(+), 15 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 57f2d05..244477b 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -244,6 +244,14 @@ def summarize_metrics(gi, jobs: list): return table +def print_markdown_table(table: list) -> None: + print('| ID | History | Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|') + for row in table[1:]: + line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + print(f'| {line} |') + + def metrics_to_dict(metrics: list, accept: list): result = dict() for m in metrics: diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 9dead51..3e7954b 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -114,31 +114,37 @@ def summarize(context: Context, args: list): :param args[0]: The path to the directory containing metrics filees :return: None """ + markdown = False separator = None input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" for arg 
in args: if arg in ['-t', '--tsv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('tsv') separator = '\t' elif arg in ['-c', '--csv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return separator = ',' print('csv') elif arg in ['-m', '--model']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('making a model') separator = ',' make_row = make_model_row header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" + elif arg == '--markdown': + if separator is not None or markdown: + print('ERROR: The output format is specified more than once') + return + markdown = True else: # print(f"Input dir {arg}") input_dirs.append(arg) @@ -149,7 +155,11 @@ def summarize(context: Context, args: list): if separator is None: separator = ',' - print(header_row) + if markdown: + print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|") + print("|---|---|---|---|---|---|---|---|") + else: + print(header_row) for input_dir in input_dirs: for file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -162,7 +172,11 @@ def summarize(context: Context, args: list): # print('Ignoring upload tool') continue row = make_row(data) - print(separator.join([str(x) for x in row])) + if markdown: + line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14]) + print(f'| {line} |') + else: + print(separator.join([str(x) for x in row])) except Exception as e: # Silently fail to allow the remainder of the table to be generated. 
print(f"Unable to process {input_path}") diff --git a/abm/lib/history.py b/abm/lib/history.py index 133ab2d..04e1fcb 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -8,7 +8,7 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics) + print_json, summarize_metrics, print_markdown_table) # # History related functions @@ -339,6 +339,11 @@ def tag(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more history ID values.") return @@ -369,8 +374,11 @@ def summarize(context: Context, args: list): # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index e18eccf..be4368c 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -25,6 +25,11 @@ def doList(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more invocation ID values.") return diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 9aafa1f..c3374bd 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -69,8 +69,8 @@ handler: workflow.inputs - name: [summary, summarize] handler: workflow.summarize - help: generate a CSV with job metrics for all workflow runs - params: ID [ID ...] + help: generate a CSV or markdown table with job metrics for all workflow runs + params: "ID [ID ...] 
[--markdown]" - name: ['test'] handler: workflow.test help: run some test code @@ -158,8 +158,8 @@ help: show detailed information about a history - name: [summarize, summary, table] handler: history.summarize - params: "ID [ID...]" - help: Generate a CSV table with runtime metrics for all jobs in the history. + params: "ID [ID...] [--markdown]" + help: Generate a CSV or markdown table with runtime metrics for all jobs in the history. - name: [publish, pub] handler: history.publish help: publish the given history @@ -250,7 +250,7 @@ - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize - params: "[-c, --csv, -t, --tsv]" + params: "[-c, --csv, -t, --tsv, --markdown]" - name: [test] help: playground code handler: experiment.test @@ -263,8 +263,8 @@ handler: invocation.doList params: "[-w|--workflow ID] [-h|--history ID]" - name: [summarize] - help: generate a CSV of job metrics for an invocation - params: ID + help: generate a CSV or markdown table of job metrics for an invocation + params: "ID [--markdown]" handler: invocation.summarize - name: [helm] help: execute a helm command diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 94a4ab5..1de403c 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -236,6 +236,11 @@ def rename(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more workflow ID values.") return From 228ac9f13c4d6c2307e59b00141803b4b6965aec Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 09:36:01 -0500 Subject: [PATCH 03/56] Bump to dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 4985c3c..b9270b9 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.8.0-dev.6 +2.9.0-dev.0 From 94f23e030b109b097c7a55f4a75ea707d2482d26 
Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 10:36:11 -0500 Subject: [PATCH 04/56] Finish summarize --markdown implementations --- abm/lib/common.py | 12 ++++++++---- abm/lib/experiment.py | 2 +- abm/lib/invocation.py | 9 ++++++--- abm/lib/workflow.py | 9 ++++++--- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 244477b..39d611f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -227,12 +227,16 @@ def summarize_metrics(gi, jobs: list): for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] + toolid = job.get('tool_id', 'unknown') + if '/' in toolid: + parts = toolid.split('/') + toolid = f'{parts[-2]}/{parts[-1]}' metrics = metrics_to_dict(job_metrics, header) metrics['id'] = job.get('id', 'unknown') metrics['history_id'] = job.get('history_id', 'unknown') metrics['history_name'] = job.get('history_name', 'unknown') metrics['state'] = job.get('state', 'unknown') - metrics['tool_id'] = job.get('tool_id', 'unknown') + metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') for key in header: if key in metrics: @@ -245,10 +249,10 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|') + print('| ID | History | State |Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|---|') for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) print(f'| {line} |') diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 3e7954b..e533773 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -9,7 +9,7 @@ import benchmark import helm import yaml -from common import Context, load_profiles +from common import Context, load_profiles, print_markdown_table INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" 
diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index be4368c..c531471 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,4 +1,4 @@ -from common import Context, connect, print_json, summarize_metrics +from common import Context, connect, print_json, summarize_metrics, print_markdown_table def doList(context: Context, args: list): @@ -42,5 +42,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = '' all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 1de403c..54c79a3 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -7,7 +7,7 @@ import planemo import requests import yaml -from common import Context, connect, summarize_metrics +from common import Context, connect, summarize_metrics, print_markdown_table from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -256,5 +256,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = wid all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) From 837b60bd82af0d3d996fe4d65372e6cc7d098b4f Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:08 -0500 Subject: [PATCH 05/56] Bump dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index b9270b9..7299291 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.0 +2.9.0-dev.1 From 7bbafe477e88f31d66d38a385beea08a0f005017 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:58 -0500 Subject: [PATCH 06/56] Add units to markdown table header row. 
Better formatting for floats --- abm/lib/common.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 39d611f..38ca619 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -249,11 +249,19 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | State |Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|---|') + print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') + print('|---|---|---|---|---|---|') + GB = 1024 * 1024 * 1024 for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) - print(f'| {line} |') + history = row[2] + state = row[3] + tool_id = row[4] + cpu = float(row[7]) / 10**9 + memory = float(row[11]) / GB + runtime = float(row[15]) + # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) + # print(f'| {line} |') + print(f'| {tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') def metrics_to_dict(metrics: list, accept: list): From bfe272b110c98d118ffa3ae3558a3a94fd022d91 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 18:09:08 -0500 Subject: [PATCH 07/56] Better column formatting for markdown tables --- abm/lib/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 38ca619..4eccbb9 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -250,7 +250,7 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') - print('|---|---|---|---|---|---|') + print('|---|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] @@ -261,7 +261,7 @@ def print_markdown_table(table: list) -> None: runtime = float(row[15]) # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) # print(f'| {line} |') - print(f'| 
{tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') + print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |') def metrics_to_dict(metrics: list, accept: list): From 61b1b2964bd9fe702723ff0205aa115138d01e54 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 20:08:13 -0500 Subject: [PATCH 08/56] Change order that benchmarks are executed during an experiment. --- abm/lib/experiment.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..0b72b2d 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -76,16 +76,16 @@ def run_on_cloud(cloud: str, config: dict): if not helm.update(context, [f"rules/{conf}.yml", namespace, chart]): log.warning(f"job configuration not found: rules/{conf}.yml") continue - for n in range(config['runs']): - history_name_prefix = f"{n+1} {cloud} {conf}" - for workflow_conf in config['benchmark_confs']: + for workflow_conf in config['benchmark_confs']: + for n in range(config['runs']): + history_name_prefix = f"{n+1} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: - for n in range(config['runs']): - history_name_prefix = f"{n+1} {cloud}" - for workflow_conf in config['benchmark_confs']: + for workflow_conf in config['benchmark_confs']: + for n in range(config['runs']): + history_name_prefix = f"{n+1} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) From 9d0eb6db3e871e8c8f81eb61d0e1c6ed37811f75 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:03:18 -0500 Subject: [PATCH 09/56] Add --timeout option to job.wait --- abm/lib/job.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/abm/lib/job.py b/abm/lib/job.py index c0b8ff5..efd8bfa 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -1,8 +1,8 @@ +import datetime 
import json import logging import time -from pprint import pprint - +import argparse from .common import Context, connect, find_history, print_json log = logging.getLogger('abm') @@ -54,18 +54,27 @@ def show(context: Context, args: list): def wait(context: Context, args: list): - if len(args) != 1: - print("ERROR: Invalid parameters. Job ID is required") - return + parser = argparse.ArgumentParser() + parser.add_argument('job_id') + parser.add_argument('-t', '--timeout', default=-1) + params = parser.parse_args(args) + timeout = params.timeout + job_id = params.job_id gi = connect(context) - state = "Unknown" + start_time = time.time() # we only interested in precision to the second waiting = True while waiting: - job = gi.jobs.show_job(args[0], full_details=False) + job = gi.jobs.show_job(job_id, full_details=False) + if job is None or len(job) == 0: + print(f"Job {job_id} not found.") + return state = job["state"] + if timeout > 0: + if time.time() - start_time > timeout: + waiting = False if state == "ok" or state == "error": waiting = False - else: + if waiting: time.sleep(15) print(json.dumps(job, indent=4)) From dfb2d5026490433a9df94cafd1a5694e220a5148 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:32:46 -0500 Subject: [PATCH 10/56] Use --run-number to specify starting int when numbering benchmark runs --- abm/lib/experiment.py | 17 +++++++++++------ abm/lib/menu.yml | 6 +++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..b04a397 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -1,3 +1,4 @@ +import argparse import json import logging import os @@ -26,18 +27,20 @@ def run(context: Context, args: list): :return: True if the benchmarks completed sucessfully. False otherwise. 
""" + parser = argparse.ArgumentParser() + parser.add_argument('benchmark_path') + parser.add_argument('-r', '--run-number', default=-1) + argv = parser.parse_args(args) - if len(args) == 0: - print("ERROR: No benchmarking configuration provided.") - return False + benchmark_path = argv.benchmark_path - benchmark_path = args[0] if not os.path.exists(benchmark_path): print(f"ERROR: Benchmarking configuration not found {benchmark_path}") return False with open(benchmark_path, 'r') as f: config = yaml.safe_load(f) + config['start_at'] = argv.run_number profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) @@ -66,6 +69,8 @@ def run_on_cloud(cloud: str, config: dict): context = Context(cloud) namespace = 'galaxy' chart = 'anvil/galaxykubeman' + start = config['start_at'] + end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] chart = config['galaxy']['chart'] @@ -76,14 +81,14 @@ def run_on_cloud(cloud: str, config: dict): if not helm.update(context, [f"rules/{conf}.yml", namespace, chart]): log.warning(f"job configuration not found: rules/{conf}.yml") continue - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud} {conf}" for workflow_conf in config['benchmark_confs']: benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud}" for workflow_conf in config['benchmark_confs']: benchmark.run( diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index c3374bd..948d44a 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -208,7 +208,7 @@ - name: [wait] help: Wait for a job to finish running handler: job.wait - params: ID + params: "ID [-T|--timeout SECONDS]" - name: [ metrics, stats ] help: display runtime metrics for the job, or a list of jobs contained in a history handler: job.metrics @@ -244,9 +244,9 @@ standalone: true menu: - name: [run] - 
help: run all benchmarks in an experiment + help: run all benchmarks in an experiment. Use --run-number to specify staring counter. handler: experiment.run - params: PATH + params: "PATH [-r|--run-number N]" - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize From e9d037037b07b4dbb103b60cf4c53995acd448ee Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:35:23 -0500 Subject: [PATCH 11/56] Handle case with --run-number is not specified --- abm/lib/experiment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index b04a397..94de288 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -70,6 +70,8 @@ def run_on_cloud(cloud: str, config: dict): namespace = 'galaxy' chart = 'anvil/galaxykubeman' start = config['start_at'] + if start < 0: + start = 1 end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] From 01f624cfc963fdd9da8086ef62c0fd72c7fea3fa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 10:08:39 -0500 Subject: [PATCH 12/56] Improve mardown for experiment summarize --- abm/lib/experiment.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..686301a 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -4,6 +4,7 @@ import threading import traceback from datetime import timedelta +from pprint import pprint from time import perf_counter import benchmark @@ -156,10 +157,11 @@ def summarize(context: Context, args: list): separator = ',' if markdown: - print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|") - print("|---|---|---|---|---|---|---|---|") + print("|Run|Job Conf|Tool|State|Runtime (Sec)|CPU (Sec) |Max Memory (GB)|") + print("|---|---|---|---|---:|---:|---:|") else: print(header_row) + GB = 1024 * 1024 * 1024 for input_dir in input_dirs: for 
file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -173,8 +175,13 @@ def summarize(context: Context, args: list): continue row = make_row(data) if markdown: - line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14]) - print(f'| {line} |') + runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" + cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" + # print(runtime, cpu, memory) + print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + # line = ' | '.join(row[i] for i in [0,2,6,7,10,11,13]) + # print(f'| {line} |') else: print(separator.join([str(x) for x in row])) except Exception as e: From c371f14154fec8f0faa4d25bce35f545eeb646f7 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 12:40:17 -0500 Subject: [PATCH 13/56] Add --sort-by option to experiment.summarize --- abm/lib/experiment.py | 133 +++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 40 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 686301a..9b924bf 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -1,3 +1,4 @@ +import argparse import json import logging import os @@ -120,35 +121,68 @@ def summarize(context: Context, args: list): input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" - for arg in args: - if arg in ['-t', '--tsv']: - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - print('tsv') - separator = '\t' - elif arg in ['-c', '--csv']: - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - separator = ',' - print('csv') - elif arg in ['-m', '--model']: - if separator is not None or markdown: - 
print('ERROR: The output format is specified more than once') - return - print('making a model') - separator = ',' - make_row = make_model_row - header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" - elif arg == '--markdown': - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - markdown = True - else: - # print(f"Input dir {arg}") - input_dirs.append(arg) + # for arg in args: + # if arg in ['-t', '--tsv']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # print('tsv') + # separator = '\t' + # elif arg in ['-c', '--csv']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # separator = ',' + # print('csv') + # elif arg in ['-m', '--model']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # print('making a model') + # separator = ',' + # make_row = make_model_row + # header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" + # elif arg == '--markdown': + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # markdown = True + # else: + # # print(f"Input dir {arg}") + # input_dirs.append(arg) + + parser = argparse.ArgumentParser() + parser.add_argument('dirs', nargs='*') + parser.add_argument('-c', '--csv', action='store_true') + parser.add_argument('-t', '--tsv', action='store_true') + parser.add_argument('-m', '--model', action='store_true') + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['cpu', 'runtime', 'memory']) + 
argv = parser.parse_args(args) + + count = 0 + if argv.csv: + separator = ',' + count += 1 + if argv.tsv: + separator = '\t' + count += 1 + if argv.model: + separator = ',' + make_row = make_model_row + count += 1 + if argv.markdown: + markdown = True + count += 1 + + if count == 0: + print("ERROR: no output format selected") + return + if count > 1: + print("ERROR: multiple output formats selected") + return + + input_dirs = argv.dirs if len(input_dirs) == 0: input_dirs.append('metrics') @@ -161,7 +195,9 @@ def summarize(context: Context, args: list): print("|---|---|---|---|---:|---:|---:|") else: print(header_row) - GB = 1024 * 1024 * 1024 + + table = list() + GB = float(1073741824) for input_dir in input_dirs: for file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -174,16 +210,7 @@ def summarize(context: Context, args: list): # print('Ignoring upload tool') continue row = make_row(data) - if markdown: - runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" - cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" - memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" - # print(runtime, cpu, memory) - print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") - # line = ' | '.join(row[i] for i in [0,2,6,7,10,11,13]) - # print(f'| {line} |') - else: - print(separator.join([str(x) for x in row])) + table.append(row) except Exception as e: # Silently fail to allow the remainder of the table to be generated. 
print(f"Unable to process {input_path}") @@ -191,6 +218,32 @@ def summarize(context: Context, args: list): traceback.print_exc() # pass + def comparator(row): + print('key', row[key]) + print('type', type(row[key])) + return row[key] + + if argv.sort_by: + key = 0 + if argv.sort_by == 'runtime': + key = 10 + elif argv.sort_by == 'cpu': + key = 11 + elif argv.sort_by == 'memory': + key = 13 + table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + + if markdown: + for row in table: + runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" + cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" + # memory = float(row[13]) / GB + print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + else: + for row in table: + print(separator.join([str(x) for x in row])) + accept_metrics = [ 'galaxy_slots', From ac9088046ffd0181f2b017356cac1fe93d86e0ea Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 12:49:38 -0500 Subject: [PATCH 14/56] Dev 2 version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 7299291..f43ee2a 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.1 +2.9.0-dev.2 From c3a56781e4a7e44296f5d3116ed1362a1f4fffa5 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:16:05 -0500 Subject: [PATCH 15/56] Fix exception generating markdown if cell is empty --- abm/lib/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 4eccbb9..09b2c60 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -249,19 +249,19 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') + print('| Tool ID | History | State | Memory (GB) | 
Runtime (sec)|') print('|---|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] state = row[3] tool_id = row[4] - cpu = float(row[7]) / 10**9 - memory = float(row[11]) / GB - runtime = float(row[15]) + # cpu = '' if row[7] == '' else float(row[7]) / 10**9 + memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" + runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) # print(f'| {line} |') - print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |') + print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') def metrics_to_dict(metrics: list, accept: list): From 1572f1171c502bdf6855e85e1650bdc05af8af37 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:19:12 -0500 Subject: [PATCH 16/56] Version dev.3 --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index f43ee2a..0496284 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.2 +2.9.0-dev.3 From e8940b89a7f91102c3937c25d5303f73bf83aa3e Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:57:25 -0500 Subject: [PATCH 17/56] Dev version 4 --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 0496284..a7902d7 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.3 +2.9.0-dev.4 From c0086e7490412e3b93fa112edc3ba93feebcd929 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 11:09:01 -0500 Subject: [PATCH 18/56] Add --sort-by to all summarize commands --- abm/lib/common.py | 79 +++++++++++++++++++++++++++---------------- abm/lib/experiment.py | 70 ++++++++++++-------------------------- abm/lib/history.py | 51 ++++++++++++++-------------- abm/lib/invocation.py | 31 +++++++++++------ abm/lib/workflow.py | 49 ++++++++++++--------------- 5 files changed, 139 insertions(+), 141 
deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 09b2c60..7814fc6 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -192,37 +192,41 @@ def find_executable(name): # "swaptotal", # "uname" +table_header = [ + "id", + "history_id", + "history_name", + "state", + "tool_id", + "invocation_id", + "workflow_id", + "cpuacct.usage", + # "end_epoch", + "galaxy_memory_mb", + "galaxy_slots", + # "memory.failcnt", + "memory.limit_in_bytes", + "memory.max_usage_in_bytes", + # "memory.memsw.limit_in_bytes", + # "memory.memsw.max_usage_in_bytes", + # "memory.oom_control.oom_kill_disable", + # "memory.oom_control.under_oom", + "memory.soft_limit_in_bytes", + "memtotal", + "processor_count", + "runtime_seconds", + # "start_epoch", + # "swaptotal", + # "uname" +] + +def print_table_header(): + print(','.join(table_header)) + def summarize_metrics(gi, jobs: list): table = [] - header = [ - "id", - "history_id", - "history_name", - "state", - "tool_id", - "invocation_id", - "workflow_id", - "cpuacct.usage", - # "end_epoch", - "galaxy_memory_mb", - "galaxy_slots", - # "memory.failcnt", - "memory.limit_in_bytes", - "memory.max_usage_in_bytes", - # "memory.memsw.limit_in_bytes", - # "memory.memsw.max_usage_in_bytes", - # "memory.oom_control.oom_kill_disable", - # "memory.oom_control.under_oom", - "memory.soft_limit_in_bytes", - "memtotal", - "processor_count", - "runtime_seconds", - # "start_epoch", - # "swaptotal", - # "uname" - ] - table.append(header) + # table.append(header) # print(','.join(header)) for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) @@ -231,14 +235,14 @@ def summarize_metrics(gi, jobs: list): if '/' in toolid: parts = toolid.split('/') toolid = f'{parts[-2]}/{parts[-1]}' - metrics = metrics_to_dict(job_metrics, header) + metrics = metrics_to_dict(job_metrics, table_header) metrics['id'] = job.get('id', 'unknown') metrics['history_id'] = job.get('history_id', 'unknown') metrics['history_name'] = job.get('history_name', 
'unknown') metrics['state'] = job.get('state', 'unknown') metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') - for key in header: + for key in table_header: if key in metrics: row.append(metrics[key]) else: @@ -334,3 +338,18 @@ def make_result(data): def _make_dataset_element(name, value): # print(f"Making dataset element for {name} = {value}({type(value)})") return dataset_collections.HistoryDatasetElement(name=name, id=value) + +def get_float_key(column: int): + def get_key(row: list): + if row[column] == '': + return -1 + return float(row[column]) + return get_key + +def get_str_key(column: int): + # print(f"Getting string key for column {column}") + def get_key(row: list): + # print(f"Sorting by column {column} key {row[column]}") + return row[column] + return get_key + diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 81c0c5c..2c03563 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -11,7 +11,7 @@ import benchmark import helm import yaml -from common import Context, load_profiles, print_markdown_table +from common import Context, load_profiles, print_markdown_table, get_str_key, get_float_key INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" @@ -124,38 +124,8 @@ def summarize(context: Context, args: list): """ markdown = False separator = None - input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" - # for arg in args: - # if arg in ['-t', '--tsv']: - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # print('tsv') - # separator = '\t' - # elif arg in ['-c', '--csv']: - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # separator = ',' - # print('csv') - # elif arg in ['-m', '--model']: - # if separator 
is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # print('making a model') - # separator = ',' - # make_row = make_model_row - # header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" - # elif arg == '--markdown': - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # markdown = True - # else: - # # print(f"Input dir {arg}") - # input_dirs.append(arg) parser = argparse.ArgumentParser() parser.add_argument('dirs', nargs='*') @@ -163,7 +133,7 @@ def summarize(context: Context, args: list): parser.add_argument('-t', '--tsv', action='store_true') parser.add_argument('-m', '--model', action='store_true') parser.add_argument('--markdown', action='store_true') - parser.add_argument('-s', '--sort-by', choices=['cpu', 'runtime', 'memory']) + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) argv = parser.parse_args(args) count = 0 @@ -197,8 +167,8 @@ def summarize(context: Context, args: list): separator = ',' if markdown: - print("|Run|Job Conf|Tool|State|Runtime (Sec)|CPU (Sec) |Max Memory (GB)|") - print("|---|---|---|---|---:|---:|---:|") + print("|Run|Inputs|Job Conf|Tool|State|Runtime (Sec)|Max Memory (GB)|") + print("|---|---|---|---|---|---:|---:|") else: print(header_row) @@ -218,34 +188,38 @@ def summarize(context: Context, args: list): row = make_row(data) table.append(row) except Exception as e: - # Silently fail to allow the remainder of the table to be generated. print(f"Unable to process {input_path}") print(e) traceback.print_exc() + # Silently fail to allow the remainder of the table to be generated. 
# pass - def comparator(row): - print('key', row[key]) - print('type', type(row[key])) - return row[key] - + reverse = True if argv.sort_by: - key = 0 + comp = get_str_key(6) if argv.sort_by == 'runtime': - key = 10 - elif argv.sort_by == 'cpu': - key = 11 + # key = 10 + comp = get_float_key(10) + # elif argv.sort_by == 'cpu': + # comp = get_float_comparator(11) + # #key = 11 elif argv.sort_by == 'memory': - key = 13 - table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + comp = get_float_key(13) + # key = 13 + elif argv.sort_by == 'tool': + # print('Getting string key accessor.') + comp = get_str_key(6) + reverse = False + # table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + table.sort(key=comp, reverse=reverse) if markdown: for row in table: runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" - cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + # cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" # memory = float(row[13]) / GB - print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + print(f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |") else: for row in table: print(separator.join([str(x) for x in row])) diff --git a/abm/lib/history.py b/abm/lib/history.py index 04e1fcb..1f6dda4 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -1,3 +1,4 @@ +import argparse import json import os import sys @@ -8,7 +9,8 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics, print_markdown_table) + print_json, summarize_metrics, print_markdown_table, + get_float_key, get_str_key, print_table_header) # # History related functions @@ -339,18 +341,20 @@ def tag(context: Context, args: list): def 
summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') + parser = argparse.ArgumentParser() + parser.add_argument('id_list', nargs='+') + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) - if len(args) == 0: + if len(argv.id_list) == 0: print("ERROR: Provide one or more history ID values.") return gi = connect(context) all_jobs = [] - while len(args) > 0: - hid = find_history(gi, args.pop(0)) + id_list = argv.id_list + while len(id_list) > 0: + hid = find_history(gi, id_list.pop(0)) history = gi.histories.show_history(history_id=hid) jobs = gi.jobs.get_jobs(history_id=hid) for job in jobs: @@ -358,25 +362,23 @@ def summarize(context: Context, args: list): job['history_id'] = hid job['history_name'] = history['name'] job['workflow_id'] = '' - # if 'workflow_id' in invocation: - # job['workflow_id'] = invocation['workflow_id'] all_jobs.append(job) - # invocations = gi.invocations.get_invocations(history_id=hid) - # for invocation in invocations: - # id = invocation['id'] - # #jobs = gi.jobs.get_jobs(history_id=hid, invocation_id=id) - # jobs = gi.jobs.get_jobs(history_id=hid) - # for job in jobs: - # job['invocation_id'] = id - # job['history_id'] = hid - # if 'workflow_id' in invocation: - # job['workflow_id'] = invocation['workflow_id'] - # all_jobs.append(job) - # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: 
print(','.join(row)) @@ -437,9 +439,6 @@ def wait_for(gi: GalaxyInstance, history_id: str): waiting = False if waiting: time.sleep(30) - # elif state == 'paused': - # paused += 1 - # print(f"{job['id']}\t{job['state']}\t{job['update_time']}\t{job['tool_id']}") class JobStates: diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index c531471..00baa9f 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,4 +1,6 @@ -from common import Context, connect, print_json, summarize_metrics, print_markdown_table +import argparse +from common import Context, connect, print_json, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ + print_table_header def doList(context: Context, args: list): @@ -25,14 +27,11 @@ def doList(context: Context, args: list): def summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') - - if len(args) == 0: - print("ERROR: Provide one or more invocation ID values.") - return + parser = argparse.ArgumentParser() + parser.add_argument('id', nargs=1) + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) gi = connect(context) id = args[0] all_jobs = [] @@ -42,8 +41,20 @@ def summarize(context: Context, args: list): job['workflow_id'] = '' all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 54c79a3..1e21e77 100644 --- 
a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -1,13 +1,14 @@ +import argparse import json import logging import os from pathlib import Path from pprint import pprint -import planemo import requests import yaml -from common import Context, connect, summarize_metrics, print_markdown_table +from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ + print_table_header from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -81,12 +82,6 @@ def import_from_url(context: Context, args: list): input_text = response.text with open(cached_file, 'w') as f: f.write(input_text) - - # response = requests.get(url) - # if (response.status_code != 200): - # print(f"ERROR: There was a problem downloading the workflow: {response.status_code}") - # print(response.reason) - # return try: workflow = json.loads(input_text) except Exception as e: @@ -164,30 +159,21 @@ def invocation(context: Context, args: list): print("ERROR: Invalid paramaeters. 
A workflow ID invocation ID are required") return workflow_id = None - invocation_id = None while len(args) > 0: arg = args.pop(0) if arg in ['-w', '--work', '--workflow']: print("Setting workflow id") workflow_id = args.pop(0) - # elif arg in ['-i', '--invoke', '--invocation']: - # invocation_id = args.pop(0) - # print("Setting invocation id") else: print(f'Invalid parameter: "{arg}') return if workflow_id is None: print("ERROR: No workflow ID provided") return - # if invocation_id is None: - # print("ERROR: No invocation ID provided") - # return gi = connect(context) - # result = gi.workflows.show_invocation(workflow_id, invocation_id) invocations = gi.invocations.get_invocations( workflow_id=workflow_id, view='element', step_details=True ) - # print(json.dumps(result, indent=4)) print('ID\tState\tWorkflow\tHistory') for invocation in invocations: id = invocation['id'] @@ -236,16 +222,13 @@ def rename(context: Context, args: list): def summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') - - if len(args) == 0: - print("ERROR: Provide one or more workflow ID values.") - return + parser = argparse.ArgumentParser() + parser.add_argument('id', nargs=1) + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) gi = connect(context) - wid = args[0] + wid = argv.id all_jobs = [] invocations = gi.invocations.get_invocations(workflow_id=wid) for invocation in invocations: @@ -256,8 +239,20 @@ def summarize(context: Context, args: list): job['workflow_id'] = wid all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = 
False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: print(','.join(row)) From a253ed15bb068f6ef0c72ed02500cdcd7f104df1 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 13:53:58 -0500 Subject: [PATCH 19/56] Fix header for markdown tables --- abm/VERSION | 2 +- abm/lib/common.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/abm/VERSION b/abm/VERSION index a7902d7..32d0ce3 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.4 +2.9.0-dev.5 diff --git a/abm/lib/common.py b/abm/lib/common.py index 7814fc6..13223ac 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -254,7 +254,7 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: print('| Tool ID | History | State | Memory (GB) | Runtime (sec)|') - print('|---|---|---|---:|---:|---:|') + print('|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] @@ -263,8 +263,6 @@ def print_markdown_table(table: list) -> None: # cpu = '' if row[7] == '' else float(row[7]) / 10**9 memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" - # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) - # print(f'| {line} |') print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') From 3d627d74ff95e8fd13e2fcab0fa297cd0e4615aa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 20:41:51 -0500 Subject: [PATCH 20/56] Add invocation.show --- abm/lib/invocation.py | 11 ++++++++++- abm/lib/menu.yml | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 00baa9f..adabbc5 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,6 +1,6 @@ import argparse from common import Context, connect, print_json, summarize_metrics, print_markdown_table, 
get_float_key, get_str_key, \ - print_table_header + print_table_header, print_yaml def doList(context: Context, args: list): @@ -26,6 +26,15 @@ def doList(context: Context, args: list): print(f'{id}\t{state}\t{workflow}\t{history}') +def show(context: Context, args: list): + if len(args) == 0: + print("ERROR: no invocation ID was provided") + return + gi = connect(context) + invocation = gi.invocations.show_invocation(args[0]) + print_yaml(invocation) + + def summarize(context: Context, args: list): parser = argparse.ArgumentParser() parser.add_argument('id', nargs=1) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 948d44a..3d6d3e7 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -262,6 +262,10 @@ help: list all invocations. handler: invocation.doList params: "[-w|--workflow ID] [-h|--history ID]" + - name: [show] + help: display information about the workflow invocation + params: ID + handler: invocation.show - name: [summarize] help: generate a CSV or markdown table of job metrics for an invocation params: "ID [--markdown]" From dac22bc4b10126ede560933c0e8d0944b1cb35d3 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 12:14:19 -0500 Subject: [PATCH 21/56] Adde help goal to the Makefile --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 10fc6f2..2a6432c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,15 @@ .PHONY: dist +help: + @echo + @echo "GOALS" + @echo " clean - deletes the dist directory and egg-info" + @echo " dist - creates the distribution package (wheel)" + @echo " format - runs Black and isort" + @echo " test-deploy - deploys to test.pypi.org" + @echo " deploy - deploys to pypi.org" + @echo " release - creates a GitHub release package" + @echo + dist: python3 setup.py sdist bdist_wheel From 24e327a5c5e658b714e661220ef4520237b50361 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 13:07:42 -0500 Subject: [PATCH 22/56] Improve history name lookup --- 
abm/lib/common.py | 17 +++++++++++++++-- abm/lib/invocation.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 13223ac..1e3d797 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -224,6 +224,18 @@ def print_table_header(): print(','.join(table_header)) +history_name_cache = dict() +def get_history_name(gi, hid: str) -> str: + if hid in history_name_cache: + return history_name_cache[hid] + history = gi.histories.show_history(hid) + if history is None: + return 'unknown' + name = history['name'] + history_name_cache[hid] = name + return name + + def summarize_metrics(gi, jobs: list): table = [] # table.append(header) @@ -237,8 +249,9 @@ def summarize_metrics(gi, jobs: list): toolid = f'{parts[-2]}/{parts[-1]}' metrics = metrics_to_dict(job_metrics, table_header) metrics['id'] = job.get('id', 'unknown') - metrics['history_id'] = job.get('history_id', 'unknown') - metrics['history_name'] = job.get('history_name', 'unknown') + hid = job.get('history_id', 'unknown') + metrics['history_id'] = hid + metrics['history_name'] = get_history_name(gi, hid) metrics['state'] = job.get('state', 'unknown') metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index adabbc5..84e3906 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -42,7 +42,7 @@ def summarize(context: Context, args: list): parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) argv = parser.parse_args(args) gi = connect(context) - id = args[0] + id = argv.id[0] all_jobs = [] jobs = gi.jobs.get_jobs(invocation_id=id) for job in jobs: From 4958a7af3baaeb15128488ea793461e67d6e162d Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 14:04:21 -0500 Subject: [PATCH 23/56] Round up memory and runtime values if they would display zeroes --- abm/lib/common.py | 11 +++++++++-- 1 file changed, 9 
insertions(+), 2 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 1e3d797..9903b6e 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,6 +2,7 @@ import os import subprocess import sys +from math import ceil import bioblend.galaxy import lib @@ -270,12 +271,18 @@ def print_markdown_table(table: list) -> None: print('|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: + # memory = '' + # if row[11] != '': + # memory = float(row[11]) / GB + # if memory < 0.1: + # memory = 0.1 + # memory = f"{memory:3.1f}" history = row[2] state = row[3] tool_id = row[4] # cpu = '' if row[7] == '' else float(row[7]) / 10**9 - memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" - runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" + memory = '' if row[11] == '' else f"{max(0.1, float(row[11]) / GB):3.1f}" + runtime = '' if row[15] == '' else f"{max(1, float(row[15])):5.0f}" print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') From d0c526b2b2d0919f1131edb0dbae8886638aeb05 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:35:01 -0500 Subject: [PATCH 24/56] Limit the number of attempts a job will be restarted. --- abm/lib/history.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/abm/lib/history.py b/abm/lib/history.py index 1f6dda4..f3799df 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -16,6 +16,8 @@ # History related functions # +# The number of times a failed job will be restarted. 
+RESTART_MAX = 3 def longest_name(histories: list): longest = 0 @@ -397,10 +399,21 @@ def wait(context: Context, args: list): wait_for(gi, history_id) +def kill_all_jobs(gi: GalaxyInstance, job_list:list): + cancel_states = ['new', 'running', 'paused'] + for job in job_list: + if job['state'] in cancel_states: + print(f"Cancelling job {job['tool_id']}") + gi.jobs.cancel_job(job['id']) + else: + print(f"Job {job['id']} for tool {job['tool_id']} is in state {job['state']}") + + def wait_for(gi: GalaxyInstance, history_id: str): errored = [] waiting = True job_states = JobStates() + restart_counts = dict() while waiting: restart = [] status_counts = dict() @@ -421,9 +434,18 @@ def wait_for(gi: GalaxyInstance, history_id: str): elif state == 'error': terminal += 1 if id not in errored: - restart.append(id) + tool = job['tool_id'] + if tool in restart_counts: + restart_counts[tool] += 1 + else: + restart_counts[tool] = 1 + if restart_counts[tool] < RESTART_MAX: + restart.append(id) + else: + kill_all_jobs(gi, job_list) + waiting = False errored.append(id) - if len(restart) > 0: + if len(restart) > 0 and waiting: for job in restart: print(f"Restaring job {job}") try: From 32077fa704a45f4aa12dca023961a7cc5b2732ee Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:44:00 -0500 Subject: [PATCH 25/56] Add document to menu.yml for the --sort-by option --- abm/lib/menu.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 3d6d3e7..083abe6 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -70,7 +70,7 @@ - name: [summary, summarize] handler: workflow.summarize help: generate a CSV or markdown table with job metrics for all workflow runs - params: "ID [ID ...] [--markdown]" + params: "ID [ID ...] 
[--markdown] [-s|--sort-by (tool,runtime,memory)" - name: ['test'] handler: workflow.test help: run some test code @@ -158,7 +158,7 @@ help: show detailed information about a history - name: [summarize, summary, table] handler: history.summarize - params: "ID [ID...] [--markdown]" + params: "ID [ID...] [--markdown] [-s|--sort-by (tool,runtime,memory)]" help: Generate a CSV or markdown table with runtime metrics for all jobs in the history. - name: [publish, pub] handler: history.publish @@ -250,7 +250,7 @@ - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize - params: "[-c, --csv, -t, --tsv, --markdown]" + params: "[-c, --csv, -t, --tsv, --markdown] [-s|--sort-by (tool,runtime,memory)]" - name: [test] help: playground code handler: experiment.test @@ -268,7 +268,7 @@ handler: invocation.show - name: [summarize] help: generate a CSV or markdown table of job metrics for an invocation - params: "ID [--markdown]" + params: "ID [--markdown] [-s|--sort-by (tool, runtime, memory)]" handler: invocation.summarize - name: [helm] help: execute a helm command From 7c2ad51a77ab9b109dcfa70f747f5f9b5fc14b14 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:48:38 -0500 Subject: [PATCH 26/56] Add documentation for experiment.summarize --markdown --- abm/lib/menu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 083abe6..607220a 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -248,7 +248,7 @@ handler: experiment.run params: "PATH [-r|--run-number N]" - name: [summarize, summary] - help: summarize metrics to a CSV or TSV file. + help: summarize metrics to a CSV, TSV or markdown file. 
handler: experiment.summarize params: "[-c, --csv, -t, --tsv, --markdown] [-s|--sort-by (tool,runtime,memory)]" - name: [test] From 211bab9c3c0ca296f85b2bc82e22ee47de84995c Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:38:30 -0500 Subject: [PATCH 27/56] Pass env to all helm invocations --- abm/lib/helm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/abm/lib/helm.py b/abm/lib/helm.py index 52a6a60..da92bc3 100644 --- a/abm/lib/helm.py +++ b/abm/lib/helm.py @@ -59,7 +59,7 @@ def update(context: Context, args: list): print('Waiting for the new deployments to come online') # Give kubernetes a moment to start processing the update. time.sleep(5) - wait_until_ready(namespace) + wait_until_ready(namespace, env) return True @@ -133,9 +133,9 @@ def wait_for(kubectl: str, namespace: str, name: str, env: dict): # wait_for(kubectl, namespace, 'galaxy-job', env) # wait_for(kubectl, namespace, 'galaxy-web', env) # wait_for(kubectl, namespace, 'galaxy-workflow', env) -def wait_until_ready(namespace: str): +def wait_until_ready(namespace: str, env: dict): kubectl = find_executable('kubectl') - data = run(f"{kubectl} get deployment -n {namespace} -o json") + data = run(f"{kubectl} get deployment -n {namespace} -o json", env) deployment_data = json.loads(data) deployments = list() for deployment in deployment_data['items']: @@ -146,7 +146,7 @@ def wait_until_ready(namespace: str): for deployment in deployments: print( run( - f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch" + f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", env ) ) From 3f6890a8dacb9a039b84dae5041b7e4f43520711 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:39:11 -0500 Subject: [PATCH 28/56] Add try_for method to retry api calls --- abm/lib/common.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/abm/lib/common.py b/abm/lib/common.py index 9903b6e..96a21eb 100644 --- 
a/abm/lib/common.py +++ b/abm/lib/common.py @@ -24,6 +24,21 @@ } +def try_for(f, limit=3): + count = 0 + running = True + result = None + while running: + try: + count += 1 + result = f() + running = False + except Exception as e: + if count >= limit: + raise e + return result + + class Context: def __init__(self, *args): if len(args) == 1: From 0d00e65f96574b38000e1cdf1be3ab992f6cab66 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:39:59 -0500 Subject: [PATCH 29/56] Fix bug handling start at value --- abm/lib/experiment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 2c03563..bc29218 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -42,7 +42,8 @@ def run(context: Context, args: list): with open(benchmark_path, 'r') as f: config = yaml.safe_load(f) config['start_at'] = argv.run_number - + print(f"Starting with run number {argv.run_number}") + profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) threads = [] @@ -70,7 +71,7 @@ def run_on_cloud(cloud: str, config: dict): context = Context(cloud) namespace = 'galaxy' chart = 'anvil/galaxykubeman' - start = config['start_at'] + start = int(config['start_at']) if start < 0: start = 1 end = start + config['runs'] From 382bfb32d6659a81b7f2084239a2a3f886f0a5a1 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:40:38 -0500 Subject: [PATCH 30/56] Retry invoking and waiting for invocations --- abm/lib/benchmark.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index b7cb745..68e2247 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -8,7 +8,7 @@ from bioblend.galaxy import GalaxyInstance, dataset_collections from lib import INVOCATIONS_DIR, METRICS_DIR, Keys from lib.common import (Context, _get_dataset_data, _make_dataset_element, - connect, print_json) + connect, print_json, 
try_for) from lib.history import wait_for log = logging.getLogger('abm') @@ -224,16 +224,20 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): else: raise Exception(f'Invalid input value') print(f"Running workflow {wfid} in history {new_history_name}") - invocation = gi.workflows.invoke_workflow( + f = lambda : gi.workflows.invoke_workflow( wfid, inputs=inputs, history_name=new_history_name ) + invocation = try_for(f, 3) id = invocation['id'] # invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False) + f = lambda: gi.invocations.wait_for_invocation(id, 86400, 10, False) try: - invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False) - except: + invocations = try_for(f, 2) + except Exception as e: + print(f"Exception waiting for invocations") pprint(invocation) sys.exc_info() + raise e print("Waiting for jobs") if history_prefix is not None: parts = history_prefix.split() From ab3ec5c3f567299180b63008fdfa8555e41cc18c Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 20 Dec 2023 16:11:43 -0500 Subject: [PATCH 31/56] Fix run numbering range. 
--- abm/lib/experiment.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index bc29218..49b7833 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -43,7 +43,7 @@ def run(context: Context, args: list): config = yaml.safe_load(f) config['start_at'] = argv.run_number print(f"Starting with run number {argv.run_number}") - + profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) threads = [] @@ -72,6 +72,7 @@ def run_on_cloud(cloud: str, config: dict): namespace = 'galaxy' chart = 'anvil/galaxykubeman' start = int(config['start_at']) + print(f"Staring run number {start}") if start < 0: start = 1 end = start + config['runs'] @@ -86,14 +87,14 @@ def run_on_cloud(cloud: str, config: dict): log.warning(f"job configuration not found: rules/{conf}.yml") continue for workflow_conf in config['benchmark_confs']: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: for workflow_conf in config['benchmark_confs']: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] From fe6567d51226ed49556611a7159d74f13c1183b7 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 20 Dec 2023 16:36:23 -0500 Subject: [PATCH 32/56] Retry getting jobs list --- abm/lib/history.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/history.py b/abm/lib/history.py index f3799df..0e4cc0b 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -10,7 +10,7 @@ from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header) + get_float_key, get_str_key, 
print_table_header, try_for) # # History related functions @@ -418,7 +418,7 @@ def wait_for(gi: GalaxyInstance, history_id: str): restart = [] status_counts = dict() terminal = 0 - job_list = gi.jobs.get_jobs(history_id=history_id) + job_list = try_for(lambda: gi.jobs.get_jobs(history_id=history_id)) for job in job_list: job_states.update(job) state = job['state'] From d078c0b281a8601b9e037fe8ae57a1fbfafc4c58 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:17:45 -0500 Subject: [PATCH 33/56] Use argparse in dataset.list --- abm/lib/dataset.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 6ae32a8..52978c8 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -1,3 +1,4 @@ +import argparse import json import os from pathlib import Path @@ -9,18 +10,34 @@ find_history, print_json) -def list(context: Context, args: list): +def list(context: Context, argv: list): + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--state', help='list jobs in this state') + parser.add_argument('--history', help='show jobs in the given history') + parser.add_argument('-t', '--tool', help='only show jobs generate by this tool') + args = parser.parse_args(argv) + kwargs = {'limit': 10000, 'offset': 0, 'deleted': False} gi = connect(context) - kwargs = {'limit': 10000, 'offset': 0} - if len(args) > 0: - if args[0] in ['-s', '--state']: - if len(args) != 2: - print("ERROR: Invalid command.") - return - kwargs['state'] = args[1] - else: - print(f"ERROR: Invalid parameter: {args[0]}") + if args.state: + kwargs['state'] = args.state + if args.history: + hid = find_history(gi, args.history) + if hid is None: + print("ERROR: No such history") return + kwargs['history_id'] = hid + if args.tool: + kwargs['tool_id'] = args.tool + + # if len(args) > 0: + # if args[0] in ['-s', '--state']: + # if len(args) != 2: + # print("ERROR: Invalid command.") + # 
return + # kwargs['state'] = args[1] + # else: + # print(f"ERROR: Invalid parameter: {args[0]}") + # return # datasets = gi.datasets.get_datasets(limit=10000, offset=0) # , deleted=True, purged=True) datasets = gi.datasets.get_datasets(**kwargs) if len(datasets) == 0: From 04763b4290dea06048daf031ea2e14b882181e00 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:29:37 -0500 Subject: [PATCH 34/56] Don't add one to the run number when generating the history name. --- abm/lib/experiment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 49b7833..823ea92 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -88,14 +88,14 @@ def run_on_cloud(cloud: str, config: dict): continue for workflow_conf in config['benchmark_confs']: for n in range(start, end): - history_name_prefix = f"{n+1} {cloud} {conf}" + history_name_prefix = f"{n} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: for workflow_conf in config['benchmark_confs']: for n in range(start, end): - history_name_prefix = f"{n+1} {cloud}" + history_name_prefix = f"{n} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) From 1fc5c04a96f588e3c6acced15e3d15b834363070 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:32:54 -0500 Subject: [PATCH 35/56] Bump version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 32d0ce3..32a0e4f 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.5 +2.9.0-dev.6 From 9e62c702e48db2fc07065e8a0505a8262b4920d9 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 12:33:22 -0500 Subject: [PATCH 36/56] Added code documentation. 
--- abm/lib/__init__.py | 6 ++++-- abm/lib/benchmark.py | 27 ++++++++++++++------------- abm/lib/dataset.py | 6 +++--- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/abm/lib/__init__.py b/abm/lib/__init__.py index 2cdb864..4efc7e1 100644 --- a/abm/lib/__init__.py +++ b/abm/lib/__init__.py @@ -4,14 +4,16 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__))) -# from common import parse_profile - +# Where the workflow invocation data returned by Galaxy will be saved. INVOCATIONS_DIR = "invocations" +# Where workflow runtime metrics will be saved. METRICS_DIR = "metrics" +# Global instance of a YAML parser so we can reuse it if needed. parser = None +# Keys used in various dictionaries. class Keys: NAME = 'name' RUNS = 'runs' diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 68e2247..5afd693 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -16,13 +16,10 @@ def run_cli(context: Context, args: list): """ - Runs a single workflow defined by *args[0]* + Command line handler to run a single benchmark. - :param args: a list that contains: - args[0] - the path to the benchmark configuration file - args[1] - the prefix to use when creating the new history in Galaxy - args[2] - the name of the experiment, if part of one. This is used to - generate output folder names. + :param context: a context object the defines how to connect to the Galaxy server. + :param args: parameters from the command line :return: True if the workflows completed sucessfully. False otherwise. """ @@ -43,11 +40,15 @@ def run_cli(context: Context, args: list): def run(context: Context, workflow_path, history_prefix: str, experiment: str): - # if len(args) > 1: - # history_prefix = args[1] - # if len(args) > 2: - # experiment = args[2].replace(' ', '_').lower() + """ + Does the actual work of running a benchmark. + :param context: a context object the defines how to connect to the Galaxy server. + :param workflow_path: path to the ABM workflow file. 
(benchmark really). NOTE this is NOT the Galaxy .ga file. + :param history_prefix: a prefix value used when generating new history names. + :param experiment: the name of the experiment (arbitrary string). Used to generate new history names. + :return: True if the workflow run completed successfully. False otherwise. + """ if os.path.exists(INVOCATIONS_DIR): if not os.path.isdir(INVOCATIONS_DIR): print('ERROR: Can not save invocation status, directory name in use.') @@ -76,7 +77,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): workflows = parse_workflow(workflow_path) if not workflows: print(f"Unable to load any workflow definitions from {workflow_path}") - return + return False print(f"Found {len(workflows)} workflow definitions") for workflow in workflows: @@ -173,7 +174,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): histories = gi.histories.get_histories(name=spec['history']) if len(histories) == 0: print(f"ERROR: History {spec['history']} not foune") - return + return False hid = histories[0]['id'] pairs = 0 paired_list = spec['paired'] @@ -416,7 +417,7 @@ def validate(context: Context, args: list): def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): - """Blocks until all jobs defined in the *invocations* to complete. + """Blocks until all jobs defined in *invocations* to complete. 
:param gi: The *GalaxyInstance** running the jobs :param invocations: diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 52978c8..a8bfd88 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -12,9 +12,9 @@ def list(context: Context, argv: list): parser = argparse.ArgumentParser() - parser.add_argument('-s', '--state', help='list jobs in this state') - parser.add_argument('--history', help='show jobs in the given history') - parser.add_argument('-t', '--tool', help='only show jobs generate by this tool') + parser.add_argument('-s', '--state', help='list datasets in this state') + parser.add_argument('--history', help='show datasets in the given history') + parser.add_argument('-t', '--tool', help='only show datasets generate by this tool') args = parser.parse_args(argv) kwargs = {'limit': 10000, 'offset': 0, 'deleted': False} gi = connect(context) From 6c07eb7f1e68947e56eaa50692843c6fbadafe7a Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 12:37:35 -0500 Subject: [PATCH 37/56] Print the starting run number after its value has been checked. --- abm/lib/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 823ea92..5750fe2 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -75,6 +75,7 @@ def run_on_cloud(cloud: str, config: dict): print(f"Staring run number {start}") if start < 0: start = 1 + print(f"Staring run number {start}") end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] From 29f5436dc953ef0d80281743859ca3f1cebbbf8b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 14:16:55 -0500 Subject: [PATCH 38/56] More code documentation. 
--- abm/lib/benchmark.py | 57 +++++++++++++++++++++++++++++++++-- abm/lib/cloudlaunch.py | 2 ++ abm/lib/common.py | 68 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 11 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 5afd693..fcf2fd7 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -270,6 +270,14 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): def translate(context: Context, args: list): + """ + Translates the human readable names of datasets and workflows in to the Galaxy + ID that is unique to each server. + + :param context: the conext object used to connect to the Galaxy server + :param args: [0] the path to the benchmarking YAML file to translate + :return: Nothing. Prints the translated workflow file to stdout. + """ if len(args) == 0: print('ERROR: no workflow configuration specified') return @@ -312,6 +320,14 @@ def translate(context: Context, args: list): def validate(context: Context, args: list): + """ + Checks to see if the workflow and all datasets defined in the benchmark can + be found on the server. + + :param context: the context object used to connect to the Galaxy instance + :param args: [0] the benchmark YAML file to be validated. + :return: + """ if len(args) == 0: print('ERROR: no workflow configuration specified') return @@ -417,10 +433,10 @@ def validate(context: Context, args: list): def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): - """Blocks until all jobs defined in *invocations* to complete. + """Blocks until all jobs defined in *invocations* are complete (in a terminal state). 
:param gi: The *GalaxyInstance** running the jobs - :param invocations: + :param invocations: a dictionary containing information about the jobs invoked :return: """ wfid = invocations['workflow_id'] @@ -490,6 +506,11 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): def parse_workflow(workflow_path: str): + """ + Loads the benchmark YAML file. + :param workflow_path: the path to the file to be loaded. + :return: a dictionary containing the benchmark. + """ if not os.path.exists(workflow_path): print(f'ERROR: could not find workflow file {workflow_path}') return None @@ -508,6 +529,14 @@ def parse_workflow(workflow_path: str): def find_workflow_id(gi, name_or_id): + """ + Resolves the human-readable name for a workflow into the unique ID on the + Galaxy instance. + + :param gi: the connection object to the Galaxy instance + :param name_or_id: the name of the workflow + :return: The Galaxy workflow ID or None if the workflow could not be located + """ try: wf = gi.workflows.show_workflow(name_or_id) return wf['id'] @@ -524,7 +553,14 @@ def find_workflow_id(gi, name_or_id): def find_dataset_id(gi, name_or_id): - # print(f"Finding dataset {name_or_id}") + """ + Resolves the human-readable name if a dataset into the unique ID on the + Galaxy instance + + :param gi: the connection object to the Galaxy instance + :param name_or_id: the name of the dataset. + :return: the Galaxy dataset ID or None if the dataset could not be located. + """ try: ds = gi.datasets.show_dataset(name_or_id) return ds['id'] @@ -549,6 +585,14 @@ def find_dataset_id(gi, name_or_id): def find_collection_id(gi, name): + """ + Resolves a human-readable collection name into the unique Galaxy ID. + + :param gi: the connection object to the Galaxy instance + :param name: the name of the collection to resolve + :return: The unique Galaxy ID of the collection or None if the collection + can not be located. 
+ """ kwargs = {'limit': 10000, 'offset': 0} datasets = gi.datasets.get_datasets(**kwargs) if len(datasets) == 0: @@ -570,6 +614,13 @@ def find_collection_id(gi, name): def test(context: Context, args: list): + """ + Allows running testing code from the command line. + + :param context: a connection object to a Galaxy instance + :param args: varies + :return: varies, typically None. + """ id = 'c90fffcf98b31cd3' gi = connect(context) inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') diff --git a/abm/lib/cloudlaunch.py b/abm/lib/cloudlaunch.py index 0abd261..d933cc6 100644 --- a/abm/lib/cloudlaunch.py +++ b/abm/lib/cloudlaunch.py @@ -8,6 +8,8 @@ from cloudlaunch_cli.main import create_api_client from common import Context +# DEPRECATED - Cloudlaunch is no longer used to manage Galaxy clusters. + BOLD = '\033[1m' CLEAR = '\033[0m' diff --git a/abm/lib/common.py b/abm/lib/common.py index 96a21eb..cbc035f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -9,8 +9,10 @@ from bioblend.galaxy import dataset_collections from ruamel.yaml import YAML +# Where we will look for our configuration file. PROFILE_SEARCH_PATH = ['~/.abm/profile.yml', '.abm-profile.yml'] +# Deprecated. Do not use. datasets = { "dna": [ "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR013/ERR013101/ERR013101_1.fastq.gz", @@ -25,6 +27,14 @@ def try_for(f, limit=3): + """ + Tries to invoke the function f. If the function f fails it will be retried + *limit* number of times. + + :param f: the function to invoke + :param limit: how many times the function will be retried + :return: the result of calling f() + """ count = 0 running = True result = None @@ -40,6 +50,13 @@ def try_for(f, limit=3): class Context: + """ + The context object that contains information to connect to a Galaxy instance. 
+ + GALAXY_SERVER: the URL of the Galaxy server to connect to + API_KEY : a user's API key to make API calls on the Galaxy instance + KUBECONFIG: : the kubeconfig file needed to make changes via Helm + """ def __init__(self, *args): if len(args) == 1: arg = args[0] @@ -90,7 +107,12 @@ def connect(context: Context): def _set_active_profile(profile_name: str): - # print(f"Parsing profile for {profile_name}") + """ + Unused. + + :param profile_name: + :return: + """ lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG = parse_profile(profile_name) return lib.GALAXY_SERVER != None @@ -100,6 +122,11 @@ def get_context(profile_name: str): def get_yaml_parser(): + """ + Returns a singleton instance of a YAML parser. + + :return: a YAML parser. + """ if lib.parser is None: lib.parser = YAML() return lib.parser @@ -124,6 +151,12 @@ def load_profiles(): def save_profiles(profiles: dict): + """ + Write the ABM configuration file. + + :param profiles: the configuration to be saved. + :return: None + """ yaml = get_yaml_parser() for profile_path in PROFILE_SEARCH_PATH: path = os.path.expanduser(profile_path) @@ -161,15 +194,16 @@ def parse_profile(profile_name: str): def run(command, env: dict = None): + """ + Runs a command on the local machine. Used to invoke the helm and kubectl + executables. + + :param command: the command to be invoked + :param env: environment variables for the command. + :return: + """ if env is None: env = os.environ - # if env is not None: - # for name,value in env.items(): - # os.environ[name] = value - # if lib.KUBECONFIG is not None: - # os.environ['KUBECONFIG'] = lib.KUBECONFIG - # local_env = os.environ.copy() - # local_env.update(env) result = subprocess.run(command.split(), capture_output=True, env=env) if result.returncode != 0: raise RuntimeError(result.stderr.decode('utf-8').strip()) @@ -177,6 +211,11 @@ def run(command, env: dict = None): def get_env(context: Context): + """ + Creates a copy of the environment variables as returned by os.environ. 
+ :param context: Ignored + :return: a dictionary of the environment variables + """ copy = os.environ.copy() for key, value in context.__dict__.items(): if value is not None: @@ -185,6 +224,13 @@ def get_env(context: Context): def find_executable(name): + """ + Used the which command on the local machine to find the full path to an + executable. + + :param name: the name of a command line executable or script. + :return: the full path to the executable or an empty string if the executable is not found. + """ return run(f"which {name}") @@ -208,6 +254,7 @@ def find_executable(name): # "swaptotal", # "uname" +# Columns to be defined when generating CSV files. table_header = [ "id", "history_id", @@ -237,6 +284,11 @@ def find_executable(name): ] def print_table_header(): + """ + Prints the table header suitable for inclusion in CSV files. + + :return: None. The table header is printed to stdout. + """ print(','.join(table_header)) From 144ebaac2eaeac0782fa5d0f3dd640b05d2f1e4b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 29 Mar 2024 14:04:26 -0400 Subject: [PATCH 39/56] Rename list methods to prevent name collisions with the list type --- abm/VERSION | 2 +- abm/lib/benchmark.py | 6 +++++- abm/lib/cloudlaunch.py | 2 +- abm/lib/config.py | 2 +- abm/lib/dataset.py | 2 +- abm/lib/folder.py | 2 +- abm/lib/job.py | 2 +- abm/lib/library.py | 2 +- abm/lib/menu.yml | 16 ++++++++-------- abm/lib/users.py | 2 +- abm/lib/workflow.py | 2 +- 11 files changed, 22 insertions(+), 18 deletions(-) diff --git a/abm/VERSION b/abm/VERSION index 32a0e4f..80a9e43 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.6 +2.9.0-dev.7 diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 68e2247..3505158 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -183,7 +183,11 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): for key in item.keys(): # print(f"Getting dataset for {key} = {item[key]}") value = 
_get_dataset_data(gi, item[key]) - size += value['size'] + if value is None: + print(f"ERROR: Unable to find dataset {item[key]}") + return + if size in value: + size += value['size'] elements.append( _make_dataset_element(key, value['id']) ) diff --git a/abm/lib/cloudlaunch.py b/abm/lib/cloudlaunch.py index 0abd261..3a2719d 100644 --- a/abm/lib/cloudlaunch.py +++ b/abm/lib/cloudlaunch.py @@ -40,7 +40,7 @@ def h1(text): ''' -def list(context: Context, args: list): +def do_list(context: Context, args: list): archived = False filter = None status = lambda t: t.instance_status if t.instance_status else t.status diff --git a/abm/lib/config.py b/abm/lib/config.py index cbb71a5..24d8112 100644 --- a/abm/lib/config.py +++ b/abm/lib/config.py @@ -6,7 +6,7 @@ print_yaml, save_profiles) -def list(context: Context, args: list): +def do_list(context: Context, args: list): profiles = load_profiles() print(f"Loaded {len(profiles)} profiles") for profile in profiles: diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 52978c8..0fe507f 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -10,7 +10,7 @@ find_history, print_json) -def list(context: Context, argv: list): +def do_list(context: Context, argv: list): parser = argparse.ArgumentParser() parser.add_argument('-s', '--state', help='list jobs in this state') parser.add_argument('--history', help='show jobs in the given history') diff --git a/abm/lib/folder.py b/abm/lib/folder.py index 06760a5..4c4abb8 100644 --- a/abm/lib/folder.py +++ b/abm/lib/folder.py @@ -3,7 +3,7 @@ from .common import Context, connect -def list(context: Context, args: list): +def do_list(context: Context, args: list): if len(args) == 0: print("ERROR: no library ID was provided") return diff --git a/abm/lib/job.py b/abm/lib/job.py index efd8bfa..4973209 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -8,7 +8,7 @@ log = logging.getLogger('abm') -def list(context: Context, args: list): +def do_list(context: Context, args: list): state = 
'' history_id = None log.debug('Processing args') diff --git a/abm/lib/library.py b/abm/lib/library.py index 8add917..4dbb610 100644 --- a/abm/lib/library.py +++ b/abm/lib/library.py @@ -3,7 +3,7 @@ from .common import Context, connect, datasets -def list(context: Context, args: list): +def do_list(context: Context, args: list): gi = connect(context) if len(args) == 0: for library in gi.libraries.get_libraries(): diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 607220a..a4e3844 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -37,7 +37,7 @@ help: 'download a workflow' params: ID PATH - name: ['list', 'ls'] - handler: workflow.list + handler: workflow.do_list help: 'list workflows available on the serer' - name: [show] handler: workflow.show @@ -92,7 +92,7 @@ params: KEY [--hs|--hist|--history HISTORY_ID | -c|--create "History name"] help: imports a dataset to the server from a URL specified in the datasets.yml config file. - name: ['list', 'ls'] - handler: dataset.list + handler: dataset.do_list help: lists all the datasets on the server - name: ['find'] handler: dataset.find @@ -191,7 +191,7 @@ menu: - name: [ list, ls ] help: list all jobs, or jobs in a particular state. Can filter by a history. - handler: job.list + handler: job.do_list params: "[-s|--state ok|running|error|waiting] [-h|--history historyID]" - name: [ show ] help: show detailed information about a job @@ -222,7 +222,7 @@ menu: - name: [list, ls] help: list all users on the Galaxy instance - handler: users.list + handler: users.do_list - name: [api_key, apikey, key] help: obtain the API key for the specified user handler: users.api_key @@ -303,7 +303,7 @@ menu: - name: [list, ls] help: list configured servers - handler: config.list + handler: config.do_list - name: [show, sh] help: disply URL, API key, and kube config for a specific cloud. 
handler: config.show @@ -341,7 +341,7 @@ standalone: true menu: - name: [list, ls] - handler: cloudlaunch.list + handler: cloudlaunch.do_list help: list deployments on all cloud providers - name: [create, launch, new] handler: cloudlaunch.create @@ -356,7 +356,7 @@ menu: - name: [list, ls] help: list all libraries on the server - handler: library.list + handler: library.do_list - name: [show] help: show detailed information about a library handler: library.show @@ -373,7 +373,7 @@ help: manage folders in data libraries menu: - name: [list, ls] - handler: folder.list + handler: folder.do_list help: list the folders in a data library params: LIBRARY_ID - name: [create, new] diff --git a/abm/lib/users.py b/abm/lib/users.py index 74353fb..cb98a46 100644 --- a/abm/lib/users.py +++ b/abm/lib/users.py @@ -5,7 +5,7 @@ from common import Context, connect -def list(context: Context, args: list): +def do_list(context: Context, args: list): # TODO the master API key needs to be parameterized or specified in a config file. context.API_KEY = "galaxypassword" gi = connect(context) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 1e21e77..6282158 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -15,7 +15,7 @@ log = logging.getLogger('abm') -def list(context: Context, args: list): +def do_list(context: Context, args: list): gi = connect(context) workflows = gi.workflows.get_workflows(published=True) if len(workflows) == 0: From 6efb9ce4af421f9f35c7b10e28ba9d98396fb94e Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 13:53:39 -0400 Subject: [PATCH 40/56] Allow the Galaxy master API key to defined in the profile. 
--- abm/__init__.py | 2 ++ abm/lib/__init__.py | 17 +++++++++++++++++ abm/lib/common.py | 38 ++++++++++++++++++++++++++++---------- abm/lib/history.py | 2 +- abm/lib/users.py | 16 ++++------------ 5 files changed, 52 insertions(+), 23 deletions(-) diff --git a/abm/__init__.py b/abm/__init__.py index fb8a5b7..adc875e 100644 --- a/abm/__init__.py +++ b/abm/__init__.py @@ -1,6 +1,8 @@ import os import sys +import yaml + sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/abm/lib/__init__.py b/abm/lib/__init__.py index 4efc7e1..a17780b 100644 --- a/abm/lib/__init__.py +++ b/abm/lib/__init__.py @@ -24,3 +24,20 @@ class Keys: COLLECTION = 'collection' HISTORY_BASE_NAME = 'output_history_base_name' HISTORY_NAME = 'history_name' + + +# def get_master_api_key(): +# ''' +# Get the master API key from the environment or configuration file. +# ''' +# if 'GALAXY_MASTER_API_KEY' in os.environ: +# return os.environ['GALAXY_MASTER_API_KEY'] +# config_path = os.path.expanduser("~/.abm/config.yml") +# if not os.path.exists(config_path): +# raise RuntimeError(f"ERROR: Configuration file not found: {config_path}") +# with open(config_path, 'r') as f: +# config = yaml.safe_load(f) +# key = config.get('GALAXY_MASTER_API_KEY', None) +# if key == None: +# raise RuntimeError("ERROR: GALAXY_MASTER_API_KEY not found in config.yml") +# return key diff --git a/abm/lib/common.py b/abm/lib/common.py index cbc035f..0fe6b3f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -61,20 +61,26 @@ def __init__(self, *args): if len(args) == 1: arg = args[0] if type(arg) == str: - self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG = parse_profile(arg) + self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG, self.MASTER_KEY = parse_profile(arg) elif type(arg) == dict: self.GALAXY_SERVER = arg['GALAXY_SERVER'] self.API_KEY = arg['API_KEY'] self.KUBECONFIG = arg['KUBECONFIG'] + if 'MASTER_KEY' in arg: + self.MASTER_KEY = arg['MASTER_KEY'] + else: + self.MASTER_KEY = None else: raise 
Exception(f'Invalid arg for Context: {type(arg)}') - elif len(args) == 3: + elif len(args) == 3 or len(args) == 4: self.GALAXY_SERVER = args[0] self.API_KEY = args[1] self.KUBECONFIG = args[2] + if len(args) == 4: + self.MASTER_KEY = args[3] else: raise Exception( - f'Invalid args for Context. Expected one or three, found {len(args)}' + f'Invalid args for Context. Expected one or four, found {len(args)}' ) @@ -86,7 +92,7 @@ def print_yaml(obj): get_yaml_parser().dump(obj, sys.stdout) -def connect(context: Context): +def connect(context: Context, use_master_key=False): """ Create a connection to the Galaxy instance @@ -100,7 +106,14 @@ def connect(context: Context): print('ERROR: The Galaxy API key has not been set. Please check your') print(' configuration in ~/.abm/profile.yml and try again.') sys.exit(1) - gi = bioblend.galaxy.GalaxyInstance(url=context.GALAXY_SERVER, key=context.API_KEY) + key = context.API_KEY + if use_master_key: + if context.MASTER_KEY is None: + print('ERROR: The Galaxy master key has not been set. 
Please check your') + print(' configuration in ~/.abm/profile.yml and try again.') + sys.exit(1) + key = context.MASTER_KEY + gi = bioblend.galaxy.GalaxyInstance(url=context.GALAXY_SERVER, key=key) gi.max_get_attempts = 3 gi.get_retry_delay = 1 return gi @@ -113,7 +126,7 @@ def _set_active_profile(profile_name: str): :param profile_name: :return: """ - lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG = parse_profile(profile_name) + lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile(profile_name) return lib.GALAXY_SERVER != None @@ -174,10 +187,11 @@ def parse_profile(profile_name: str): :param profile_name: path to the profile to parse :return: a tuple containing the Galaxy URL, API key, and path to the kubeconfig ''' + nones = (None, None, None, None) profiles = load_profiles() if profiles is None: print(f'ERROR: Could not locate an abm profile file in {PROFILE_SEARCH_PATH}') - return None, None, None + return nones if profile_name not in profiles: print(f'ERROR: {profile_name} is not the name of a valid profile.') keys = list(profiles.keys()) @@ -186,11 +200,15 @@ def parse_profile(profile_name: str): ', '.join([f"'{k}'" for k in keys[0:-2]]) + f", and '{keys[-1]}'" ) print(f'The defined profile names are: {quoted_keys}') - return None, None, None + return nones profile = profiles[profile_name] + kube = None + master = 'galaxypassword' if 'kube' in profile: - return (profile['url'], profile['key'], os.path.expanduser(profile['kube'])) - return (profile['url'], profile['key'], None) + kube = os.path.expanduser(profile['kube']) + if 'master' in profile: + master = profile['master'] + return (profile['url'], profile['key'], kube, master) def run(command, env: dict = None): diff --git a/abm/lib/history.py b/abm/lib/history.py index 0e4cc0b..8236590 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -235,7 +235,7 @@ def error_message(msg='Invalid command'): return url = datasets[args[0]] elif len(args) == 3: - server, key = 
parse_profile(args[0]) + server, key, kube, master = parse_profile(args[0]) if server is None: error_message(f"Invalid server profile name: {args[0]}") return diff --git a/abm/lib/users.py b/abm/lib/users.py index cb98a46..ceacd82 100644 --- a/abm/lib/users.py +++ b/abm/lib/users.py @@ -6,9 +6,7 @@ def do_list(context: Context, args: list): - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_list = gi.users.get_users() pprint(user_list) @@ -22,9 +20,7 @@ def get_api_key(context: Context, args: list): print("ERROR: no user email given") return - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_list = gi.users.get_users(f_email=args[0]) if user_list is None or len(user_list) == 0: print("WARNING: no such user") @@ -52,9 +48,7 @@ def create(context: Context, args: list): print(f"ERROR: {email} does not look like a valid email address") return - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_record = gi.users.create_local_user(name, email, password) id = user_record['id'] key = gi.users.create_user_apikey(id) @@ -67,9 +61,7 @@ def show(context: Context, args: list): print("ERROR: no user email given") return - # TODO the master API key needs to be parameterized or specified in a config file. 
- context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) id = _get_user_id(gi, args[0]) if id is None: return From 5a85ae11cd4cc410d88635a3640f20646fe9e64d Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 14:00:39 -0400 Subject: [PATCH 41/56] Add configuration as a command alias --- abm/lib/menu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index a4e3844..4aef10b 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -297,7 +297,7 @@ - name: [url] help: derive the URL to access this Galaxy instance handler: kubectl.url -- name: [config, conf, cfg] +- name: [config, configuration, conf, cfg] help: manage configuration profiles standalone: true menu: From 1b4eeb5b13e80160b6714da4b62206c22c45a674 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 14:07:15 -0400 Subject: [PATCH 42/56] Update year in copyright notices --- LICENSE | 2 +- abm/__main__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index cdd6c6d..919ca29 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Galaxy Project +Copyright (c) 2024 Galaxy Project Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/abm/__main__.py b/abm/__main__.py index 014bada..58f5877 100644 --- a/abm/__main__.py +++ b/abm/__main__.py @@ -3,7 +3,7 @@ """ The Automated Benchmarking Tool -Copyright 2023 The Galaxy Project. All rights reserved. +Copyright 2024 The Galaxy Project. All rights reserved. """ @@ -64,7 +64,7 @@ def command_list(commands: list): def copyright(): - print(f" Copyright 2023 The Galaxy Project. All Rights Reserved.\n") + print(f" Copyright 2024 The Galaxy Project. 
All Rights Reserved.\n") def print_main_help(menu_data): From 0533f8c08390ebac74b03f3e750840ad621fc86d Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Fri, 24 May 2024 11:31:14 +0530 Subject: [PATCH 43/56] Update samples and cleanup unused --- README.md | 2 +- abm/lib/experiment.py | 1 - abm/lib/history.py | 2 +- abm/lib/threads/Latch.py | 20 ---- bootstrap-config/test.yaml | 11 -- rules/default.yml | 106 ------------------ .../benchmarks/dna-named.yml | 0 .../benchmarks}/example.yml | 0 samples/benchmarks/rna-named.yml | 14 +++ samples/benchmarks/rules/4x8.yml | 0 samples/benchmarks/rules/8x16.yml | 0 samples/experiment.yaml | 11 ++ 12 files changed, 27 insertions(+), 140 deletions(-) delete mode 100644 abm/lib/threads/Latch.py delete mode 100644 bootstrap-config/test.yaml delete mode 100644 rules/default.yml rename benchmarks/dna-named-2.yml => samples/benchmarks/dna-named.yml (100%) rename {benchmarks => samples/benchmarks}/example.yml (100%) create mode 100644 samples/benchmarks/rna-named.yml create mode 100644 samples/benchmarks/rules/4x8.yml create mode 100644 samples/benchmarks/rules/8x16.yml create mode 100644 samples/experiment.yaml diff --git a/README.md b/README.md index 81e023e..c52b5b2 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The `kubectl` program is only required when bootstrapping a new Galaxy instance, ### Credentials -You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to intereact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. 
You can use the `profile-sample.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing). +You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to intereact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. You can use the `samples/profile.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing). :bulb: It is now possible (>=2.0.0) to create Galaxy users and their API keys directly with `abm`. diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 5750fe2..54a1707 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -45,7 +45,6 @@ def run(context: Context, args: list): print(f"Starting with run number {argv.run_number}") profiles = load_profiles() - # latch = CountdownLatch(len(config['cloud'])) threads = [] start = perf_counter() for cloud in config['cloud']: diff --git a/abm/lib/history.py b/abm/lib/history.py index 0e4cc0b..e3fb4b8 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -214,7 +214,7 @@ def error_message(msg='Invalid command'): else: datasets = None config = f'{os.path.dirname(os.path.abspath(__file__))}/histories.yml' - # First load the histories.yml file that is pacakged with abm + # First load the histories.yml file that is packaged with abm if os.path.exists(config): with open(config, 'r') as f: datasets = yaml.safe_load(f) diff --git a/abm/lib/threads/Latch.py b/abm/lib/threads/Latch.py deleted file mode 100644 index 8db238d..0000000 --- a/abm/lib/threads/Latch.py +++ /dev/null @@ -1,20 
+0,0 @@ -import threading - - -class CountdownLatch: - def __init__(self, count=1): - self.count = count - self.lock = threading.Condition - - def count_down(self, count=1): - self.lock.acquire(True) - self.count -= count - if self.count <= 0: - self.lock.notifyAll() - self.lock.release() - - def wait(self): - self.lock.acquire(True) - while self.count > 0: - self.lock.wait() - self.lock.release() diff --git a/bootstrap-config/test.yaml b/bootstrap-config/test.yaml deleted file mode 100644 index bf7822a..0000000 --- a/bootstrap-config/test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: Benchmarking DNA -runs: 3 -workflow_conf: - - config/test.yml - - config/dna-named.yml - - config/rna-named.yml -cloud: - - iu2 -job_configs: - - rules/4x8.yml - - rules/8x16.yml \ No newline at end of file diff --git a/rules/default.yml b/rules/default.yml deleted file mode 100644 index c12ead7..0000000 --- a/rules/default.yml +++ /dev/null @@ -1,106 +0,0 @@ -mappings: - summary_stats: - tool_ids: - - Summary_Statistics1 - docker_container_id_override: cloudve/gsummary:latest - resource_set: small - sam_fasta_dm: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/data_manager_sam_fasta_index_builder/sam_fasta_index_builder/.* - docker_container_id_override: cloudve/sam-fasta-dm:latest - resource_set: small - bwa_dm: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bwa_mem_index_builder/bwa_mem_index_builder_data_manager/.* - docker_container_id_override: cloudve/bwa-dm:latest - resource_set: small - prokka: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.14.5 - docker_container_id_override: cloudve/prokka:1.14.5 - jbrowse: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy6 - docker_container_id_override: cloudve/jbrowse:1.16.5 - lib_galaxy: - tool_ids: - - sort1 - - Grouping1 - docker_container_id_override: galaxy/galaxy-min:21.05 - resource_set: small - set_medium: - tool_ids: - - 
toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/.* - - toolshed.g2.bx.psu.edu/repos/iuc/bwameth/bwameth/.* - - toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/.* - - toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/.* - - toolshed.g2.bx.psu.edu/repos/iuc/valet/valet/.* - - toolshed.g2.bx.psu.edu/repos/iuc/varscan_somatic/varscan_somatic/.* - - toolshed.g2.bx.psu.edu/repos/nilesh/rseqc/rseqc_bam2wig/.* - resource_set: medium - set_large: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/.* - - toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_compare/deeptools_bam_compare/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_pe_fragmentsize/deeptools_bam_pe_fragmentsize/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bigwig_compare/deeptools_bigwig_compare/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_gc_bias/deeptools_compute_gc_bias/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_correct_gc_bias/deeptools_correct_gc_bias/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_multi_bam_summary/deeptools_multi_bam_summary/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_multi_bigwig_summary/deeptools_multi_bigwig_summary/.* - - toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/.* - - toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/.* - - toolshed.g2.bx.psu.edu/repos/iuc/rnaspades/rnaspades/.* - - toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/.* - resource_set: large - set_2xlarge: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/.* - - toolshed.g2.bx.psu.edu/repos/nml/spades/spades/.* - resource_set: 2xlarge - set_mlarge: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/.* - - 
toolshed.g2.bx.psu.edu/repos/iuc/plink/plink/.* - resource_set: mlarge -resources: - resource_sets: - small: - requests: - cpu: 1 - memory: 2G - limits: - cpu: 2 - memory: 5G - medium: - requests: - cpu: 2 - memory: 4G - limits: - cpu: 4 - memory: 10G - large: - requests: - cpu: 4 - memory: 8G - limits: - cpu: 8 - memory: 16G - 2xlarge: - requests: - cpu: 12 - memory: 20G - limits: - cpu: 12 - memory: 24G - mlarge: - requests: - cpu: 2 - memory: 16G - limits: - cpu: 4 - memory: 20G - default_resource_set: small diff --git a/benchmarks/dna-named-2.yml b/samples/benchmarks/dna-named.yml similarity index 100% rename from benchmarks/dna-named-2.yml rename to samples/benchmarks/dna-named.yml diff --git a/benchmarks/example.yml b/samples/benchmarks/example.yml similarity index 100% rename from benchmarks/example.yml rename to samples/benchmarks/example.yml diff --git a/samples/benchmarks/rna-named.yml b/samples/benchmarks/rna-named.yml new file mode 100644 index 0000000..b94c0e0 --- /dev/null +++ b/samples/benchmarks/rna-named.yml @@ -0,0 +1,14 @@ +- workflow_id: d6d3c2119c4849e4 + output_history_base_name: RNA-seq + reference_data: + - name: Reference Transcript (FASTA) + dataset_id: 50a269b7a99356aa + runs: + - history_name: 1 + inputs: + - name: FASTQ RNA Dataset + dataset_id: 28fa757e56346a34 + - history_name: 2 + inputs: + - name: FASTQ RNA Dataset + dataset_id: 1faa2d3b2ed5c436 diff --git a/samples/benchmarks/rules/4x8.yml b/samples/benchmarks/rules/4x8.yml new file mode 100644 index 0000000..e69de29 diff --git a/samples/benchmarks/rules/8x16.yml b/samples/benchmarks/rules/8x16.yml new file mode 100644 index 0000000..e69de29 diff --git a/samples/experiment.yaml b/samples/experiment.yaml new file mode 100644 index 0000000..b4874f0 --- /dev/null +++ b/samples/experiment.yaml @@ -0,0 +1,11 @@ +name: Benchmarking DNA +runs: 3 +workflow_conf: + - benchmarks/example.yml + - benchmarks/dna-named.yml + - benchmarks/rna-named.yml +cloud: + - iu2 +job_configs: + - 
rules/4x8.yml + - rules/8x16.yml From 40792ca03dd9069b3ccaba5a67b40139badf4bb2 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 19:22:35 -0400 Subject: [PATCH 44/56] Add --no-tools flag for workflow import and upload --- abm/lib/menu.yml | 4 +-- abm/lib/workflow.py | 63 +++++++++++++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 4aef10b..9ee7f3e 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -26,11 +26,11 @@ menu: - name: ['upload', 'up'] handler: workflow.upload - params: PATH + params: PATH [-n|--no-tools] help: 'upload a workflow file to the server' - name: ['import', 'imp'] handler: workflow.import_from_config - params: NAME + params: NAME [-n|--no-tools] help: 'import a workflow defined in ~/.abm/workflows.yml' - name: ['download', 'dl'] handler: workflow.download diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 6282158..3a20885 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -35,30 +35,48 @@ def delete(context: Context, args: list): def upload(context: Context, args: list): - if len(args) == 0: - print('ERROR: no workflow file given') + path = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + path = arg + if path is None: + print("ERROR: no workflow given") return - path = args[0] + if path.startswith('http'): import_from_url(context, args) return if not os.path.exists(path): print(f'ERROR: file not found: {path}') return + print("Uploading workflow") gi = connect(context) print("Importing the workflow") pprint(gi.workflows.import_workflow_from_local_path(path, publish=True)) runnable = for_path(path) - print("Installing tools") - result = install_shed_repos(runnable, gi, False) - pprint(result) + if install: + print("Installing tools") + result = install_shed_repos(runnable, gi, False) + pprint(result) def import_from_url(context: 
Context, args: list): - if len(args) == 0: - print("ERROR: no workflow URL given") + print("Importing workflow from URL") + url = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + url = arg + if url is None: + print("ERROR: no URL given") return - url = args[0] # There is a bug in ephemeris (for lack of a better term) that assumes all # Runnable objects can be found on the local file system @@ -93,17 +111,27 @@ def import_from_url(context: Context, args: list): result = gi.workflows.import_workflow_dict(workflow, publish=True) print(json.dumps(result, indent=4)) runnable = for_path(cached_file) - # runnable = for_uri(url) - print("Installing tools") - result = install_shed_repos(runnable, gi, False, install_tool_dependencies=True) - pprint(result) + if install: + print("Installing tools") + result = install_shed_repos(runnable, gi, False, install_tool_dependencies=True) + pprint(result) def import_from_config(context: Context, args: list): - if len(args) == 0: + print("Importing workflow from configuration") + key = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + key = arg + if key is None: print("ERROR: no workflow ID given") return - key = args[0] + + userfile = os.path.join(Path.home(), ".abm", "workflows.yml") if not os.path.exists(userfile): print("ERROR: this instance has not been configured to import workflows.") @@ -116,7 +144,10 @@ def import_from_config(context: Context, args: list): return url = workflows[key] - import_from_url(context, [url]) + argv = [url] + if not install: + argv.append('-n') + import_from_url(context, argv) def download(context: Context, args: list): From 88e13b81536f9c4e41a18b8f1e2a4d4ee05064e8 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 20:28:25 -0400 Subject: [PATCH 45/56] Bump dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 80a9e43..d501693 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.7 +2.9.0-dev.8 \ No newline at end of file From 2ad1a1e8260c5fae3f7b1571c90ca7e8ce69a7dd Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 20:28:50 -0400 Subject: [PATCH 46/56] Update bump script --- bump | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/bump b/bump index 87bac5a..2da9275 100755 --- a/bump +++ b/bump @@ -19,23 +19,20 @@ def main(): with open(VERSION_FILE, 'r') as f: version_string = f.read().strip() - - parts = version_string.split('.') + + prefix = version_string + suffix = None + release = None + build = None + if '-' in version_string: + # This is a development build + prefix, suffix = version_string.split('-') + release,build = suffix.split('.') + build = int(build) + parts = prefix.split('.') major = int(parts[0]) minor = int(parts[1]) - release = None - if '-' in parts[2]: - revs = parts[2].split('-') - revision = int(revs[0]) - if 'dev' in revs[1]: - release = 'dev' - build = int(revs[1].replace('dev', '')) - elif 'rc' in revs[1]: - release = 'rc' - build = int(revs[1].replace('rc', '')) - else: - revision = int(parts[2]) - build = None + revision = int(parts[2]) if sys.argv[1] in ['major', 'minor', 'revision'] and release is not None: print(f"ERROR: Cannot bump the {sys.argv[1]} version for a development build") @@ -75,7 +72,7 @@ def main(): if build is None: version_string = f"{major}.{minor}.{revision}" else: - version_string = f"{major}.{minor}.{revision}-{release}{build}" + version_string = f"{major}.{minor}.{revision}-{release}.{build}" with open(VERSION_FILE, 'w') as f: f.write(version_string) From 676572ceba03905ab9af15ec70491f3b37514e2b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 29 May 2024 12:46:30 -0400 Subject: [PATCH 47/56] Try to fix problems resolving dataset collection IDs --- abm/__init__.py | 2 -- 
abm/lib/benchmark.py | 27 +++++++++++++++++++-------- abm/lib/common.py | 9 +++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/abm/__init__.py b/abm/__init__.py index adc875e..fb8a5b7 100644 --- a/abm/__init__.py +++ b/abm/__init__.py @@ -1,8 +1,6 @@ import os import sys -import yaml - sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index af2ecf8..b9962a2 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -145,11 +145,13 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): dsid = find_collection_id(gi, dsname) dsdata = _get_dataset_data(gi, dsid) if dsdata is None: - raise Exception( - f"ERROR: unable to resolve {dsname} to a dataset." - ) - dsid = dsdata['id'] - dssize = dsdata['size'] + # raise Exception( + # f"ERROR: unable to resolve {dsname} to a dataset." + # ) + dssize = 0 + else: + dsid = dsdata['id'] + dssize = dsdata['size'] input_data_size.append(dssize) print(f"Input collection ID: {dsname} [{dsid}] {dssize}") inputs[input[0]] = {'id': dsid, 'src': 'hdca', 'size': dssize} @@ -625,7 +627,16 @@ def test(context: Context, args: list): :param args: varies :return: varies, typically None. 
""" - id = 'c90fffcf98b31cd3' + # id = 'c90fffcf98b31cd3' + # gi = connect(context) + # inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') + # pprint(inputs) + gi = connect(context) - inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') - pprint(inputs) + print("Calling find_collection_id") + dsid = find_collection_id(gi, args[0]) + print(f"Collection ID: {dsid}") + print("Calling _get_dataset_data") + dsdata = _get_dataset_data(gi, dsid) + pprint(dsdata) + diff --git a/abm/lib/common.py b/abm/lib/common.py index 0fe6b3f..f75fe3a 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -406,6 +406,7 @@ def find_history(gi, name_or_id): def _get_dataset_data(gi, name_or_id): + print(f"Getting dataset data for {name_or_id}") def make_result(data): return { 'id': data['id'], @@ -415,14 +416,18 @@ def make_result(data): try: ds = gi.datasets.show_dataset(name_or_id) + print(f"Got dataset data for {name_or_id} {ds['id']}") return make_result(ds) except Exception as e: + print(f"Failed to get dataset data for {name_or_id}") pass try: + print("Getting all datasets") datasets = gi.datasets.get_datasets( name=name_or_id ) # , deleted=True, purged=True) + print(f"List of datasets for {name_or_id} is {len(datasets)}") for ds in datasets: # print_json(ds) state = True @@ -431,7 +436,11 @@ def make_result(data): if state and not ds['deleted'] and ds['visible']: # The dict returned by get_datasets does not include the input # file sizes so we need to make another call to show_datasets + print(f"Getting dataset data for {ds['id']}") return make_result(gi.datasets.show_dataset(ds['id'])) + else: + print(f"Skipping dataset {ds['id']}") + print_json(ds) except Exception as e: pass From fd36cd36a0a1d048e8ff505e78a02b18beebb4ab Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 29 May 2024 15:08:27 -0400 Subject: [PATCH 48/56] Bump build number --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION 
index d501693..ea61abf 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.8 \ No newline at end of file +2.9.0-dev.9 \ No newline at end of file From 6bc34397345748aeab9142c6e2f9fa410e28cf09 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 4 Jun 2024 10:54:19 -0400 Subject: [PATCH 49/56] Search for a local .abm directory before using the global directory --- abm/lib/common.py | 12 ++++++++ abm/lib/dataset.py | 52 +++++++++++++++++++--------------- abm/lib/history.py | 68 ++++++++++++++------------------------------- abm/lib/workflow.py | 16 ++++++----- 4 files changed, 72 insertions(+), 76 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index f75fe3a..045ecd4 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,6 +2,8 @@ import os import subprocess import sys +from pathlib import Path + from math import ceil import bioblend.galaxy @@ -405,6 +407,16 @@ def find_history(gi, name_or_id): return history[0]['id'] +def find_config(name: str) -> str: + if os.path.exists(".abm"): + if os.path.exists(f".abm/{name}"): + return f".abm/{name}" + config = os.path.join(Path.home(), ".abm", name) + if os.path.exists(config): + return config + return None + + def _get_dataset_data(gi, name_or_id): print(f"Getting dataset data for {name_or_id}") def make_result(data): diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index a4f022c..829dde4 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json) + find_history, print_json, find_config) def do_list(context: Context, argv: list): @@ -162,31 +162,42 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--create', help='create a new history for the dataset', action='store_true') + 
parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) + parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) + parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) + parser.add_argument('key', help='the key of the dataset to import') gi = None key = None history = None kwargs = {} - while len(args) > 0: - arg = args.pop(0) - if arg in ['--hs', '--hist', '--history']: - history = args.pop(0) - elif arg in ['-c', '--create']: - gi = connect(context) - history = gi.histories.create_history(args.pop(0)).get('id') - elif arg in ['-n', '--name']: - kwargs['file_name'] = args.pop(0) - elif key is not None: - print(f"ERROR: key already set: {key}") - return - else: - key = arg + argv = parser.parse_args(args) + if argv.name is not None: + kwargs['file_name'] = argv.name - configfile = os.path.join(Path.home(), '.abm', 'datasets.yml') - if not os.path.exists(configfile): - print("ERROR: ABM has not been configured to import datasets.") - print(f"Please create {configfile}") + if argv.create and argv.history is not None: + print("ERROR: cannot specify both --create and --history") return + if argv.create: + gi = connect(context) + history = gi.histories.create_history(argv.key).get('id') + if argv.history is not None: + history = find_history(gi, argv.history) + key = argv.key + if argv.file is not None: + configfile = argv.file + if not os.path.exists(configfile): + print(f"ERROR: the specified file {configfile} was not found") + return + else: + configfile = find_config("datasets.yml") + if configfile is None: + print("ERROR: ABM has not been configured to import datasets.") + print(f"Please create {configfile}") + return + with open(configfile, 'r') as f: datasets = yaml.safe_load(f) if not key in datasets: @@ -196,9 +207,6 @@ def import_from_config(context: Context, args: list): if gi is None: gi = connect(context) - if 
history is not None: - history = find_history(gi, history) - response = gi.tools.put_url(url, history, **kwargs) print(json.dumps(response, indent=4)) diff --git a/abm/lib/history.py b/abm/lib/history.py index b19233b..1f13cbe 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -10,7 +10,7 @@ from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header, try_for) + get_float_key, get_str_key, print_table_header, try_for, find_config) # # History related functions @@ -194,55 +194,29 @@ def _import(context: Context, args: list): def himport(context: Context, args: list): - def error_message(msg='Invalid command'): - print(f"ERROR: {msg}") - print(f"USAGE: {sys.argv[0]} history import SERVER HISTORY_ID JEHA_ID") - print(f" {sys.argv[0]} history import http://GALAXY_SERVER_URL") - print(f" {sys.argv[0]} history import [dna|rna]") - - wait = True - if '-n' in args: - args.remove('-n') - wait = False - if '--no-wait' in args: - args.remove('--no-wait') - wait = False + parser = argparse.ArgumentParser() + parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) + parser.add_argument('-f', '--file', help='Use the specified histories.yml file', required=False, default=None) + parser.add_argument('identifier', help='The history alias or URL to import', required=True) + argv = parser.parse_args(args) - if len(args) == 1: - if 'http' in args[0]: - url = args[0] + wait = not argv.no_wait + if argv.identifier.startswith('http'): + url = argv.identifier + else: + if argv.file is not None: + config = argv.file else: - datasets = None - config = f'{os.path.dirname(os.path.abspath(__file__))}/histories.yml' - # First load the histories.yml file that is packaged with abm - if os.path.exists(config): - with open(config, 'r') as f: - datasets = 
yaml.safe_load(f) - # Then load the user histories.yml, if any - userfile = os.path.join(Path.home(), ".abm", "histories.yml") - if os.path.exists(userfile): - if datasets is None: - datasets = {} - with open(userfile, 'r') as f: - userdata = yaml.safe_load(f) - for key, item in userdata.items(): - datasets[key] = item - if datasets is None: - error_message("No history URLs have been configured.") - return - if not args[0] in datasets: - error_message('Please specify a URL or name of the history to import') - return - url = datasets[args[0]] - elif len(args) == 3: - server, key, kube, master = parse_profile(args[0]) - if server is None: - error_message(f"Invalid server profile name: {args[0]}") + config = find_config("histories.yml") + if config is None: + print("ERROR: No histories.yml file found.") return - url = f"{server}history/export_archive?id={args[1]}&jeha_id={args[2]}" - else: - error_message() - return + with open(config, 'r') as f: + histories = yaml.safe_load(f) + if argv.identifier not in histories: + print(f"ERROR: No such history {argv.identifier}") + return + url = histories[argv.identifier] gi = connect(context) print(f"Importing history from {url}") diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 3a20885..9f6f2ac 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -8,7 +8,7 @@ import requests import yaml from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header + print_table_header, find_config from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -118,26 +118,28 @@ def import_from_url(context: Context, args: list): def import_from_config(context: Context, args: list): - print("Importing workflow from configuration") key = None install = True + config = None for arg in args: if arg in ['-n', '--no-tools']: print("Skipping tools") install = False + elif arg in ['-f', '--file']: + config = arg else: key = 
arg if key is None: print("ERROR: no workflow ID given") return - - userfile = os.path.join(Path.home(), ".abm", "workflows.yml") - if not os.path.exists(userfile): + if config is None: + config = find_config("workflows.yml") + if config is None: print("ERROR: this instance has not been configured to import workflows.") - print(f"Please configure {userfile} to enable workflow imports") + print(f"Please configure a workflows.yml file to enable imports") return - with open(userfile, 'r') as f: + with open(config, 'r') as f: workflows = yaml.safe_load(f) if not key in workflows: print(f"ERROR: no such workflow: {key}") From 5e9ab1756743853cf92e775710bb97073a2ad057 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 4 Jun 2024 17:02:27 -0400 Subject: [PATCH 50/56] Allow multiple datasets to be imported at once. --- abm/lib/dataset.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 829dde4..2f1cb9a 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -167,13 +167,15 @@ def import_from_config(context: Context, args: list): parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) - parser.add_argument('key', help='the key of the dataset to import') + parser.add_argument('keys', help='the key of the dataset to import', nargs='+') gi = None - key = None history = None kwargs = {} argv = parser.parse_args(args) if argv.name is not None: + if len(argv.keys) > 1: + print("ERROR: cannot specify --name with multiple keys") + return kwargs['file_name'] = argv.name if argv.create and argv.history is not None: @@ -185,7 +187,6 @@ def import_from_config(context: Context, args: list): history = 
gi.histories.create_history(argv.key).get('id') if argv.history is not None: history = find_history(gi, argv.history) - key = argv.key if argv.file is not None: configfile = argv.file if not os.path.exists(configfile): @@ -197,18 +198,18 @@ def import_from_config(context: Context, args: list): print("ERROR: ABM has not been configured to import datasets.") print(f"Please create {configfile}") return - with open(configfile, 'r') as f: datasets = yaml.safe_load(f) - if not key in datasets: - print(f"ERROR: dataset {key} has not been defined.") - return - url = datasets[key] - if gi is None: gi = connect(context) - response = gi.tools.put_url(url, history, **kwargs) - print(json.dumps(response, indent=4)) + for key in argv.keys: + if not key in datasets: + print(f"ERROR: dataset {key} has not been defined.") + else: + url = datasets[key] + print(f"Importing {key} from {url}") + response = gi.tools.put_url(url, history, **kwargs) + print(json.dumps(response, indent=4)) def _import_from_url(gi, history, url, **kwargs): From 715098a37d795a874591e435e2daa5734fd98443 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 5 Jun 2024 13:25:58 -0400 Subject: [PATCH 51/56] Use local .abm directory for configurations if it exists. 
--- abm/lib/common.py | 24 ++++++++++++++++++++++++ abm/lib/dataset.py | 25 +++++++++++++++++-------- abm/lib/history.py | 2 +- abm/lib/menu.yml | 4 ++-- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 045ecd4..6ab69bc 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -407,6 +407,30 @@ def find_history(gi, name_or_id): return history[0]['id'] +def find_dataset(gi, history_id, name_or_id): + try: + dataset = gi.datasets.show_dataset(name=name_or_id) + return dataset['id'] + except: + pass + + try: + dataset = gi.datasets.show_dataset(name_or_id) + return dataset['id'] + except: + pass + return None + # print("Calling get_datasets") + # datasets = gi.datasets.get_datasets(history_id=history_id, name=name_or_id) + # if datasets is None: + # print("Not found") + # return None + # if len(datasets) == 0: + # print("No datasets found (len == 0)") + # return None + # return datasets[0]['id'] + + def find_config(name: str) -> str: if os.path.exists(".abm"): if os.path.exists(f".abm/{name}"): diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 2f1cb9a..3f8d7a1 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json, find_config) + find_history, print_json, find_config, find_dataset) def do_list(context: Context, argv: list): @@ -163,7 +163,7 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-c', '--create', help='create a new history for the dataset', action='store_true') + parser.add_argument('-c', '--create', help='create a new history for the dataset', required=False, default=None) parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) 
parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) @@ -178,14 +178,15 @@ def import_from_config(context: Context, args: list): return kwargs['file_name'] = argv.name - if argv.create and argv.history is not None: + if argv.create is not None and argv.history is not None: print("ERROR: cannot specify both --create and --history") return - if argv.create: + if argv.create is not None: gi = connect(context) - history = gi.histories.create_history(argv.key).get('id') + history = gi.histories.create_history(argv.create).get('id') if argv.history is not None: + gi = connect(context) history = find_history(gi, argv.history) if argv.file is not None: configfile = argv.file @@ -249,9 +250,17 @@ def rename(context: Context, args: list): print("ERROR: please provide the history ID, dataset ID, and new name.") return gi = connect(context) - response = gi.histories.update_dataset(args[0], args[1], name=args[2]) - result = {'state': response['state'], 'name': response['name']} - print(json.dumps(result, indent=4)) + hid = find_history(gi, args[0]) + if hid is None: + print("ERROR: no such history") + return + dsid = find_dataset(gi, hid, args[1]) + if dsid is None: + print("ERROR: no such dataset") + return + response = gi.histories.update_dataset(hid, dsid, name=args[2]) + # result = {'state': response['state'], 'name': response['name']} + print(json.dumps(response, indent=4)) def test(context: Context, args: list): diff --git a/abm/lib/history.py b/abm/lib/history.py index 1f13cbe..12cd35e 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -197,7 +197,7 @@ def himport(context: Context, args: list): parser = argparse.ArgumentParser() parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) parser.add_argument('-f', '--file', help='Use the specified 
histories.yml file', required=False, default=None) - parser.add_argument('identifier', help='The history alias or URL to import', required=True) + parser.add_argument('identifier', help='The history alias or URL to import') argv = parser.parse_args(args) wait = not argv.no_wait diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 9ee7f3e..90f877f 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -89,8 +89,8 @@ help: download a dataset from the server - name: ['import', 'imp'] handler: dataset.import_from_config - params: KEY [--hs|--hist|--history HISTORY_ID | -c|--create "History name"] - help: imports a dataset to the server from a URL specified in the datasets.yml config file. + params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] KEY [KEY...]' + help: imports one or more datasets to the server from a URL specified in the datasets.yml config file. - name: ['list', 'ls'] handler: dataset.do_list help: lists all the datasets on the server From 774f956d6d063ef3fadc2e98fbcbca580c2f0afa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 6 Jun 2024 14:37:21 -0400 Subject: [PATCH 52/56] Use gi.jobs.get_job_metrics(job_id) to get the metrics for a job before writing the metrics file. 
--- abm/lib/benchmark.py | 1 + abm/lib/common.py | 3 ++- abm/lib/experiment.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index b9962a2..f9c5655 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -456,6 +456,7 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): jobs = gi.jobs.get_jobs(history_id=hid) for job in jobs: data = gi.jobs.show_job(job['id'], full_details=True) + data['job_metrics'] = gi.jobs.get_job_metrics(job['id']) metrics = { 'run': run, 'cloud': cloud, diff --git a/abm/lib/common.py b/abm/lib/common.py index f75fe3a..a6d113d 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -287,7 +287,8 @@ def find_executable(name): "galaxy_slots", # "memory.failcnt", "memory.limit_in_bytes", - "memory.max_usage_in_bytes", + "memory.peak", + #"memory.max_usage_in_bytes", # "memory.memsw.limit_in_bytes", # "memory.memsw.max_usage_in_bytes", # "memory.oom_control.oom_kill_disable", diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 54a1707..ae0addc 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -233,7 +233,8 @@ def summarize(context: Context, args: list): 'runtime_seconds', 'cpuacct.usage', 'memory.limit_in_bytes', - 'memory.max_usage_in_bytes', + 'memory.peak' + #'memory.max_usage_in_bytes', ] # ,'memory.soft_limit_in_bytes'] From 1a86f2f7b34ed9f94b2015a9f9a175be547309e2 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:11:27 -0400 Subject: [PATCH 53/56] Allow --history to be specified by name --- abm/lib/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 3f8d7a1..373a002 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -118,6 +118,7 @@ def upload(context: Context, args: list): return if gi is None: gi = connect(context) + history = find_history(gi, history) if name: _import_from_url(gi, history, url, file_name=name) else: From 
6de42f9ecc5176cd16d6b4e8b54069a4b3bd91fb Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:11:54 -0400 Subject: [PATCH 54/56] Document --name option when uploading/downloading datasets --- abm/lib/menu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 90f877f..eab2d00 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -81,7 +81,7 @@ menu: - name: ['upload', 'up'] handler: dataset.upload - params: PATH [-id HISTORY_ID | -c "History name"] + params: PATH [--history "History name_or_id" | -c|--create "History name"] [-m|--name "Dataset name"] help: upload a dataset to the server from the specified URL - name: ['download', 'dl'] handler: dataset.download @@ -89,7 +89,7 @@ help: download a dataset from the server - name: ['import', 'imp'] handler: dataset.import_from_config - params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] KEY [KEY...]' + params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] [-n|--name "Dataset name"] KEY [KEY...]' help: imports one or more datasets to the server from a URL specified in the datasets.yml config file. 
- name: ['list', 'ls'] handler: dataset.do_list From 71f9aea26bb77aa7650dec0bee8209446a0a688a Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:12:23 -0400 Subject: [PATCH 55/56] Update requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7284f55..bc7653c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ bioblend pyyaml planemo -cloudlaunch-cli \ No newline at end of file +cloudlaunch-cli From da7f6f4222c7b7bae720c85e978b347e352511b9 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:27:01 -0400 Subject: [PATCH 56/56] Format code with Black and iSort --- abm/lib/benchmark.py | 7 ++++--- abm/lib/common.py | 26 ++++++++++++++++++++------ abm/lib/dataset.py | 29 ++++++++++++++++++++++++----- abm/lib/experiment.py | 7 +++++-- abm/lib/helm.py | 3 ++- abm/lib/history.py | 30 +++++++++++++++++++++++------- abm/lib/invocation.py | 6 ++++-- abm/lib/job.py | 5 +++-- abm/lib/workflow.py | 5 +++-- 9 files changed, 88 insertions(+), 30 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index f9c5655..a1a1b69 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -187,7 +187,9 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): # print(f"Getting dataset for {key} = {item[key]}") value = _get_dataset_data(gi, item[key]) if value is None: - print(f"ERROR: Unable to find dataset {item[key]}") + print( + f"ERROR: Unable to find dataset {item[key]}" + ) return if size in value: size += value['size'] @@ -231,7 +233,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): else: raise Exception(f'Invalid input value') print(f"Running workflow {wfid} in history {new_history_name}") - f = lambda : gi.workflows.invoke_workflow( + f = lambda: gi.workflows.invoke_workflow( wfid, inputs=inputs, history_name=new_history_name ) invocation = try_for(f, 3) @@ 
-640,4 +642,3 @@ def test(context: Context, args: list): print("Calling _get_dataset_data") dsdata = _get_dataset_data(gi, dsid) pprint(dsdata) - diff --git a/abm/lib/common.py b/abm/lib/common.py index 397f086..dd5ff2f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,9 +2,8 @@ import os import subprocess import sys -from pathlib import Path - from math import ceil +from pathlib import Path import bioblend.galaxy import lib @@ -59,11 +58,17 @@ class Context: API_KEY : a user's API key to make API calls on the Galaxy instance KUBECONFIG: : the kubeconfig file needed to make changes via Helm """ + def __init__(self, *args): if len(args) == 1: arg = args[0] if type(arg) == str: - self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG, self.MASTER_KEY = parse_profile(arg) + ( + self.GALAXY_SERVER, + self.API_KEY, + self.KUBECONFIG, + self.MASTER_KEY, + ) = parse_profile(arg) elif type(arg) == dict: self.GALAXY_SERVER = arg['GALAXY_SERVER'] self.API_KEY = arg['API_KEY'] @@ -128,7 +133,9 @@ def _set_active_profile(profile_name: str): :param profile_name: :return: """ - lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile(profile_name) + lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile( + profile_name + ) return lib.GALAXY_SERVER != None @@ -290,7 +297,7 @@ def find_executable(name): # "memory.failcnt", "memory.limit_in_bytes", "memory.peak", - #"memory.max_usage_in_bytes", + # "memory.max_usage_in_bytes", # "memory.memsw.limit_in_bytes", # "memory.memsw.max_usage_in_bytes", # "memory.oom_control.oom_kill_disable", @@ -304,6 +311,7 @@ def find_executable(name): # "uname" ] + def print_table_header(): """ Prints the table header suitable for inclusion in CSV files. 
@@ -314,6 +322,8 @@ def print_table_header(): history_name_cache = dict() + + def get_history_name(gi, hid: str) -> str: if hid in history_name_cache: return history_name_cache[hid] @@ -444,6 +454,7 @@ def find_config(name: str) -> str: def _get_dataset_data(gi, name_or_id): print(f"Getting dataset data for {name_or_id}") + def make_result(data): return { 'id': data['id'], @@ -488,17 +499,20 @@ def _make_dataset_element(name, value): # print(f"Making dataset element for {name} = {value}({type(value)})") return dataset_collections.HistoryDatasetElement(name=name, id=value) + def get_float_key(column: int): def get_key(row: list): if row[column] == '': return -1 return float(row[column]) + return get_key + def get_str_key(column: int): # print(f"Getting string key for column {column}") def get_key(row: list): # print(f"Sorting by column {column} key {row[column]}") return row[column] - return get_key + return get_key diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 373a002..9ac3811 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json, find_config, find_dataset) + find_config, find_dataset, find_history, print_json) def do_list(context: Context, argv: list): @@ -164,10 +164,29 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-c', '--create', help='create a new history for the dataset', required=False, default=None) - parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) - parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) - parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) + parser.add_argument( + '-c', + 
'--create', + help='create a new history for the dataset', + required=False, + default=None, + ) + parser.add_argument( + '-f', + '--file', + help='use instead of the datasets.yml', + required=False, + default=None, + ) + parser.add_argument( + '--history', + help='add datasets to the given history', + required=False, + default=None, + ) + parser.add_argument( + '-n', '--name', help='set the name of the dataset', required=False, default=None + ) parser.add_argument('keys', help='the key of the dataset to import', nargs='+') gi = None history = None diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index ae0addc..1e35d6e 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -11,7 +11,8 @@ import benchmark import helm import yaml -from common import Context, load_profiles, print_markdown_table, get_str_key, get_float_key +from common import (Context, get_float_key, get_str_key, load_profiles, + print_markdown_table) INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" @@ -221,7 +222,9 @@ def summarize(context: Context, args: list): # cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" # memory = float(row[13]) / GB - print(f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |") + print( + f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |" + ) else: for row in table: print(separator.join([str(x) for x in row])) diff --git a/abm/lib/helm.py b/abm/lib/helm.py index da92bc3..b1cfa20 100644 --- a/abm/lib/helm.py +++ b/abm/lib/helm.py @@ -146,7 +146,8 @@ def wait_until_ready(namespace: str, env: dict): for deployment in deployments: print( run( - f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", env + f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", + env, ) ) diff --git a/abm/lib/history.py b/abm/lib/history.py index 12cd35e..bc78e95 
100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -8,9 +8,10 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance -from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header, try_for, find_config) +from lib.common import (Context, connect, find_config, find_history, + get_float_key, get_str_key, parse_profile, print_json, + print_markdown_table, print_table_header, + summarize_metrics, try_for) # # History related functions @@ -19,6 +20,7 @@ # The number of times a failed job will be restarted. RESTART_MAX = 3 + def longest_name(histories: list): longest = 0 for history in histories: @@ -195,8 +197,20 @@ def _import(context: Context, args: list): def himport(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) - parser.add_argument('-f', '--file', help='Use the specified histories.yml file', required=False, default=None) + parser.add_argument( + '-n', + '--no-wait', + action='store_true', + help='Do not wait for the import to complete', + default=False, + ) + parser.add_argument( + '-f', + '--file', + help='Use the specified histories.yml file', + required=False, + default=None, + ) parser.add_argument('identifier', help='The history alias or URL to import') argv = parser.parse_args(args) @@ -373,14 +387,16 @@ def wait(context: Context, args: list): wait_for(gi, history_id) -def kill_all_jobs(gi: GalaxyInstance, job_list:list): +def kill_all_jobs(gi: GalaxyInstance, job_list: list): cancel_states = ['new', 'running', 'paused'] for job in job_list: if job['state'] in cancel_states: print(f"Cancelling job {job['tool_id']}") gi.jobs.cancel_job(job['id']) else: - print(f"Job {job['id']} for tool {job['tool_id']} is in state {job['state']}") + print( + f"Job {job['id']} for tool {job['tool_id']} is 
in state {job['state']}" + ) def wait_for(gi: GalaxyInstance, history_id: str): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 84e3906..c76e200 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,6 +1,8 @@ import argparse -from common import Context, connect, print_json, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header, print_yaml + +from common import (Context, connect, get_float_key, get_str_key, print_json, + print_markdown_table, print_table_header, print_yaml, + summarize_metrics) def doList(context: Context, args: list): diff --git a/abm/lib/job.py b/abm/lib/job.py index 4973209..92ae693 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -1,8 +1,9 @@ +import argparse import datetime import json import logging import time -import argparse + from .common import Context, connect, find_history, print_json log = logging.getLogger('abm') @@ -61,7 +62,7 @@ def wait(context: Context, args: list): timeout = params.timeout job_id = params.job_id gi = connect(context) - start_time = time.time() # we only interested in precision to the second + start_time = time.time() # we only interested in precision to the second waiting = True while waiting: job = gi.jobs.show_job(job_id, full_details=False) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 9f6f2ac..e14bbf9 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -7,8 +7,9 @@ import requests import yaml -from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header, find_config +from common import (Context, connect, find_config, get_float_key, get_str_key, + print_markdown_table, print_table_header, + summarize_metrics) from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri