From 60aba883c8991bdc26d2d93ec018d97a9495acf6 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:04:10 -0500 Subject: [PATCH 01/56] Modify summarize_metrics to return a table of data rather than printing directly --- abm/lib/common.py | 9 ++++++--- abm/lib/history.py | 4 +++- abm/lib/invocation.py | 4 +++- abm/lib/workflow.py | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index dbb2402..57f2d05 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -194,6 +194,7 @@ def find_executable(name): def summarize_metrics(gi, jobs: list): + table = [] header = [ "id", "history_id", @@ -221,8 +222,8 @@ def summarize_metrics(gi, jobs: list): # "swaptotal", # "uname" ] - - print(','.join(header)) + table.append(header) + # print(','.join(header)) for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] @@ -238,7 +239,9 @@ def summarize_metrics(gi, jobs: list): row.append(metrics[key]) else: row.append('') - print(','.join(row), end='\n') + # print(','.join(row), end='\n') + table.append(row) + return table def metrics_to_dict(metrics: list, accept: list): diff --git a/abm/lib/history.py b/abm/lib/history.py index 656fee0..133ab2d 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -368,7 +368,9 @@ def summarize(context: Context, args: list): # job['workflow_id'] = invocation['workflow_id'] # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 0e6807f..e18eccf 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -36,4 +36,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = '' all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, 
all_jobs) + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 0ba36f7..94a4ab5 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -250,4 +250,6 @@ def summarize(context: Context, args: list): job['invocation_id'] = id job['workflow_id'] = wid all_jobs.append(job) - summarize_metrics(gi, all_jobs) + table = summarize_metrics(gi, all_jobs) + for row in table: + print(','.join(row)) From b27956acafb9d331584536a59111f62d9a82d223 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 11 Dec 2023 16:55:33 -0500 Subject: [PATCH 02/56] Add --markdown option to summarize commands --- abm/lib/common.py | 8 ++++++++ abm/lib/experiment.py | 24 +++++++++++++++++++----- abm/lib/history.py | 14 +++++++++++--- abm/lib/invocation.py | 5 +++++ abm/lib/menu.yml | 14 +++++++------- abm/lib/workflow.py | 5 +++++ 6 files changed, 55 insertions(+), 15 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 57f2d05..244477b 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -244,6 +244,14 @@ def summarize_metrics(gi, jobs: list): return table +def print_markdown_table(table: list) -> None: + print('| ID | History | Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|') + for row in table[1:]: + line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + print(f'| {line} |') + + def metrics_to_dict(metrics: list, accept: list): result = dict() for m in metrics: diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 9dead51..3e7954b 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -114,31 +114,37 @@ def summarize(context: Context, args: list): :param args[0]: The path to the directory containing metrics filees :return: None """ + markdown = False separator = None input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" for arg 
in args: if arg in ['-t', '--tsv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('tsv') separator = '\t' elif arg in ['-c', '--csv']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return separator = ',' print('csv') elif arg in ['-m', '--model']: - if separator is not None: + if separator is not None or markdown: print('ERROR: The output format is specified more than once') return print('making a model') separator = ',' make_row = make_model_row header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" + elif arg == '--markdown': + if separator is not None or markdown: + print('ERROR: The output format is specified more than once') + return + markdown = True else: # print(f"Input dir {arg}") input_dirs.append(arg) @@ -149,7 +155,11 @@ def summarize(context: Context, args: list): if separator is None: separator = ',' - print(header_row) + if markdown: + print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|") + print("|---|---|---|---|---|---|---|---|") + else: + print(header_row) for input_dir in input_dirs: for file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -162,7 +172,11 @@ def summarize(context: Context, args: list): # print('Ignoring upload tool') continue row = make_row(data) - print(separator.join([str(x) for x in row])) + if markdown: + line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14]) + print(f'| {line} |') + else: + print(separator.join([str(x) for x in row])) except Exception as e: # Silently fail to allow the remainder of the table to be generated. 
print(f"Unable to process {input_path}") diff --git a/abm/lib/history.py b/abm/lib/history.py index 133ab2d..04e1fcb 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -8,7 +8,7 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics) + print_json, summarize_metrics, print_markdown_table) # # History related functions @@ -339,6 +339,11 @@ def tag(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more history ID values.") return @@ -369,8 +374,11 @@ def summarize(context: Context, args: list): # all_jobs.append(job) # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) def wait(context: Context, args: list): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index e18eccf..be4368c 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -25,6 +25,11 @@ def doList(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more invocation ID values.") return diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 9aafa1f..c3374bd 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -69,8 +69,8 @@ handler: workflow.inputs - name: [summary, summarize] handler: workflow.summarize - help: generate a CSV with job metrics for all workflow runs - params: ID [ID ...] + help: generate a CSV or markdown table with job metrics for all workflow runs + params: "ID [ID ...] 
[--markdown]" - name: ['test'] handler: workflow.test help: run some test code @@ -158,8 +158,8 @@ help: show detailed information about a history - name: [summarize, summary, table] handler: history.summarize - params: "ID [ID...]" - help: Generate a CSV table with runtime metrics for all jobs in the history. + params: "ID [ID...] [--markdown]" + help: Generate a CSV or markdown table with runtime metrics for all jobs in the history. - name: [publish, pub] handler: history.publish help: publish the given history @@ -250,7 +250,7 @@ - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize - params: "[-c, --csv, -t, --tsv]" + params: "[-c, --csv, -t, --tsv, --markdown]" - name: [test] help: playground code handler: experiment.test @@ -263,8 +263,8 @@ handler: invocation.doList params: "[-w|--workflow ID] [-h|--history ID]" - name: [summarize] - help: generate a CSV of job metrics for an invocation - params: ID + help: generate a CSV or markdown table of job metrics for an invocation + params: "ID [--markdown]" handler: invocation.summarize - name: [helm] help: execute a helm command diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 94a4ab5..1de403c 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -236,6 +236,11 @@ def rename(context: Context, args: list): def summarize(context: Context, args: list): + markdown = False + if '--markdown' in args: + markdown = True + args.remove('--markdown') + if len(args) == 0: print("ERROR: Provide one or more workflow ID values.") return From 228ac9f13c4d6c2307e59b00141803b4b6965aec Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 09:36:01 -0500 Subject: [PATCH 03/56] Bump to dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 4985c3c..b9270b9 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.8.0-dev.6 +2.9.0-dev.0 From 94f23e030b109b097c7a55f4a75ea707d2482d26 
Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 10:36:11 -0500 Subject: [PATCH 04/56] Finish summarize --markdown implementations --- abm/lib/common.py | 12 ++++++++---- abm/lib/experiment.py | 2 +- abm/lib/invocation.py | 9 ++++++--- abm/lib/workflow.py | 9 ++++++--- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 244477b..39d611f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -227,12 +227,16 @@ def summarize_metrics(gi, jobs: list): for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) row = [] + toolid = job.get('tool_id', 'unknown') + if '/' in toolid: + parts = toolid.split('/') + toolid = f'{parts[-2]}/{parts[-1]}' metrics = metrics_to_dict(job_metrics, header) metrics['id'] = job.get('id', 'unknown') metrics['history_id'] = job.get('history_id', 'unknown') metrics['history_name'] = job.get('history_name', 'unknown') metrics['state'] = job.get('state', 'unknown') - metrics['tool_id'] = job.get('tool_id', 'unknown') + metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') for key in header: if key in metrics: @@ -245,10 +249,10 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|') + print('| ID | History | State |Tool | CPU | Memory | Runtime |') + print('|---|---|---|---|---|---|---|') for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,4,7,11,15]) + line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) print(f'| {line} |') diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 3e7954b..e533773 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -9,7 +9,7 @@ import benchmark import helm import yaml -from common import Context, load_profiles +from common import Context, load_profiles, print_markdown_table INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" 
diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index be4368c..c531471 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,4 +1,4 @@ -from common import Context, connect, print_json, summarize_metrics +from common import Context, connect, print_json, summarize_metrics, print_markdown_table def doList(context: Context, args: list): @@ -42,5 +42,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = '' all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 1de403c..54c79a3 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -7,7 +7,7 @@ import planemo import requests import yaml -from common import Context, connect, summarize_metrics +from common import Context, connect, summarize_metrics, print_markdown_table from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -256,5 +256,8 @@ def summarize(context: Context, args: list): job['workflow_id'] = wid all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - for row in table: - print(','.join(row)) + if markdown: + print_markdown_table(table) + else: + for row in table: + print(','.join(row)) From 837b60bd82af0d3d996fe4d65372e6cc7d098b4f Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:08 -0500 Subject: [PATCH 05/56] Bump dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index b9270b9..7299291 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.0 +2.9.0-dev.1 From 7bbafe477e88f31d66d38a385beea08a0f005017 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 15:16:58 -0500 Subject: [PATCH 06/56] Add units to markdown table header row. 
Better formatting for floats --- abm/lib/common.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 39d611f..38ca619 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -249,11 +249,19 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| ID | History | State |Tool | CPU | Memory | Runtime |') - print('|---|---|---|---|---|---|---|') + print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') + print('|---|---|---|---|---|---|') + GB = 1024 * 1024 * 1024 for row in table[1:]: - line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) - print(f'| {line} |') + history = row[2] + state = row[3] + tool_id = row[4] + cpu = float(row[7]) / 10**9 + memory = float(row[11]) / GB + runtime = float(row[15]) + # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) + # print(f'| {line} |') + print(f'| {tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') def metrics_to_dict(metrics: list, accept: list): From bfe272b110c98d118ffa3ae3558a3a94fd022d91 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 18:09:08 -0500 Subject: [PATCH 07/56] Better column formatting for markdown tables --- abm/lib/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 38ca619..4eccbb9 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -250,7 +250,7 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') - print('|---|---|---|---|---|---|') + print('|---|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] @@ -261,7 +261,7 @@ def print_markdown_table(table: list) -> None: runtime = float(row[15]) # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) # print(f'| {line} |') - print(f'| 
{tool_id} | {history} | {state} | {cpu:6.1f} | {memory:3.3f} | {runtime:6.1f} |') + print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |') def metrics_to_dict(metrics: list, accept: list): From 61b1b2964bd9fe702723ff0205aa115138d01e54 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 20:08:13 -0500 Subject: [PATCH 08/56] Change order that benchmarks are executed during an experiment. --- abm/lib/experiment.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..0b72b2d 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -76,16 +76,16 @@ def run_on_cloud(cloud: str, config: dict): if not helm.update(context, [f"rules/{conf}.yml", namespace, chart]): log.warning(f"job configuration not found: rules/{conf}.yml") continue - for n in range(config['runs']): - history_name_prefix = f"{n+1} {cloud} {conf}" - for workflow_conf in config['benchmark_confs']: + for workflow_conf in config['benchmark_confs']: + for n in range(config['runs']): + history_name_prefix = f"{n+1} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: - for n in range(config['runs']): - history_name_prefix = f"{n+1} {cloud}" - for workflow_conf in config['benchmark_confs']: + for workflow_conf in config['benchmark_confs']: + for n in range(config['runs']): + history_name_prefix = f"{n+1} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) From 9d0eb6db3e871e8c8f81eb61d0e1c6ed37811f75 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:03:18 -0500 Subject: [PATCH 09/56] Add --timeout option to job.wait --- abm/lib/job.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/abm/lib/job.py b/abm/lib/job.py index c0b8ff5..efd8bfa 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -1,8 +1,8 @@ +import datetime 
import json import logging import time -from pprint import pprint - +import argparse from .common import Context, connect, find_history, print_json log = logging.getLogger('abm') @@ -54,18 +54,27 @@ def show(context: Context, args: list): def wait(context: Context, args: list): - if len(args) != 1: - print("ERROR: Invalid parameters. Job ID is required") - return + parser = argparse.ArgumentParser() + parser.add_argument('job_id') + parser.add_argument('-t', '--timeout', default=-1) + params = parser.parse_args(args) + timeout = params.timeout + job_id = params.job_id gi = connect(context) - state = "Unknown" + start_time = time.time() # we only interested in precision to the second waiting = True while waiting: - job = gi.jobs.show_job(args[0], full_details=False) + job = gi.jobs.show_job(job_id, full_details=False) + if job is None or len(job) == 0: + print(f"Job {job_id} not found.") + return state = job["state"] + if timeout > 0: + if time.time() - start_time > timeout: + waiting = False if state == "ok" or state == "error": waiting = False - else: + if waiting: time.sleep(15) print(json.dumps(job, indent=4)) From dfb2d5026490433a9df94cafd1a5694e220a5148 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:32:46 -0500 Subject: [PATCH 10/56] Use --run-number to specify starting int when numbering benchmark runs --- abm/lib/experiment.py | 17 +++++++++++------ abm/lib/menu.yml | 6 +++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..b04a397 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -1,3 +1,4 @@ +import argparse import json import logging import os @@ -26,18 +27,20 @@ def run(context: Context, args: list): :return: True if the benchmarks completed sucessfully. False otherwise. 
""" + parser = argparse.ArgumentParser() + parser.add_argument('benchmark_path') + parser.add_argument('-r', '--run-number', default=-1) + argv = parser.parse_args(args) - if len(args) == 0: - print("ERROR: No benchmarking configuration provided.") - return False + benchmark_path = argv.benchmark_path - benchmark_path = args[0] if not os.path.exists(benchmark_path): print(f"ERROR: Benchmarking configuration not found {benchmark_path}") return False with open(benchmark_path, 'r') as f: config = yaml.safe_load(f) + config['start_at'] = argv.run_number profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) @@ -66,6 +69,8 @@ def run_on_cloud(cloud: str, config: dict): context = Context(cloud) namespace = 'galaxy' chart = 'anvil/galaxykubeman' + start = config['start_at'] + end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] chart = config['galaxy']['chart'] @@ -76,14 +81,14 @@ def run_on_cloud(cloud: str, config: dict): if not helm.update(context, [f"rules/{conf}.yml", namespace, chart]): log.warning(f"job configuration not found: rules/{conf}.yml") continue - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud} {conf}" for workflow_conf in config['benchmark_confs']: benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud}" for workflow_conf in config['benchmark_confs']: benchmark.run( diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index c3374bd..948d44a 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -208,7 +208,7 @@ - name: [wait] help: Wait for a job to finish running handler: job.wait - params: ID + params: "ID [-T|--timeout SECONDS]" - name: [ metrics, stats ] help: display runtime metrics for the job, or a list of jobs contained in a history handler: job.metrics @@ -244,9 +244,9 @@ standalone: true menu: - name: [run] - 
help: run all benchmarks in an experiment + help: run all benchmarks in an experiment. Use --run-number to specify staring counter. handler: experiment.run - params: PATH + params: "PATH [-r|--run-number N]" - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize From e9d037037b07b4dbb103b60cf4c53995acd448ee Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 12 Dec 2023 21:35:23 -0500 Subject: [PATCH 11/56] Handle case with --run-number is not specified --- abm/lib/experiment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index b04a397..94de288 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -70,6 +70,8 @@ def run_on_cloud(cloud: str, config: dict): namespace = 'galaxy' chart = 'anvil/galaxykubeman' start = config['start_at'] + if start < 0: + start = 1 end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] From 01f624cfc963fdd9da8086ef62c0fd72c7fea3fa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 10:08:39 -0500 Subject: [PATCH 12/56] Improve mardown for experiment summarize --- abm/lib/experiment.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index e533773..686301a 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -4,6 +4,7 @@ import threading import traceback from datetime import timedelta +from pprint import pprint from time import perf_counter import benchmark @@ -156,10 +157,11 @@ def summarize(context: Context, args: list): separator = ',' if markdown: - print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|") - print("|---|---|---|---|---|---|---|---|") + print("|Run|Job Conf|Tool|State|Runtime (Sec)|CPU (Sec) |Max Memory (GB)|") + print("|---|---|---|---|---:|---:|---:|") else: print(header_row) + GB = 1024 * 1024 * 1024 for input_dir in input_dirs: for 
file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -173,8 +175,13 @@ def summarize(context: Context, args: list): continue row = make_row(data) if markdown: - line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14]) - print(f'| {line} |') + runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" + cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" + # print(runtime, cpu, memory) + print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + # line = ' | '.join(row[i] for i in [0,2,6,7,10,11,13]) + # print(f'| {line} |') else: print(separator.join([str(x) for x in row])) except Exception as e: From c371f14154fec8f0faa4d25bce35f545eeb646f7 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 12:40:17 -0500 Subject: [PATCH 13/56] Add --sort-by option to experiment.summarize --- abm/lib/experiment.py | 133 +++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 40 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 686301a..9b924bf 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -1,3 +1,4 @@ +import argparse import json import logging import os @@ -120,35 +121,68 @@ def summarize(context: Context, args: list): input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" - for arg in args: - if arg in ['-t', '--tsv']: - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - print('tsv') - separator = '\t' - elif arg in ['-c', '--csv']: - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - separator = ',' - print('csv') - elif arg in ['-m', '--model']: - if separator is not None or markdown: - 
print('ERROR: The output format is specified more than once') - return - print('making a model') - separator = ',' - make_row = make_model_row - header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" - elif arg == '--markdown': - if separator is not None or markdown: - print('ERROR: The output format is specified more than once') - return - markdown = True - else: - # print(f"Input dir {arg}") - input_dirs.append(arg) + # for arg in args: + # if arg in ['-t', '--tsv']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # print('tsv') + # separator = '\t' + # elif arg in ['-c', '--csv']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # separator = ',' + # print('csv') + # elif arg in ['-m', '--model']: + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # print('making a model') + # separator = ',' + # make_row = make_model_row + # header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" + # elif arg == '--markdown': + # if separator is not None or markdown: + # print('ERROR: The output format is specified more than once') + # return + # markdown = True + # else: + # # print(f"Input dir {arg}") + # input_dirs.append(arg) + + parser = argparse.ArgumentParser() + parser.add_argument('dirs', nargs='*') + parser.add_argument('-c', '--csv', action='store_true') + parser.add_argument('-t', '--tsv', action='store_true') + parser.add_argument('-m', '--model', action='store_true') + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['cpu', 'runtime', 'memory']) + 
argv = parser.parse_args(args) + + count = 0 + if argv.csv: + separator = ',' + count += 1 + if argv.tsv: + separator = '\t' + count += 1 + if argv.model: + separator = ',' + make_row = make_model_row + count += 1 + if argv.markdown: + markdown = True + count += 1 + + if count == 0: + print("ERROR: no output format selected") + return + if count > 1: + print("ERROR: multiple output formats selected") + return + + input_dirs = argv.dirs if len(input_dirs) == 0: input_dirs.append('metrics') @@ -161,7 +195,9 @@ def summarize(context: Context, args: list): print("|---|---|---|---|---:|---:|---:|") else: print(header_row) - GB = 1024 * 1024 * 1024 + + table = list() + GB = float(1073741824) for input_dir in input_dirs: for file in os.listdir(input_dir): input_path = os.path.join(input_dir, file) @@ -174,16 +210,7 @@ def summarize(context: Context, args: list): # print('Ignoring upload tool') continue row = make_row(data) - if markdown: - runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" - cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" - memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" - # print(runtime, cpu, memory) - print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") - # line = ' | '.join(row[i] for i in [0,2,6,7,10,11,13]) - # print(f'| {line} |') - else: - print(separator.join([str(x) for x in row])) + table.append(row) except Exception as e: # Silently fail to allow the remainder of the table to be generated. 
print(f"Unable to process {input_path}") @@ -191,6 +218,32 @@ def summarize(context: Context, args: list): traceback.print_exc() # pass + def comparator(row): + print('key', row[key]) + print('type', type(row[key])) + return row[key] + + if argv.sort_by: + key = 0 + if argv.sort_by == 'runtime': + key = 10 + elif argv.sort_by == 'cpu': + key = 11 + elif argv.sort_by == 'memory': + key = 13 + table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + + if markdown: + for row in table: + runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" + cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" + # memory = float(row[13]) / GB + print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + else: + for row in table: + print(separator.join([str(x) for x in row])) + accept_metrics = [ 'galaxy_slots', From ac9088046ffd0181f2b017356cac1fe93d86e0ea Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 12:49:38 -0500 Subject: [PATCH 14/56] Dev 2 version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 7299291..f43ee2a 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.1 +2.9.0-dev.2 From c3a56781e4a7e44296f5d3116ed1362a1f4fffa5 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:16:05 -0500 Subject: [PATCH 15/56] Fix exception generating markdown if cell is empty --- abm/lib/common.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 4eccbb9..09b2c60 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -249,19 +249,19 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: - print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|') + print('| Tool ID | History | State | Memory (GB) | 
Runtime (sec)|') print('|---|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] state = row[3] tool_id = row[4] - cpu = float(row[7]) / 10**9 - memory = float(row[11]) / GB - runtime = float(row[15]) + # cpu = '' if row[7] == '' else float(row[7]) / 10**9 + memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" + runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) # print(f'| {line} |') - print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |') + print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') def metrics_to_dict(metrics: list, accept: list): From 1572f1171c502bdf6855e85e1650bdc05af8af37 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:19:12 -0500 Subject: [PATCH 16/56] Version dev.3 --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index f43ee2a..0496284 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.2 +2.9.0-dev.3 From e8940b89a7f91102c3937c25d5303f73bf83aa3e Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 13 Dec 2023 14:57:25 -0500 Subject: [PATCH 17/56] Dev version 4 --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 0496284..a7902d7 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.3 +2.9.0-dev.4 From c0086e7490412e3b93fa112edc3ba93feebcd929 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 11:09:01 -0500 Subject: [PATCH 18/56] Add --sort-by to all summarize commands --- abm/lib/common.py | 79 +++++++++++++++++++++++++++---------------- abm/lib/experiment.py | 70 ++++++++++++-------------------------- abm/lib/history.py | 51 ++++++++++++++-------------- abm/lib/invocation.py | 31 +++++++++++------ abm/lib/workflow.py | 49 ++++++++++++--------------- 5 files changed, 139 insertions(+), 141 
deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 09b2c60..7814fc6 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -192,37 +192,41 @@ def find_executable(name): # "swaptotal", # "uname" +table_header = [ + "id", + "history_id", + "history_name", + "state", + "tool_id", + "invocation_id", + "workflow_id", + "cpuacct.usage", + # "end_epoch", + "galaxy_memory_mb", + "galaxy_slots", + # "memory.failcnt", + "memory.limit_in_bytes", + "memory.max_usage_in_bytes", + # "memory.memsw.limit_in_bytes", + # "memory.memsw.max_usage_in_bytes", + # "memory.oom_control.oom_kill_disable", + # "memory.oom_control.under_oom", + "memory.soft_limit_in_bytes", + "memtotal", + "processor_count", + "runtime_seconds", + # "start_epoch", + # "swaptotal", + # "uname" +] + +def print_table_header(): + print(','.join(table_header)) + def summarize_metrics(gi, jobs: list): table = [] - header = [ - "id", - "history_id", - "history_name", - "state", - "tool_id", - "invocation_id", - "workflow_id", - "cpuacct.usage", - # "end_epoch", - "galaxy_memory_mb", - "galaxy_slots", - # "memory.failcnt", - "memory.limit_in_bytes", - "memory.max_usage_in_bytes", - # "memory.memsw.limit_in_bytes", - # "memory.memsw.max_usage_in_bytes", - # "memory.oom_control.oom_kill_disable", - # "memory.oom_control.under_oom", - "memory.soft_limit_in_bytes", - "memtotal", - "processor_count", - "runtime_seconds", - # "start_epoch", - # "swaptotal", - # "uname" - ] - table.append(header) + # table.append(header) # print(','.join(header)) for job in jobs: job_metrics = gi.jobs.get_metrics(job['id']) @@ -231,14 +235,14 @@ def summarize_metrics(gi, jobs: list): if '/' in toolid: parts = toolid.split('/') toolid = f'{parts[-2]}/{parts[-1]}' - metrics = metrics_to_dict(job_metrics, header) + metrics = metrics_to_dict(job_metrics, table_header) metrics['id'] = job.get('id', 'unknown') metrics['history_id'] = job.get('history_id', 'unknown') metrics['history_name'] = job.get('history_name', 
'unknown') metrics['state'] = job.get('state', 'unknown') metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') - for key in header: + for key in table_header: if key in metrics: row.append(metrics[key]) else: @@ -334,3 +338,18 @@ def make_result(data): def _make_dataset_element(name, value): # print(f"Making dataset element for {name} = {value}({type(value)})") return dataset_collections.HistoryDatasetElement(name=name, id=value) + +def get_float_key(column: int): + def get_key(row: list): + if row[column] == '': + return -1 + return float(row[column]) + return get_key + +def get_str_key(column: int): + # print(f"Getting string key for column {column}") + def get_key(row: list): + # print(f"Sorting by column {column} key {row[column]}") + return row[column] + return get_key + diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 81c0c5c..2c03563 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -11,7 +11,7 @@ import benchmark import helm import yaml -from common import Context, load_profiles, print_markdown_table +from common import Context, load_profiles, print_markdown_table, get_str_key, get_float_key INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" @@ -124,38 +124,8 @@ def summarize(context: Context, args: list): """ markdown = False separator = None - input_dirs = [] make_row = make_table_row header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)" - # for arg in args: - # if arg in ['-t', '--tsv']: - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # print('tsv') - # separator = '\t' - # elif arg in ['-c', '--csv']: - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # separator = ',' - # print('csv') - # elif arg in ['-m', '--model']: - # if separator 
is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # print('making a model') - # separator = ',' - # make_row = make_model_row - # header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2" - # elif arg == '--markdown': - # if separator is not None or markdown: - # print('ERROR: The output format is specified more than once') - # return - # markdown = True - # else: - # # print(f"Input dir {arg}") - # input_dirs.append(arg) parser = argparse.ArgumentParser() parser.add_argument('dirs', nargs='*') @@ -163,7 +133,7 @@ def summarize(context: Context, args: list): parser.add_argument('-t', '--tsv', action='store_true') parser.add_argument('-m', '--model', action='store_true') parser.add_argument('--markdown', action='store_true') - parser.add_argument('-s', '--sort-by', choices=['cpu', 'runtime', 'memory']) + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) argv = parser.parse_args(args) count = 0 @@ -197,8 +167,8 @@ def summarize(context: Context, args: list): separator = ',' if markdown: - print("|Run|Job Conf|Tool|State|Runtime (Sec)|CPU (Sec) |Max Memory (GB)|") - print("|---|---|---|---|---:|---:|---:|") + print("|Run|Inputs|Job Conf|Tool|State|Runtime (Sec)|Max Memory (GB)|") + print("|---|---|---|---|---|---:|---:|") else: print(header_row) @@ -218,34 +188,38 @@ def summarize(context: Context, args: list): row = make_row(data) table.append(row) except Exception as e: - # Silently fail to allow the remainder of the table to be generated. print(f"Unable to process {input_path}") print(e) traceback.print_exc() + # Silently fail to allow the remainder of the table to be generated. 
# pass - def comparator(row): - print('key', row[key]) - print('type', type(row[key])) - return row[key] - + reverse = True if argv.sort_by: - key = 0 + comp = get_str_key(6) if argv.sort_by == 'runtime': - key = 10 - elif argv.sort_by == 'cpu': - key = 11 + # key = 10 + comp = get_float_key(10) + # elif argv.sort_by == 'cpu': + # comp = get_float_comparator(11) + # #key = 11 elif argv.sort_by == 'memory': - key = 13 - table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + comp = get_float_key(13) + # key = 13 + elif argv.sort_by == 'tool': + # print('Getting string key accessor.') + comp = get_str_key(6) + reverse = False + # table.sort(key=lambda row: -1 if row[key] == '' else float(row[key]), reverse=True) + table.sort(key=comp, reverse=reverse) if markdown: for row in table: runtime = '' if len(row[10]) == 0 else f"{float(row[10]):4.1f}" - cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" + # cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" # memory = float(row[13]) / GB - print(f"| {row[0]} | {row[2]} | {row[6]} | {row[7]} | {runtime} | {cpu} | {memory} |") + print(f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |") else: for row in table: print(separator.join([str(x) for x in row])) diff --git a/abm/lib/history.py b/abm/lib/history.py index 04e1fcb..1f6dda4 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -1,3 +1,4 @@ +import argparse import json import os import sys @@ -8,7 +9,8 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics, print_markdown_table) + print_json, summarize_metrics, print_markdown_table, + get_float_key, get_str_key, print_table_header) # # History related functions @@ -339,18 +341,20 @@ def tag(context: Context, args: list): def 
summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') + parser = argparse.ArgumentParser() + parser.add_argument('id_list', nargs='+') + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) - if len(args) == 0: + if len(argv.id_list) == 0: print("ERROR: Provide one or more history ID values.") return gi = connect(context) all_jobs = [] - while len(args) > 0: - hid = find_history(gi, args.pop(0)) + id_list = argv.id_list + while len(id_list) > 0: + hid = find_history(gi, id_list.pop(0)) history = gi.histories.show_history(history_id=hid) jobs = gi.jobs.get_jobs(history_id=hid) for job in jobs: @@ -358,25 +362,23 @@ def summarize(context: Context, args: list): job['history_id'] = hid job['history_name'] = history['name'] job['workflow_id'] = '' - # if 'workflow_id' in invocation: - # job['workflow_id'] = invocation['workflow_id'] all_jobs.append(job) - # invocations = gi.invocations.get_invocations(history_id=hid) - # for invocation in invocations: - # id = invocation['id'] - # #jobs = gi.jobs.get_jobs(history_id=hid, invocation_id=id) - # jobs = gi.jobs.get_jobs(history_id=hid) - # for job in jobs: - # job['invocation_id'] = id - # job['history_id'] = hid - # if 'workflow_id' in invocation: - # job['workflow_id'] = invocation['workflow_id'] - # all_jobs.append(job) - # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0])) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: 
print(','.join(row)) @@ -437,9 +439,6 @@ def wait_for(gi: GalaxyInstance, history_id: str): waiting = False if waiting: time.sleep(30) - # elif state == 'paused': - # paused += 1 - # print(f"{job['id']}\t{job['state']}\t{job['update_time']}\t{job['tool_id']}") class JobStates: diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index c531471..00baa9f 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,4 +1,6 @@ -from common import Context, connect, print_json, summarize_metrics, print_markdown_table +import argparse +from common import Context, connect, print_json, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ + print_table_header def doList(context: Context, args: list): @@ -25,14 +27,11 @@ def doList(context: Context, args: list): def summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') - - if len(args) == 0: - print("ERROR: Provide one or more invocation ID values.") - return + parser = argparse.ArgumentParser() + parser.add_argument('id', nargs=1) + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) gi = connect(context) id = args[0] all_jobs = [] @@ -42,8 +41,20 @@ def summarize(context: Context, args: list): job['workflow_id'] = '' all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: print(','.join(row)) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 54c79a3..1e21e77 100644 --- 
a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -1,13 +1,14 @@ +import argparse import json import logging import os from pathlib import Path from pprint import pprint -import planemo import requests import yaml -from common import Context, connect, summarize_metrics, print_markdown_table +from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ + print_table_header from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -81,12 +82,6 @@ def import_from_url(context: Context, args: list): input_text = response.text with open(cached_file, 'w') as f: f.write(input_text) - - # response = requests.get(url) - # if (response.status_code != 200): - # print(f"ERROR: There was a problem downloading the workflow: {response.status_code}") - # print(response.reason) - # return try: workflow = json.loads(input_text) except Exception as e: @@ -164,30 +159,21 @@ def invocation(context: Context, args: list): print("ERROR: Invalid paramaeters. 
A workflow ID invocation ID are required") return workflow_id = None - invocation_id = None while len(args) > 0: arg = args.pop(0) if arg in ['-w', '--work', '--workflow']: print("Setting workflow id") workflow_id = args.pop(0) - # elif arg in ['-i', '--invoke', '--invocation']: - # invocation_id = args.pop(0) - # print("Setting invocation id") else: print(f'Invalid parameter: "{arg}') return if workflow_id is None: print("ERROR: No workflow ID provided") return - # if invocation_id is None: - # print("ERROR: No invocation ID provided") - # return gi = connect(context) - # result = gi.workflows.show_invocation(workflow_id, invocation_id) invocations = gi.invocations.get_invocations( workflow_id=workflow_id, view='element', step_details=True ) - # print(json.dumps(result, indent=4)) print('ID\tState\tWorkflow\tHistory') for invocation in invocations: id = invocation['id'] @@ -236,16 +222,13 @@ def rename(context: Context, args: list): def summarize(context: Context, args: list): - markdown = False - if '--markdown' in args: - markdown = True - args.remove('--markdown') - - if len(args) == 0: - print("ERROR: Provide one or more workflow ID values.") - return + parser = argparse.ArgumentParser() + parser.add_argument('id', nargs=1) + parser.add_argument('--markdown', action='store_true') + parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) + argv = parser.parse_args(args) gi = connect(context) - wid = args[0] + wid = argv.id all_jobs = [] invocations = gi.invocations.get_invocations(workflow_id=wid) for invocation in invocations: @@ -256,8 +239,20 @@ def summarize(context: Context, args: list): job['workflow_id'] = wid all_jobs.append(job) table = summarize_metrics(gi, all_jobs) - if markdown: + if argv.sort_by: + reverse = True + get_key = None + if argv.sort_by == 'runtime': + get_key = get_float_key(15) + elif argv.sort_by == 'memory': + get_key = get_float_key(11) + elif argv.sort_by == 'tool': + get_key = get_str_key(4) + reverse = 
False + table.sort(key=get_key, reverse=reverse) + if argv.markdown: print_markdown_table(table) else: + print_table_header() for row in table: print(','.join(row)) From a253ed15bb068f6ef0c72ed02500cdcd7f104df1 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 13:53:58 -0500 Subject: [PATCH 19/56] Fix header for markdown tables --- abm/VERSION | 2 +- abm/lib/common.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/abm/VERSION b/abm/VERSION index a7902d7..32d0ce3 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.4 +2.9.0-dev.5 diff --git a/abm/lib/common.py b/abm/lib/common.py index 7814fc6..13223ac 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -254,7 +254,7 @@ def summarize_metrics(gi, jobs: list): def print_markdown_table(table: list) -> None: print('| Tool ID | History | State | Memory (GB) | Runtime (sec)|') - print('|---|---|---|---:|---:|---:|') + print('|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: history = row[2] @@ -263,8 +263,6 @@ def print_markdown_table(table: list) -> None: # cpu = '' if row[7] == '' else float(row[7]) / 10**9 memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" - # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15]) - # print(f'| {line} |') print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') From 3d627d74ff95e8fd13e2fcab0fa297cd0e4615aa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 14 Dec 2023 20:41:51 -0500 Subject: [PATCH 20/56] Add invocation.show --- abm/lib/invocation.py | 11 ++++++++++- abm/lib/menu.yml | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 00baa9f..adabbc5 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,6 +1,6 @@ import argparse from common import Context, connect, print_json, summarize_metrics, print_markdown_table, 
get_float_key, get_str_key, \ - print_table_header + print_table_header, print_yaml def doList(context: Context, args: list): @@ -26,6 +26,15 @@ def doList(context: Context, args: list): print(f'{id}\t{state}\t{workflow}\t{history}') +def show(context: Context, args: list): + if len(args) == 0: + print("ERROR: no invocation ID was provided") + return + gi = connect(context) + invocation = gi.invocations.show_invocation(args[0]) + print_yaml(invocation) + + def summarize(context: Context, args: list): parser = argparse.ArgumentParser() parser.add_argument('id', nargs=1) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 948d44a..3d6d3e7 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -262,6 +262,10 @@ help: list all invocations. handler: invocation.doList params: "[-w|--workflow ID] [-h|--history ID]" + - name: [show] + help: display information about the workflow invocation + params: ID + handler: invocation.show - name: [summarize] help: generate a CSV or markdown table of job metrics for an invocation params: "ID [--markdown]" From dac22bc4b10126ede560933c0e8d0944b1cb35d3 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 12:14:19 -0500 Subject: [PATCH 21/56] Adde help goal to the Makefile --- Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile b/Makefile index 10fc6f2..2a6432c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,15 @@ .PHONY: dist +help: + @echo + @echo "GOALS" + @echo " clean - deletes the dist directory and egg-info" + @echo " dist - creates the distribution package (wheel)" + @echo " format - runs Black and isort" + @echo " test-deploy - deploys to test.pypi.org" + @echo " deploy - deploys to pypi.org" + @echo " release - creates a GitHub release package" + @echo + dist: python3 setup.py sdist bdist_wheel From 24e327a5c5e658b714e661220ef4520237b50361 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 13:07:42 -0500 Subject: [PATCH 22/56] Improve history name lookup --- 
abm/lib/common.py | 17 +++++++++++++++-- abm/lib/invocation.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 13223ac..1e3d797 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -224,6 +224,18 @@ def print_table_header(): print(','.join(table_header)) +history_name_cache = dict() +def get_history_name(gi, hid: str) -> str: + if hid in history_name_cache: + return history_name_cache[hid] + history = gi.histories.show_history(hid) + if history is None: + return 'unknown' + name = history['name'] + history_name_cache[hid] = name + return name + + def summarize_metrics(gi, jobs: list): table = [] # table.append(header) @@ -237,8 +249,9 @@ def summarize_metrics(gi, jobs: list): toolid = f'{parts[-2]}/{parts[-1]}' metrics = metrics_to_dict(job_metrics, table_header) metrics['id'] = job.get('id', 'unknown') - metrics['history_id'] = job.get('history_id', 'unknown') - metrics['history_name'] = job.get('history_name', 'unknown') + hid = job.get('history_id', 'unknown') + metrics['history_id'] = hid + metrics['history_name'] = get_history_name(gi, hid) metrics['state'] = job.get('state', 'unknown') metrics['tool_id'] = toolid metrics['invocation_id'] = job.get('invocation_id', 'unknown') diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index adabbc5..84e3906 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -42,7 +42,7 @@ def summarize(context: Context, args: list): parser.add_argument('-s', '--sort-by', choices=['runtime', 'memory', 'tool']) argv = parser.parse_args(args) gi = connect(context) - id = args[0] + id = argv.id[0] all_jobs = [] jobs = gi.jobs.get_jobs(invocation_id=id) for job in jobs: From 4958a7af3baaeb15128488ea793461e67d6e162d Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 14:04:21 -0500 Subject: [PATCH 23/56] Round up memory and runtime values if they would display zeroes --- abm/lib/common.py | 11 +++++++++-- 1 file changed, 9 
insertions(+), 2 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 1e3d797..9903b6e 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,6 +2,7 @@ import os import subprocess import sys +from math import ceil import bioblend.galaxy import lib @@ -270,12 +271,18 @@ def print_markdown_table(table: list) -> None: print('|---|---|---:|---:|---:|') GB = 1024 * 1024 * 1024 for row in table[1:]: + # memory = '' + # if row[11] != '': + # memory = float(row[11]) / GB + # if memory < 0.1: + # memory = 0.1 + # memory = f"{memory:3.1f}" history = row[2] state = row[3] tool_id = row[4] # cpu = '' if row[7] == '' else float(row[7]) / 10**9 - memory = '' if row[11] == '' else f"{float(row[11]) / GB:3.3f}" - runtime = '' if row[15] == '' else f"{float(row[15]):5.1f}" + memory = '' if row[11] == '' else f"{max(0.1, float(row[11]) / GB):3.1f}" + runtime = '' if row[15] == '' else f"{max(1, float(row[15])):5.0f}" print(f'| {tool_id} | {history} | {state} | {memory} | {runtime} |') From d0c526b2b2d0919f1131edb0dbae8886638aeb05 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:35:01 -0500 Subject: [PATCH 24/56] Limit the number of attempts a job will be restarted. --- abm/lib/history.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/abm/lib/history.py b/abm/lib/history.py index 1f6dda4..f3799df 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -16,6 +16,8 @@ # History related functions # +# The number of times a failed job will be restarted. 
+RESTART_MAX = 3 def longest_name(histories: list): longest = 0 @@ -397,10 +399,21 @@ def wait(context: Context, args: list): wait_for(gi, history_id) +def kill_all_jobs(gi: GalaxyInstance, job_list:list): + cancel_states = ['new', 'running', 'paused'] + for job in job_list: + if job['state'] in cancel_states: + print(f"Cancelling job {job['tool_id']}") + gi.jobs.cancel_job(job['id']) + else: + print(f"Job {job['id']} for tool {job['tool_id']} is in state {job['state']}") + + def wait_for(gi: GalaxyInstance, history_id: str): errored = [] waiting = True job_states = JobStates() + restart_counts = dict() while waiting: restart = [] status_counts = dict() @@ -421,9 +434,18 @@ def wait_for(gi: GalaxyInstance, history_id: str): elif state == 'error': terminal += 1 if id not in errored: - restart.append(id) + tool = job['tool_id'] + if tool in restart_counts: + restart_counts[tool] += 1 + else: + restart_counts[tool] = 1 + if restart_counts[tool] < RESTART_MAX: + restart.append(id) + else: + kill_all_jobs(gi, job_list) + waiting = False errored.append(id) - if len(restart) > 0: + if len(restart) > 0 and waiting: for job in restart: print(f"Restaring job {job}") try: From 32077fa704a45f4aa12dca023961a7cc5b2732ee Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:44:00 -0500 Subject: [PATCH 25/56] Add document to menu.yml for the --sort-by option --- abm/lib/menu.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 3d6d3e7..083abe6 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -70,7 +70,7 @@ - name: [summary, summarize] handler: workflow.summarize help: generate a CSV or markdown table with job metrics for all workflow runs - params: "ID [ID ...] [--markdown]" + params: "ID [ID ...] 
[--markdown] [-s|--sort-by (tool,runtime,memory)" - name: ['test'] handler: workflow.test help: run some test code @@ -158,7 +158,7 @@ help: show detailed information about a history - name: [summarize, summary, table] handler: history.summarize - params: "ID [ID...] [--markdown]" + params: "ID [ID...] [--markdown] [-s|--sort-by (tool,runtime,memory)]" help: Generate a CSV or markdown table with runtime metrics for all jobs in the history. - name: [publish, pub] handler: history.publish @@ -250,7 +250,7 @@ - name: [summarize, summary] help: summarize metrics to a CSV or TSV file. handler: experiment.summarize - params: "[-c, --csv, -t, --tsv, --markdown]" + params: "[-c, --csv, -t, --tsv, --markdown] [-s|--sort-by (tool,runtime,memory)]" - name: [test] help: playground code handler: experiment.test @@ -268,7 +268,7 @@ handler: invocation.show - name: [summarize] help: generate a CSV or markdown table of job metrics for an invocation - params: "ID [--markdown]" + params: "ID [--markdown] [-s|--sort-by (tool, runtime, memory)]" handler: invocation.summarize - name: [helm] help: execute a helm command From 7c2ad51a77ab9b109dcfa70f747f5f9b5fc14b14 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 15 Dec 2023 16:48:38 -0500 Subject: [PATCH 26/56] Add documentation for experiment.summarize --markdown --- abm/lib/menu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 083abe6..607220a 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -248,7 +248,7 @@ handler: experiment.run params: "PATH [-r|--run-number N]" - name: [summarize, summary] - help: summarize metrics to a CSV or TSV file. + help: summarize metrics to a CSV, TSV or markdown file. 
handler: experiment.summarize params: "[-c, --csv, -t, --tsv, --markdown] [-s|--sort-by (tool,runtime,memory)]" - name: [test] From 211bab9c3c0ca296f85b2bc82e22ee47de84995c Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:38:30 -0500 Subject: [PATCH 27/56] Pass env to all helm invocations --- abm/lib/helm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/abm/lib/helm.py b/abm/lib/helm.py index 52a6a60..da92bc3 100644 --- a/abm/lib/helm.py +++ b/abm/lib/helm.py @@ -59,7 +59,7 @@ def update(context: Context, args: list): print('Waiting for the new deployments to come online') # Give kubernetes a moment to start processing the update. time.sleep(5) - wait_until_ready(namespace) + wait_until_ready(namespace, env) return True @@ -133,9 +133,9 @@ def wait_for(kubectl: str, namespace: str, name: str, env: dict): # wait_for(kubectl, namespace, 'galaxy-job', env) # wait_for(kubectl, namespace, 'galaxy-web', env) # wait_for(kubectl, namespace, 'galaxy-workflow', env) -def wait_until_ready(namespace: str): +def wait_until_ready(namespace: str, env: dict): kubectl = find_executable('kubectl') - data = run(f"{kubectl} get deployment -n {namespace} -o json") + data = run(f"{kubectl} get deployment -n {namespace} -o json", env) deployment_data = json.loads(data) deployments = list() for deployment in deployment_data['items']: @@ -146,7 +146,7 @@ def wait_until_ready(namespace: str): for deployment in deployments: print( run( - f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch" + f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", env ) ) From 3f6890a8dacb9a039b84dae5041b7e4f43520711 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:39:11 -0500 Subject: [PATCH 28/56] Add try_for method to retry api calls --- abm/lib/common.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/abm/lib/common.py b/abm/lib/common.py index 9903b6e..96a21eb 100644 --- 
a/abm/lib/common.py +++ b/abm/lib/common.py @@ -24,6 +24,21 @@ } +def try_for(f, limit=3): + count = 0 + running = True + result = None + while running: + try: + count += 1 + result = f() + running = False + except Exception as e: + if count >= limit: + raise e + return result + + class Context: def __init__(self, *args): if len(args) == 1: From 0d00e65f96574b38000e1cdf1be3ab992f6cab66 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:39:59 -0500 Subject: [PATCH 29/56] Fix bug handling start at value --- abm/lib/experiment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 2c03563..bc29218 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -42,7 +42,8 @@ def run(context: Context, args: list): with open(benchmark_path, 'r') as f: config = yaml.safe_load(f) config['start_at'] = argv.run_number - + print(f"Starting with run number {argv.run_number}") + profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) threads = [] @@ -70,7 +71,7 @@ def run_on_cloud(cloud: str, config: dict): context = Context(cloud) namespace = 'galaxy' chart = 'anvil/galaxykubeman' - start = config['start_at'] + start = int(config['start_at']) if start < 0: start = 1 end = start + config['runs'] From 382bfb32d6659a81b7f2084239a2a3f886f0a5a1 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Mon, 18 Dec 2023 11:40:38 -0500 Subject: [PATCH 30/56] Retry invoking and waiting for invocations --- abm/lib/benchmark.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index b7cb745..68e2247 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -8,7 +8,7 @@ from bioblend.galaxy import GalaxyInstance, dataset_collections from lib import INVOCATIONS_DIR, METRICS_DIR, Keys from lib.common import (Context, _get_dataset_data, _make_dataset_element, - connect, print_json) + connect, print_json, 
try_for) from lib.history import wait_for log = logging.getLogger('abm') @@ -224,16 +224,20 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): else: raise Exception(f'Invalid input value') print(f"Running workflow {wfid} in history {new_history_name}") - invocation = gi.workflows.invoke_workflow( + f = lambda : gi.workflows.invoke_workflow( wfid, inputs=inputs, history_name=new_history_name ) + invocation = try_for(f, 3) id = invocation['id'] # invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False) + f = lambda: gi.invocations.wait_for_invocation(id, 86400, 10, False) try: - invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False) - except: + invocations = try_for(f, 2) + except Exception as e: + print(f"Exception waiting for invocations") pprint(invocation) sys.exc_info() + raise e print("Waiting for jobs") if history_prefix is not None: parts = history_prefix.split() From ab3ec5c3f567299180b63008fdfa8555e41cc18c Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 20 Dec 2023 16:11:43 -0500 Subject: [PATCH 31/56] Fix run numbering range. 
--- abm/lib/experiment.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index bc29218..49b7833 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -43,7 +43,7 @@ def run(context: Context, args: list): config = yaml.safe_load(f) config['start_at'] = argv.run_number print(f"Starting with run number {argv.run_number}") - + profiles = load_profiles() # latch = CountdownLatch(len(config['cloud'])) threads = [] @@ -72,6 +72,7 @@ def run_on_cloud(cloud: str, config: dict): namespace = 'galaxy' chart = 'anvil/galaxykubeman' start = int(config['start_at']) + print(f"Staring run number {start}") if start < 0: start = 1 end = start + config['runs'] @@ -86,14 +87,14 @@ def run_on_cloud(cloud: str, config: dict): log.warning(f"job configuration not found: rules/{conf}.yml") continue for workflow_conf in config['benchmark_confs']: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: for workflow_conf in config['benchmark_confs']: - for n in range(config['runs']): + for n in range(start, end): history_name_prefix = f"{n+1} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] From fe6567d51226ed49556611a7159d74f13c1183b7 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 20 Dec 2023 16:36:23 -0500 Subject: [PATCH 32/56] Retry getting jobs list --- abm/lib/history.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/history.py b/abm/lib/history.py index f3799df..0e4cc0b 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -10,7 +10,7 @@ from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header) + get_float_key, get_str_key, 
print_table_header, try_for) # # History related functions @@ -418,7 +418,7 @@ def wait_for(gi: GalaxyInstance, history_id: str): restart = [] status_counts = dict() terminal = 0 - job_list = gi.jobs.get_jobs(history_id=history_id) + job_list = try_for(lambda: gi.jobs.get_jobs(history_id=history_id)) for job in job_list: job_states.update(job) state = job['state'] From d078c0b281a8601b9e037fe8ae57a1fbfafc4c58 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:17:45 -0500 Subject: [PATCH 33/56] Use argparse in dataset.list --- abm/lib/dataset.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 6ae32a8..52978c8 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -1,3 +1,4 @@ +import argparse import json import os from pathlib import Path @@ -9,18 +10,34 @@ find_history, print_json) -def list(context: Context, args: list): +def list(context: Context, argv: list): + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--state', help='list jobs in this state') + parser.add_argument('--history', help='show jobs in the given history') + parser.add_argument('-t', '--tool', help='only show jobs generate by this tool') + args = parser.parse_args(argv) + kwargs = {'limit': 10000, 'offset': 0, 'deleted': False} gi = connect(context) - kwargs = {'limit': 10000, 'offset': 0} - if len(args) > 0: - if args[0] in ['-s', '--state']: - if len(args) != 2: - print("ERROR: Invalid command.") - return - kwargs['state'] = args[1] - else: - print(f"ERROR: Invalid parameter: {args[0]}") + if args.state: + kwargs['state'] = args.state + if args.history: + hid = find_history(gi, args.history) + if hid is None: + print("ERROR: No such history") return + kwargs['history_id'] = hid + if args.tool: + kwargs['tool_id'] = args.tool + + # if len(args) > 0: + # if args[0] in ['-s', '--state']: + # if len(args) != 2: + # print("ERROR: Invalid command.") + # 
return + # kwargs['state'] = args[1] + # else: + # print(f"ERROR: Invalid parameter: {args[0]}") + # return # datasets = gi.datasets.get_datasets(limit=10000, offset=0) # , deleted=True, purged=True) datasets = gi.datasets.get_datasets(**kwargs) if len(datasets) == 0: From 04763b4290dea06048daf031ea2e14b882181e00 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:29:37 -0500 Subject: [PATCH 34/56] Don't add one to the run number when generating the history name. --- abm/lib/experiment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 49b7833..823ea92 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -88,14 +88,14 @@ def run_on_cloud(cloud: str, config: dict): continue for workflow_conf in config['benchmark_confs']: for n in range(start, end): - history_name_prefix = f"{n+1} {cloud} {conf}" + history_name_prefix = f"{n} {cloud} {conf}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) else: for workflow_conf in config['benchmark_confs']: for n in range(start, end): - history_name_prefix = f"{n+1} {cloud}" + history_name_prefix = f"{n} {cloud}" benchmark.run( context, workflow_conf, history_name_prefix, config['name'] ) From 1fc5c04a96f588e3c6acced15e3d15b834363070 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 19 Jan 2024 12:32:54 -0500 Subject: [PATCH 35/56] Bump version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 32d0ce3..32a0e4f 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.5 +2.9.0-dev.6 From 9e62c702e48db2fc07065e8a0505a8262b4920d9 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 12:33:22 -0500 Subject: [PATCH 36/56] Added code documentation. 
--- abm/lib/__init__.py | 6 ++++-- abm/lib/benchmark.py | 27 ++++++++++++++------------- abm/lib/dataset.py | 6 +++--- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/abm/lib/__init__.py b/abm/lib/__init__.py index 2cdb864..4efc7e1 100644 --- a/abm/lib/__init__.py +++ b/abm/lib/__init__.py @@ -4,14 +4,16 @@ sys.path.append(os.path.dirname(os.path.realpath(__file__))) -# from common import parse_profile - +# Where the workflow invocation data returned by Galaxy will be saved. INVOCATIONS_DIR = "invocations" +# Where workflow runtime metrics will be saved. METRICS_DIR = "metrics" +# Global instance of a YAML parser so we can reuse it if needed. parser = None +# Keys used in various dictionaries. class Keys: NAME = 'name' RUNS = 'runs' diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 68e2247..5afd693 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -16,13 +16,10 @@ def run_cli(context: Context, args: list): """ - Runs a single workflow defined by *args[0]* + Command line handler to run a single benchmark. - :param args: a list that contains: - args[0] - the path to the benchmark configuration file - args[1] - the prefix to use when creating the new history in Galaxy - args[2] - the name of the experiment, if part of one. This is used to - generate output folder names. + :param context: a context object the defines how to connect to the Galaxy server. + :param args: parameters from the command line :return: True if the workflows completed sucessfully. False otherwise. """ @@ -43,11 +40,15 @@ def run_cli(context: Context, args: list): def run(context: Context, workflow_path, history_prefix: str, experiment: str): - # if len(args) > 1: - # history_prefix = args[1] - # if len(args) > 2: - # experiment = args[2].replace(' ', '_').lower() + """ + Does the actual work of running a benchmark. + :param context: a context object the defines how to connect to the Galaxy server. + :param workflow_path: path to the ABM workflow file. 
(benchmark really). NOTE this is NOT the Galaxy .ga file. + :param history_prefix: a prefix value used when generating new history names. + :param experiment: the name of the experiment (arbitrary string). Used to generate new history names. + :return: True if the workflow run completed successfully. False otherwise. + """ if os.path.exists(INVOCATIONS_DIR): if not os.path.isdir(INVOCATIONS_DIR): print('ERROR: Can not save invocation status, directory name in use.') @@ -76,7 +77,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): workflows = parse_workflow(workflow_path) if not workflows: print(f"Unable to load any workflow definitions from {workflow_path}") - return + return False print(f"Found {len(workflows)} workflow definitions") for workflow in workflows: @@ -173,7 +174,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): histories = gi.histories.get_histories(name=spec['history']) if len(histories) == 0: print(f"ERROR: History {spec['history']} not foune") - return + return False hid = histories[0]['id'] pairs = 0 paired_list = spec['paired'] @@ -416,7 +417,7 @@ def validate(context: Context, args: list): def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): - """Blocks until all jobs defined in the *invocations* to complete. + """Blocks until all jobs defined in *invocations* to complete. 
:param gi: The *GalaxyInstance** running the jobs :param invocations: diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 52978c8..a8bfd88 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -12,9 +12,9 @@ def list(context: Context, argv: list): parser = argparse.ArgumentParser() - parser.add_argument('-s', '--state', help='list jobs in this state') - parser.add_argument('--history', help='show jobs in the given history') - parser.add_argument('-t', '--tool', help='only show jobs generate by this tool') + parser.add_argument('-s', '--state', help='list datasets in this state') + parser.add_argument('--history', help='show datasets in the given history') + parser.add_argument('-t', '--tool', help='only show datasets generate by this tool') args = parser.parse_args(argv) kwargs = {'limit': 10000, 'offset': 0, 'deleted': False} gi = connect(context) From 6c07eb7f1e68947e56eaa50692843c6fbadafe7a Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 12:37:35 -0500 Subject: [PATCH 37/56] Print the starting run number after its value has been checked. --- abm/lib/experiment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 823ea92..5750fe2 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -75,6 +75,7 @@ def run_on_cloud(cloud: str, config: dict): print(f"Staring run number {start}") if start < 0: start = 1 + print(f"Staring run number {start}") end = start + config['runs'] if 'galaxy' in config: namespace = config['galaxy']['namespace'] From 29f5436dc953ef0d80281743859ca3f1cebbbf8b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 31 Jan 2024 14:16:55 -0500 Subject: [PATCH 38/56] More code documentation. 
--- abm/lib/benchmark.py | 57 +++++++++++++++++++++++++++++++++-- abm/lib/cloudlaunch.py | 2 ++ abm/lib/common.py | 68 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 11 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 5afd693..fcf2fd7 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -270,6 +270,14 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): def translate(context: Context, args: list): + """ + Translates the human readable names of datasets and workflows in to the Galaxy + ID that is unique to each server. + + :param context: the conext object used to connect to the Galaxy server + :param args: [0] the path to the benchmarking YAML file to translate + :return: Nothing. Prints the translated workflow file to stdout. + """ if len(args) == 0: print('ERROR: no workflow configuration specified') return @@ -312,6 +320,14 @@ def translate(context: Context, args: list): def validate(context: Context, args: list): + """ + Checks to see if the workflow and all datasets defined in the benchmark can + be found on the server. + + :param context: the context object used to connect to the Galaxy instance + :param args: [0] the benchmark YAML file to be validated. + :return: + """ if len(args) == 0: print('ERROR: no workflow configuration specified') return @@ -417,10 +433,10 @@ def validate(context: Context, args: list): def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): - """Blocks until all jobs defined in *invocations* to complete. + """Blocks until all jobs defined in *invocations* are complete (in a terminal state). 
:param gi: The *GalaxyInstance** running the jobs - :param invocations: + :param invocations: a dictionary containing information about the jobs invoked :return: """ wfid = invocations['workflow_id'] @@ -490,6 +506,11 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): def parse_workflow(workflow_path: str): + """ + Loads the benchmark YAML file. + :param workflow_path: the path to the file to be loaded. + :return: a dictionary containing the benchmark. + """ if not os.path.exists(workflow_path): print(f'ERROR: could not find workflow file {workflow_path}') return None @@ -508,6 +529,14 @@ def parse_workflow(workflow_path: str): def find_workflow_id(gi, name_or_id): + """ + Resolves the human-readable name for a workflow into the unique ID on the + Galaxy instance. + + :param gi: the connection object to the Galaxy instance + :param name_or_id: the name of the workflow + :return: The Galaxy workflow ID or None if the workflow could not be located + """ try: wf = gi.workflows.show_workflow(name_or_id) return wf['id'] @@ -524,7 +553,14 @@ def find_workflow_id(gi, name_or_id): def find_dataset_id(gi, name_or_id): - # print(f"Finding dataset {name_or_id}") + """ + Resolves the human-readable name if a dataset into the unique ID on the + Galaxy instance + + :param gi: the connection object to the Galaxy instance + :param name_or_id: the name of the dataset. + :return: the Galaxy dataset ID or None if the dataset could not be located. + """ try: ds = gi.datasets.show_dataset(name_or_id) return ds['id'] @@ -549,6 +585,14 @@ def find_dataset_id(gi, name_or_id): def find_collection_id(gi, name): + """ + Resolves a human-readable collection name into the unique Galaxy ID. + + :param gi: the connection object to the Galaxy instance + :param name: the name of the collection to resolve + :return: The unique Galaxy ID of the collection or None if the collection + can not be located. 
+ """ kwargs = {'limit': 10000, 'offset': 0} datasets = gi.datasets.get_datasets(**kwargs) if len(datasets) == 0: @@ -570,6 +614,13 @@ def find_collection_id(gi, name): def test(context: Context, args: list): + """ + Allows running testing code from the command line. + + :param context: a connection object to a Galaxy instance + :param args: varies + :return: varies, typically None. + """ id = 'c90fffcf98b31cd3' gi = connect(context) inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') diff --git a/abm/lib/cloudlaunch.py b/abm/lib/cloudlaunch.py index 0abd261..d933cc6 100644 --- a/abm/lib/cloudlaunch.py +++ b/abm/lib/cloudlaunch.py @@ -8,6 +8,8 @@ from cloudlaunch_cli.main import create_api_client from common import Context +# DEPRECATED - Cloudlaunch is no longer used to manage Galaxy clusters. + BOLD = '\033[1m' CLEAR = '\033[0m' diff --git a/abm/lib/common.py b/abm/lib/common.py index 96a21eb..cbc035f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -9,8 +9,10 @@ from bioblend.galaxy import dataset_collections from ruamel.yaml import YAML +# Where we will look for our configuration file. PROFILE_SEARCH_PATH = ['~/.abm/profile.yml', '.abm-profile.yml'] +# Deprecated. Do not use. datasets = { "dna": [ "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR013/ERR013101/ERR013101_1.fastq.gz", @@ -25,6 +27,14 @@ def try_for(f, limit=3): + """ + Tries to invoke the function f. If the function f fails it will be retried + *limit* number of times. + + :param f: the function to invoke + :param limit: how many times the function will be retried + :return: the result of calling f() + """ count = 0 running = True result = None @@ -40,6 +50,13 @@ def try_for(f, limit=3): class Context: + """ + The context object that contains information to connect to a Galaxy instance. 
+ + GALAXY_SERVER: the URL of the Galaxy server to connect to + API_KEY : a user's API key to make API calls on the Galaxy instance + KUBECONFIG: : the kubeconfig file needed to make changes via Helm + """ def __init__(self, *args): if len(args) == 1: arg = args[0] @@ -90,7 +107,12 @@ def connect(context: Context): def _set_active_profile(profile_name: str): - # print(f"Parsing profile for {profile_name}") + """ + Unused. + + :param profile_name: + :return: + """ lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG = parse_profile(profile_name) return lib.GALAXY_SERVER != None @@ -100,6 +122,11 @@ def get_context(profile_name: str): def get_yaml_parser(): + """ + Returns a singleton instance of a YAML parser. + + :return: a YAML parser. + """ if lib.parser is None: lib.parser = YAML() return lib.parser @@ -124,6 +151,12 @@ def load_profiles(): def save_profiles(profiles: dict): + """ + Write the ABM configuration file. + + :param profiles: the configuration to be saved. + :return: None + """ yaml = get_yaml_parser() for profile_path in PROFILE_SEARCH_PATH: path = os.path.expanduser(profile_path) @@ -161,15 +194,16 @@ def parse_profile(profile_name: str): def run(command, env: dict = None): + """ + Runs a command on the local machine. Used to invoke the helm and kubectl + executables. + + :param command: the command to be invoked + :param env: environment variables for the command. + :return: + """ if env is None: env = os.environ - # if env is not None: - # for name,value in env.items(): - # os.environ[name] = value - # if lib.KUBECONFIG is not None: - # os.environ['KUBECONFIG'] = lib.KUBECONFIG - # local_env = os.environ.copy() - # local_env.update(env) result = subprocess.run(command.split(), capture_output=True, env=env) if result.returncode != 0: raise RuntimeError(result.stderr.decode('utf-8').strip()) @@ -177,6 +211,11 @@ def run(command, env: dict = None): def get_env(context: Context): + """ + Creates a copy of the environment variables as returned by os.environ. 
+ :param context: Ignored + :return: a dictionary of the environment variables + """ copy = os.environ.copy() for key, value in context.__dict__.items(): if value is not None: @@ -185,6 +224,13 @@ def get_env(context: Context): def find_executable(name): + """ + Used the which command on the local machine to find the full path to an + executable. + + :param name: the name of a command line executable or script. + :return: the full path to the executable or an empty string if the executable is not found. + """ return run(f"which {name}") @@ -208,6 +254,7 @@ def find_executable(name): # "swaptotal", # "uname" +# Columns to be defined when generating CSV files. table_header = [ "id", "history_id", @@ -237,6 +284,11 @@ def find_executable(name): ] def print_table_header(): + """ + Prints the table header suitable for inclusion in CSV files. + + :return: None. The table header is printed to stdout. + """ print(','.join(table_header)) From 144ebaac2eaeac0782fa5d0f3dd640b05d2f1e4b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 29 Mar 2024 14:04:26 -0400 Subject: [PATCH 39/56] Rename list methods to prevent name collisions with the list type --- abm/VERSION | 2 +- abm/lib/benchmark.py | 6 +++++- abm/lib/cloudlaunch.py | 2 +- abm/lib/config.py | 2 +- abm/lib/dataset.py | 2 +- abm/lib/folder.py | 2 +- abm/lib/job.py | 2 +- abm/lib/library.py | 2 +- abm/lib/menu.yml | 16 ++++++++-------- abm/lib/users.py | 2 +- abm/lib/workflow.py | 2 +- 11 files changed, 22 insertions(+), 18 deletions(-) diff --git a/abm/VERSION b/abm/VERSION index 32a0e4f..80a9e43 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.6 +2.9.0-dev.7 diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index 68e2247..3505158 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -183,7 +183,11 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): for key in item.keys(): # print(f"Getting dataset for {key} = {item[key]}") value = 
_get_dataset_data(gi, item[key]) - size += value['size'] + if value is None: + print(f"ERROR: Unable to find dataset {item[key]}") + return + if size in value: + size += value['size'] elements.append( _make_dataset_element(key, value['id']) ) diff --git a/abm/lib/cloudlaunch.py b/abm/lib/cloudlaunch.py index 0abd261..3a2719d 100644 --- a/abm/lib/cloudlaunch.py +++ b/abm/lib/cloudlaunch.py @@ -40,7 +40,7 @@ def h1(text): ''' -def list(context: Context, args: list): +def do_list(context: Context, args: list): archived = False filter = None status = lambda t: t.instance_status if t.instance_status else t.status diff --git a/abm/lib/config.py b/abm/lib/config.py index cbb71a5..24d8112 100644 --- a/abm/lib/config.py +++ b/abm/lib/config.py @@ -6,7 +6,7 @@ print_yaml, save_profiles) -def list(context: Context, args: list): +def do_list(context: Context, args: list): profiles = load_profiles() print(f"Loaded {len(profiles)} profiles") for profile in profiles: diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 52978c8..0fe507f 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -10,7 +10,7 @@ find_history, print_json) -def list(context: Context, argv: list): +def do_list(context: Context, argv: list): parser = argparse.ArgumentParser() parser.add_argument('-s', '--state', help='list jobs in this state') parser.add_argument('--history', help='show jobs in the given history') diff --git a/abm/lib/folder.py b/abm/lib/folder.py index 06760a5..4c4abb8 100644 --- a/abm/lib/folder.py +++ b/abm/lib/folder.py @@ -3,7 +3,7 @@ from .common import Context, connect -def list(context: Context, args: list): +def do_list(context: Context, args: list): if len(args) == 0: print("ERROR: no library ID was provided") return diff --git a/abm/lib/job.py b/abm/lib/job.py index efd8bfa..4973209 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -8,7 +8,7 @@ log = logging.getLogger('abm') -def list(context: Context, args: list): +def do_list(context: Context, args: list): state = 
'' history_id = None log.debug('Processing args') diff --git a/abm/lib/library.py b/abm/lib/library.py index 8add917..4dbb610 100644 --- a/abm/lib/library.py +++ b/abm/lib/library.py @@ -3,7 +3,7 @@ from .common import Context, connect, datasets -def list(context: Context, args: list): +def do_list(context: Context, args: list): gi = connect(context) if len(args) == 0: for library in gi.libraries.get_libraries(): diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 607220a..a4e3844 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -37,7 +37,7 @@ help: 'download a workflow' params: ID PATH - name: ['list', 'ls'] - handler: workflow.list + handler: workflow.do_list help: 'list workflows available on the serer' - name: [show] handler: workflow.show @@ -92,7 +92,7 @@ params: KEY [--hs|--hist|--history HISTORY_ID | -c|--create "History name"] help: imports a dataset to the server from a URL specified in the datasets.yml config file. - name: ['list', 'ls'] - handler: dataset.list + handler: dataset.do_list help: lists all the datasets on the server - name: ['find'] handler: dataset.find @@ -191,7 +191,7 @@ menu: - name: [ list, ls ] help: list all jobs, or jobs in a particular state. Can filter by a history. - handler: job.list + handler: job.do_list params: "[-s|--state ok|running|error|waiting] [-h|--history historyID]" - name: [ show ] help: show detailed information about a job @@ -222,7 +222,7 @@ menu: - name: [list, ls] help: list all users on the Galaxy instance - handler: users.list + handler: users.do_list - name: [api_key, apikey, key] help: obtain the API key for the specified user handler: users.api_key @@ -303,7 +303,7 @@ menu: - name: [list, ls] help: list configured servers - handler: config.list + handler: config.do_list - name: [show, sh] help: disply URL, API key, and kube config for a specific cloud. 
handler: config.show @@ -341,7 +341,7 @@ standalone: true menu: - name: [list, ls] - handler: cloudlaunch.list + handler: cloudlaunch.do_list help: list deployments on all cloud providers - name: [create, launch, new] handler: cloudlaunch.create @@ -356,7 +356,7 @@ menu: - name: [list, ls] help: list all libraries on the server - handler: library.list + handler: library.do_list - name: [show] help: show detailed information about a library handler: library.show @@ -373,7 +373,7 @@ help: manage folders in data libraries menu: - name: [list, ls] - handler: folder.list + handler: folder.do_list help: list the folders in a data library params: LIBRARY_ID - name: [create, new] diff --git a/abm/lib/users.py b/abm/lib/users.py index 74353fb..cb98a46 100644 --- a/abm/lib/users.py +++ b/abm/lib/users.py @@ -5,7 +5,7 @@ from common import Context, connect -def list(context: Context, args: list): +def do_list(context: Context, args: list): # TODO the master API key needs to be parameterized or specified in a config file. context.API_KEY = "galaxypassword" gi = connect(context) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 1e21e77..6282158 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -15,7 +15,7 @@ log = logging.getLogger('abm') -def list(context: Context, args: list): +def do_list(context: Context, args: list): gi = connect(context) workflows = gi.workflows.get_workflows(published=True) if len(workflows) == 0: From 6efb9ce4af421f9f35c7b10e28ba9d98396fb94e Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 13:53:39 -0400 Subject: [PATCH 40/56] Allow the Galaxy master API key to defined in the profile. 
--- abm/__init__.py | 2 ++ abm/lib/__init__.py | 17 +++++++++++++++++ abm/lib/common.py | 38 ++++++++++++++++++++++++++++---------- abm/lib/history.py | 2 +- abm/lib/users.py | 16 ++++------------ 5 files changed, 52 insertions(+), 23 deletions(-) diff --git a/abm/__init__.py b/abm/__init__.py index fb8a5b7..adc875e 100644 --- a/abm/__init__.py +++ b/abm/__init__.py @@ -1,6 +1,8 @@ import os import sys +import yaml + sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/abm/lib/__init__.py b/abm/lib/__init__.py index 4efc7e1..a17780b 100644 --- a/abm/lib/__init__.py +++ b/abm/lib/__init__.py @@ -24,3 +24,20 @@ class Keys: COLLECTION = 'collection' HISTORY_BASE_NAME = 'output_history_base_name' HISTORY_NAME = 'history_name' + + +# def get_master_api_key(): +# ''' +# Get the master API key from the environment or configuration file. +# ''' +# if 'GALAXY_MASTER_API_KEY' in os.environ: +# return os.environ['GALAXY_MASTER_API_KEY'] +# config_path = os.path.expanduser("~/.abm/config.yml") +# if not os.path.exists(config_path): +# raise RuntimeError(f"ERROR: Configuration file not found: {config_path}") +# with open(config_path, 'r') as f: +# config = yaml.safe_load(f) +# key = config.get('GALAXY_MASTER_API_KEY', None) +# if key == None: +# raise RuntimeError("ERROR: GALAXY_MASTER_API_KEY not found in config.yml") +# return key diff --git a/abm/lib/common.py b/abm/lib/common.py index cbc035f..0fe6b3f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -61,20 +61,26 @@ def __init__(self, *args): if len(args) == 1: arg = args[0] if type(arg) == str: - self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG = parse_profile(arg) + self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG, self.MASTER_KEY = parse_profile(arg) elif type(arg) == dict: self.GALAXY_SERVER = arg['GALAXY_SERVER'] self.API_KEY = arg['API_KEY'] self.KUBECONFIG = arg['KUBECONFIG'] + if 'MASTER_KEY' in arg: + self.MASTER_KEY = arg['MASTER_KEY'] + else: + self.MASTER_KEY = None else: raise 
Exception(f'Invalid arg for Context: {type(arg)}') - elif len(args) == 3: + elif len(args) == 3 or len(args) == 4: self.GALAXY_SERVER = args[0] self.API_KEY = args[1] self.KUBECONFIG = args[2] + if len(args) == 4: + self.MASTER_KEY = args[3] else: raise Exception( - f'Invalid args for Context. Expected one or three, found {len(args)}' + f'Invalid args for Context. Expected one or four, found {len(args)}' ) @@ -86,7 +92,7 @@ def print_yaml(obj): get_yaml_parser().dump(obj, sys.stdout) -def connect(context: Context): +def connect(context: Context, use_master_key=False): """ Create a connection to the Galaxy instance @@ -100,7 +106,14 @@ def connect(context: Context): print('ERROR: The Galaxy API key has not been set. Please check your') print(' configuration in ~/.abm/profile.yml and try again.') sys.exit(1) - gi = bioblend.galaxy.GalaxyInstance(url=context.GALAXY_SERVER, key=context.API_KEY) + key = context.API_KEY + if use_master_key: + if context.MASTER_KEY is None: + print('ERROR: The Galaxy master key has not been set. 
Please check your') + print(' configuration in ~/.abm/profile.yml and try again.') + sys.exit(1) + key = context.MASTER_KEY + gi = bioblend.galaxy.GalaxyInstance(url=context.GALAXY_SERVER, key=key) gi.max_get_attempts = 3 gi.get_retry_delay = 1 return gi @@ -113,7 +126,7 @@ def _set_active_profile(profile_name: str): :param profile_name: :return: """ - lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG = parse_profile(profile_name) + lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile(profile_name) return lib.GALAXY_SERVER != None @@ -174,10 +187,11 @@ def parse_profile(profile_name: str): :param profile_name: path to the profile to parse :return: a tuple containing the Galaxy URL, API key, and path to the kubeconfig ''' + nones = (None, None, None, None) profiles = load_profiles() if profiles is None: print(f'ERROR: Could not locate an abm profile file in {PROFILE_SEARCH_PATH}') - return None, None, None + return nones if profile_name not in profiles: print(f'ERROR: {profile_name} is not the name of a valid profile.') keys = list(profiles.keys()) @@ -186,11 +200,15 @@ def parse_profile(profile_name: str): ', '.join([f"'{k}'" for k in keys[0:-2]]) + f", and '{keys[-1]}'" ) print(f'The defined profile names are: {quoted_keys}') - return None, None, None + return nones profile = profiles[profile_name] + kube = None + master = 'galaxypassword' if 'kube' in profile: - return (profile['url'], profile['key'], os.path.expanduser(profile['kube'])) - return (profile['url'], profile['key'], None) + kube = os.path.expanduser(profile['kube']) + if 'master' in profile: + master = profile['master'] + return (profile['url'], profile['key'], kube, master) def run(command, env: dict = None): diff --git a/abm/lib/history.py b/abm/lib/history.py index 0e4cc0b..8236590 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -235,7 +235,7 @@ def error_message(msg='Invalid command'): return url = datasets[args[0]] elif len(args) == 3: - server, key = 
parse_profile(args[0]) + server, key, kube, master = parse_profile(args[0]) if server is None: error_message(f"Invalid server profile name: {args[0]}") return diff --git a/abm/lib/users.py b/abm/lib/users.py index cb98a46..ceacd82 100644 --- a/abm/lib/users.py +++ b/abm/lib/users.py @@ -6,9 +6,7 @@ def do_list(context: Context, args: list): - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_list = gi.users.get_users() pprint(user_list) @@ -22,9 +20,7 @@ def get_api_key(context: Context, args: list): print("ERROR: no user email given") return - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_list = gi.users.get_users(f_email=args[0]) if user_list is None or len(user_list) == 0: print("WARNING: no such user") @@ -52,9 +48,7 @@ def create(context: Context, args: list): print(f"ERROR: {email} does not look like a valid email address") return - # TODO the master API key needs to be parameterized or specified in a config file. - context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) user_record = gi.users.create_local_user(name, email, password) id = user_record['id'] key = gi.users.create_user_apikey(id) @@ -67,9 +61,7 @@ def show(context: Context, args: list): print("ERROR: no user email given") return - # TODO the master API key needs to be parameterized or specified in a config file. 
- context.API_KEY = "galaxypassword" - gi = connect(context) + gi = connect(context, use_master_key=True) id = _get_user_id(gi, args[0]) if id is None: return From 5a85ae11cd4cc410d88635a3640f20646fe9e64d Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 14:00:39 -0400 Subject: [PATCH 41/56] Add configuration as a command alias --- abm/lib/menu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index a4e3844..4aef10b 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -297,7 +297,7 @@ - name: [url] help: derive the URL to access this Galaxy instance handler: kubectl.url -- name: [config, conf, cfg] +- name: [config, configuration, conf, cfg] help: manage configuration profiles standalone: true menu: From 1b4eeb5b13e80160b6714da4b62206c22c45a674 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 22 May 2024 14:07:15 -0400 Subject: [PATCH 42/56] Update year in copyright notices --- LICENSE | 2 +- abm/__main__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index cdd6c6d..919ca29 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Galaxy Project +Copyright (c) 2024 Galaxy Project Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/abm/__main__.py b/abm/__main__.py index 014bada..58f5877 100644 --- a/abm/__main__.py +++ b/abm/__main__.py @@ -3,7 +3,7 @@ """ The Automated Benchmarking Tool -Copyright 2023 The Galaxy Project. All rights reserved. +Copyright 2024 The Galaxy Project. All rights reserved. """ @@ -64,7 +64,7 @@ def command_list(commands: list): def copyright(): - print(f" Copyright 2023 The Galaxy Project. All Rights Reserved.\n") + print(f" Copyright 2024 The Galaxy Project. 
All Rights Reserved.\n") def print_main_help(menu_data): From 0533f8c08390ebac74b03f3e750840ad621fc86d Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Fri, 24 May 2024 11:31:14 +0530 Subject: [PATCH 43/56] Update samples and cleanup unused --- README.md | 2 +- abm/lib/experiment.py | 1 - abm/lib/history.py | 2 +- abm/lib/threads/Latch.py | 20 ---- bootstrap-config/test.yaml | 11 -- rules/default.yml | 106 ------------------ .../benchmarks/dna-named.yml | 0 .../benchmarks}/example.yml | 0 samples/benchmarks/rna-named.yml | 14 +++ samples/benchmarks/rules/4x8.yml | 0 samples/benchmarks/rules/8x16.yml | 0 samples/experiment.yaml | 11 ++ 12 files changed, 27 insertions(+), 140 deletions(-) delete mode 100644 abm/lib/threads/Latch.py delete mode 100644 bootstrap-config/test.yaml delete mode 100644 rules/default.yml rename benchmarks/dna-named-2.yml => samples/benchmarks/dna-named.yml (100%) rename {benchmarks => samples/benchmarks}/example.yml (100%) create mode 100644 samples/benchmarks/rna-named.yml create mode 100644 samples/benchmarks/rules/4x8.yml create mode 100644 samples/benchmarks/rules/8x16.yml create mode 100644 samples/experiment.yaml diff --git a/README.md b/README.md index 81e023e..c52b5b2 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The `kubectl` program is only required when bootstrapping a new Galaxy instance, ### Credentials -You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to intereact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. 
You can use the `profile-sample.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing). +You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to intereact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. You can use the `samples/profile.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing). :bulb: It is now possible (>=2.0.0) to create Galaxy users and their API keys directly with `abm`. diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 5750fe2..54a1707 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -45,7 +45,6 @@ def run(context: Context, args: list): print(f"Starting with run number {argv.run_number}") profiles = load_profiles() - # latch = CountdownLatch(len(config['cloud'])) threads = [] start = perf_counter() for cloud in config['cloud']: diff --git a/abm/lib/history.py b/abm/lib/history.py index 0e4cc0b..e3fb4b8 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -214,7 +214,7 @@ def error_message(msg='Invalid command'): else: datasets = None config = f'{os.path.dirname(os.path.abspath(__file__))}/histories.yml' - # First load the histories.yml file that is pacakged with abm + # First load the histories.yml file that is packaged with abm if os.path.exists(config): with open(config, 'r') as f: datasets = yaml.safe_load(f) diff --git a/abm/lib/threads/Latch.py b/abm/lib/threads/Latch.py deleted file mode 100644 index 8db238d..0000000 --- a/abm/lib/threads/Latch.py +++ /dev/null @@ -1,20 
+0,0 @@ -import threading - - -class CountdownLatch: - def __init__(self, count=1): - self.count = count - self.lock = threading.Condition - - def count_down(self, count=1): - self.lock.acquire(True) - self.count -= count - if self.count <= 0: - self.lock.notifyAll() - self.lock.release() - - def wait(self): - self.lock.acquire(True) - while self.count > 0: - self.lock.wait() - self.lock.release() diff --git a/bootstrap-config/test.yaml b/bootstrap-config/test.yaml deleted file mode 100644 index bf7822a..0000000 --- a/bootstrap-config/test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: Benchmarking DNA -runs: 3 -workflow_conf: - - config/test.yml - - config/dna-named.yml - - config/rna-named.yml -cloud: - - iu2 -job_configs: - - rules/4x8.yml - - rules/8x16.yml \ No newline at end of file diff --git a/rules/default.yml b/rules/default.yml deleted file mode 100644 index c12ead7..0000000 --- a/rules/default.yml +++ /dev/null @@ -1,106 +0,0 @@ -mappings: - summary_stats: - tool_ids: - - Summary_Statistics1 - docker_container_id_override: cloudve/gsummary:latest - resource_set: small - sam_fasta_dm: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/data_manager_sam_fasta_index_builder/sam_fasta_index_builder/.* - docker_container_id_override: cloudve/sam-fasta-dm:latest - resource_set: small - bwa_dm: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/data_manager_bwa_mem_index_builder/bwa_mem_index_builder_data_manager/.* - docker_container_id_override: cloudve/bwa-dm:latest - resource_set: small - prokka: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/crs4/prokka/prokka/1.14.5 - docker_container_id_override: cloudve/prokka:1.14.5 - jbrowse: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.5+galaxy6 - docker_container_id_override: cloudve/jbrowse:1.16.5 - lib_galaxy: - tool_ids: - - sort1 - - Grouping1 - docker_container_id_override: galaxy/galaxy-min:21.05 - resource_set: small - set_medium: - tool_ids: - - 
toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/.* - - toolshed.g2.bx.psu.edu/repos/iuc/bwameth/bwameth/.* - - toolshed.g2.bx.psu.edu/repos/iuc/featurecounts/featurecounts/.* - - toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/.* - - toolshed.g2.bx.psu.edu/repos/iuc/valet/valet/.* - - toolshed.g2.bx.psu.edu/repos/iuc/varscan_somatic/varscan_somatic/.* - - toolshed.g2.bx.psu.edu/repos/nilesh/rseqc/rseqc_bam2wig/.* - resource_set: medium - set_large: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/.* - - toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_compare/deeptools_bam_compare/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_coverage/deeptools_bam_coverage/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bam_pe_fragmentsize/deeptools_bam_pe_fragmentsize/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_bigwig_compare/deeptools_bigwig_compare/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_gc_bias/deeptools_compute_gc_bias/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_compute_matrix/deeptools_compute_matrix/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_correct_gc_bias/deeptools_correct_gc_bias/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_multi_bam_summary/deeptools_multi_bam_summary/.* - - toolshed.g2.bx.psu.edu/repos/bgruening/deeptools_multi_bigwig_summary/deeptools_multi_bigwig_summary/.* - - toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/.* - - toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/.* - - toolshed.g2.bx.psu.edu/repos/iuc/rnaspades/rnaspades/.* - - toolshed.g2.bx.psu.edu/repos/iuc/sra_tools/fasterq_dump/.* - resource_set: large - set_2xlarge: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/.* - - toolshed.g2.bx.psu.edu/repos/nml/spades/spades/.* - resource_set: 2xlarge - set_mlarge: - tool_ids: - - toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/.* - - 
toolshed.g2.bx.psu.edu/repos/iuc/plink/plink/.* - resource_set: mlarge -resources: - resource_sets: - small: - requests: - cpu: 1 - memory: 2G - limits: - cpu: 2 - memory: 5G - medium: - requests: - cpu: 2 - memory: 4G - limits: - cpu: 4 - memory: 10G - large: - requests: - cpu: 4 - memory: 8G - limits: - cpu: 8 - memory: 16G - 2xlarge: - requests: - cpu: 12 - memory: 20G - limits: - cpu: 12 - memory: 24G - mlarge: - requests: - cpu: 2 - memory: 16G - limits: - cpu: 4 - memory: 20G - default_resource_set: small diff --git a/benchmarks/dna-named-2.yml b/samples/benchmarks/dna-named.yml similarity index 100% rename from benchmarks/dna-named-2.yml rename to samples/benchmarks/dna-named.yml diff --git a/benchmarks/example.yml b/samples/benchmarks/example.yml similarity index 100% rename from benchmarks/example.yml rename to samples/benchmarks/example.yml diff --git a/samples/benchmarks/rna-named.yml b/samples/benchmarks/rna-named.yml new file mode 100644 index 0000000..b94c0e0 --- /dev/null +++ b/samples/benchmarks/rna-named.yml @@ -0,0 +1,14 @@ +- workflow_id: d6d3c2119c4849e4 + output_history_base_name: RNA-seq + reference_data: + - name: Reference Transcript (FASTA) + dataset_id: 50a269b7a99356aa + runs: + - history_name: 1 + inputs: + - name: FASTQ RNA Dataset + dataset_id: 28fa757e56346a34 + - history_name: 2 + inputs: + - name: FASTQ RNA Dataset + dataset_id: 1faa2d3b2ed5c436 diff --git a/samples/benchmarks/rules/4x8.yml b/samples/benchmarks/rules/4x8.yml new file mode 100644 index 0000000..e69de29 diff --git a/samples/benchmarks/rules/8x16.yml b/samples/benchmarks/rules/8x16.yml new file mode 100644 index 0000000..e69de29 diff --git a/samples/experiment.yaml b/samples/experiment.yaml new file mode 100644 index 0000000..b4874f0 --- /dev/null +++ b/samples/experiment.yaml @@ -0,0 +1,11 @@ +name: Benchmarking DNA +runs: 3 +workflow_conf: + - benchmarks/example.yml + - benchmarks/dna-named.yml + - benchmarks/rna-named.yml +cloud: + - iu2 +job_configs: + - 
rules/4x8.yml + - rules/8x16.yml From 40792ca03dd9069b3ccaba5a67b40139badf4bb2 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 19:22:35 -0400 Subject: [PATCH 44/56] Add --no-tools flag for workflow import and upload --- abm/lib/menu.yml | 4 +-- abm/lib/workflow.py | 63 +++++++++++++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 4aef10b..9ee7f3e 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -26,11 +26,11 @@ menu: - name: ['upload', 'up'] handler: workflow.upload - params: PATH + params: PATH [-n|--no-tools] help: 'upload a workflow file to the server' - name: ['import', 'imp'] handler: workflow.import_from_config - params: NAME + params: NAME [-n|--no-tools] help: 'import a workflow defined in ~/.abm/workflows.yml' - name: ['download', 'dl'] handler: workflow.download diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 6282158..3a20885 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -35,30 +35,48 @@ def delete(context: Context, args: list): def upload(context: Context, args: list): - if len(args) == 0: - print('ERROR: no workflow file given') + path = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + path = arg + if path is None: + print("ERROR: no workflow given") return - path = args[0] + if path.startswith('http'): import_from_url(context, args) return if not os.path.exists(path): print(f'ERROR: file not found: {path}') return + print("Uploading workflow") gi = connect(context) print("Importing the workflow") pprint(gi.workflows.import_workflow_from_local_path(path, publish=True)) runnable = for_path(path) - print("Installing tools") - result = install_shed_repos(runnable, gi, False) - pprint(result) + if install: + print("Installing tools") + result = install_shed_repos(runnable, gi, False) + pprint(result) def import_from_url(context: 
Context, args: list): - if len(args) == 0: - print("ERROR: no workflow URL given") + print("Importing workflow from URL") + url = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + url = arg + if url is None: + print("ERROR: no URL given") return - url = args[0] # There is a bug in ephemeris (for lack of a better term) that assumes all # Runnable objects can be found on the local file system @@ -93,17 +111,27 @@ def import_from_url(context: Context, args: list): result = gi.workflows.import_workflow_dict(workflow, publish=True) print(json.dumps(result, indent=4)) runnable = for_path(cached_file) - # runnable = for_uri(url) - print("Installing tools") - result = install_shed_repos(runnable, gi, False, install_tool_dependencies=True) - pprint(result) + if install: + print("Installing tools") + result = install_shed_repos(runnable, gi, False, install_tool_dependencies=True) + pprint(result) def import_from_config(context: Context, args: list): - if len(args) == 0: + print("Importing workflow from configuration") + key = None + install = True + for arg in args: + if arg in ['-n', '--no-tools']: + print("Skipping tools") + install = False + else: + key = arg + if key is None: print("ERROR: no workflow ID given") return - key = args[0] + + userfile = os.path.join(Path.home(), ".abm", "workflows.yml") if not os.path.exists(userfile): print("ERROR: this instance has not been configured to import workflows.") @@ -116,7 +144,10 @@ def import_from_config(context: Context, args: list): return url = workflows[key] - import_from_url(context, [url]) + argv = [url] + if not install: + argv.append('-n') + import_from_url(context, argv) def download(context: Context, args: list): From 88e13b81536f9c4e41a18b8f1e2a4d4ee05064e8 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 20:28:25 -0400 Subject: [PATCH 45/56] Bump dev version --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/abm/VERSION b/abm/VERSION index 80a9e43..d501693 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.7 +2.9.0-dev.8 \ No newline at end of file From 2ad1a1e8260c5fae3f7b1571c90ca7e8ce69a7dd Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 28 May 2024 20:28:50 -0400 Subject: [PATCH 46/56] Update bump script --- bump | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/bump b/bump index 87bac5a..2da9275 100755 --- a/bump +++ b/bump @@ -19,23 +19,20 @@ def main(): with open(VERSION_FILE, 'r') as f: version_string = f.read().strip() - - parts = version_string.split('.') + + prefix = version_string + suffix = None + release = None + build = None + if '-' in version_string: + # This is a development build + prefix, suffix = version_string.split('-') + release,build = suffix.split('.') + build = int(build) + parts = prefix.split('.') major = int(parts[0]) minor = int(parts[1]) - release = None - if '-' in parts[2]: - revs = parts[2].split('-') - revision = int(revs[0]) - if 'dev' in revs[1]: - release = 'dev' - build = int(revs[1].replace('dev', '')) - elif 'rc' in revs[1]: - release = 'rc' - build = int(revs[1].replace('rc', '')) - else: - revision = int(parts[2]) - build = None + revision = int(parts[2]) if sys.argv[1] in ['major', 'minor', 'revision'] and release is not None: print(f"ERROR: Cannot bump the {sys.argv[1]} version for a development build") @@ -75,7 +72,7 @@ def main(): if build is None: version_string = f"{major}.{minor}.{revision}" else: - version_string = f"{major}.{minor}.{revision}-{release}{build}" + version_string = f"{major}.{minor}.{revision}-{release}.{build}" with open(VERSION_FILE, 'w') as f: f.write(version_string) From 676572ceba03905ab9af15ec70491f3b37514e2b Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 29 May 2024 12:46:30 -0400 Subject: [PATCH 47/56] Try to fix problems resolving dataset collection IDs --- abm/__init__.py | 2 -- 
abm/lib/benchmark.py | 27 +++++++++++++++++++-------- abm/lib/common.py | 9 +++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/abm/__init__.py b/abm/__init__.py index adc875e..fb8a5b7 100644 --- a/abm/__init__.py +++ b/abm/__init__.py @@ -1,8 +1,6 @@ import os import sys -import yaml - sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index af2ecf8..b9962a2 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -145,11 +145,13 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): dsid = find_collection_id(gi, dsname) dsdata = _get_dataset_data(gi, dsid) if dsdata is None: - raise Exception( - f"ERROR: unable to resolve {dsname} to a dataset." - ) - dsid = dsdata['id'] - dssize = dsdata['size'] + # raise Exception( + # f"ERROR: unable to resolve {dsname} to a dataset." + # ) + dssize = 0 + else: + dsid = dsdata['id'] + dssize = dsdata['size'] input_data_size.append(dssize) print(f"Input collection ID: {dsname} [{dsid}] {dssize}") inputs[input[0]] = {'id': dsid, 'src': 'hdca', 'size': dssize} @@ -625,7 +627,16 @@ def test(context: Context, args: list): :param args: varies :return: varies, typically None. 
""" - id = 'c90fffcf98b31cd3' + # id = 'c90fffcf98b31cd3' + # gi = connect(context) + # inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') + # pprint(inputs) + gi = connect(context) - inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input') - pprint(inputs) + print("Calling find_collection_id") + dsid = find_collection_id(gi, args[0]) + print(f"Collection ID: {dsid}") + print("Calling _get_dataset_data") + dsdata = _get_dataset_data(gi, dsid) + pprint(dsdata) + diff --git a/abm/lib/common.py b/abm/lib/common.py index 0fe6b3f..f75fe3a 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -406,6 +406,7 @@ def find_history(gi, name_or_id): def _get_dataset_data(gi, name_or_id): + print(f"Getting dataset data for {name_or_id}") def make_result(data): return { 'id': data['id'], @@ -415,14 +416,18 @@ def make_result(data): try: ds = gi.datasets.show_dataset(name_or_id) + print(f"Got dataset data for {name_or_id} {ds['id']}") return make_result(ds) except Exception as e: + print(f"Failed to get dataset data for {name_or_id}") pass try: + print("Getting all datasets") datasets = gi.datasets.get_datasets( name=name_or_id ) # , deleted=True, purged=True) + print(f"List of datasets for {name_or_id} is {len(datasets)}") for ds in datasets: # print_json(ds) state = True @@ -431,7 +436,11 @@ def make_result(data): if state and not ds['deleted'] and ds['visible']: # The dict returned by get_datasets does not include the input # file sizes so we need to make another call to show_datasets + print(f"Getting dataset data for {ds['id']}") return make_result(gi.datasets.show_dataset(ds['id'])) + else: + print(f"Skipping dataset {ds['id']}") + print_json(ds) except Exception as e: pass From fd36cd36a0a1d048e8ff505e78a02b18beebb4ab Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 29 May 2024 15:08:27 -0400 Subject: [PATCH 48/56] Bump build number --- abm/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abm/VERSION b/abm/VERSION 
index d501693..ea61abf 100644 --- a/abm/VERSION +++ b/abm/VERSION @@ -1 +1 @@ -2.9.0-dev.8 \ No newline at end of file +2.9.0-dev.9 \ No newline at end of file From 6bc34397345748aeab9142c6e2f9fa410e28cf09 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 4 Jun 2024 10:54:19 -0400 Subject: [PATCH 49/56] Search for a local .abm directory before using the global directory --- abm/lib/common.py | 12 ++++++++ abm/lib/dataset.py | 52 +++++++++++++++++++--------------- abm/lib/history.py | 68 ++++++++++++++------------------------------- abm/lib/workflow.py | 16 ++++++----- 4 files changed, 72 insertions(+), 76 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index f75fe3a..045ecd4 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,6 +2,8 @@ import os import subprocess import sys +from pathlib import Path + from math import ceil import bioblend.galaxy @@ -405,6 +407,16 @@ def find_history(gi, name_or_id): return history[0]['id'] +def find_config(name: str) -> str: + if os.path.exists(".abm"): + if os.path.exists(f".abm/{name}"): + return f".abm/{name}" + config = os.path.join(Path.home(), ".abm", name) + if os.path.exists(config): + return config + return None + + def _get_dataset_data(gi, name_or_id): print(f"Getting dataset data for {name_or_id}") def make_result(data): diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index a4f022c..829dde4 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json) + find_history, print_json, find_config) def do_list(context: Context, argv: list): @@ -162,31 +162,42 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--create', help='create a new history for the dataset', action='store_true') + 
parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) + parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) + parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) + parser.add_argument('key', help='the key of the dataset to import') gi = None key = None history = None kwargs = {} - while len(args) > 0: - arg = args.pop(0) - if arg in ['--hs', '--hist', '--history']: - history = args.pop(0) - elif arg in ['-c', '--create']: - gi = connect(context) - history = gi.histories.create_history(args.pop(0)).get('id') - elif arg in ['-n', '--name']: - kwargs['file_name'] = args.pop(0) - elif key is not None: - print(f"ERROR: key already set: {key}") - return - else: - key = arg + argv = parser.parse_args(args) + if argv.name is not None: + kwargs['file_name'] = argv.name - configfile = os.path.join(Path.home(), '.abm', 'datasets.yml') - if not os.path.exists(configfile): - print("ERROR: ABM has not been configured to import datasets.") - print(f"Please create {configfile}") + if argv.create and argv.history is not None: + print("ERROR: cannot specify both --create and --history") return + if argv.create: + gi = connect(context) + history = gi.histories.create_history(argv.key).get('id') + if argv.history is not None: + history = find_history(gi, argv.history) + key = argv.key + if argv.file is not None: + configfile = argv.file + if not os.path.exists(configfile): + print(f"ERROR: the specified file {configfile} was not found") + return + else: + configfile = find_config("datasets.yml") + if configfile is None: + print("ERROR: ABM has not been configured to import datasets.") + print(f"Please create {configfile}") + return + with open(configfile, 'r') as f: datasets = yaml.safe_load(f) if not key in datasets: @@ -196,9 +207,6 @@ def import_from_config(context: Context, args: list): if gi is None: gi = connect(context) - if 
history is not None: - history = find_history(gi, history) - response = gi.tools.put_url(url, history, **kwargs) print(json.dumps(response, indent=4)) diff --git a/abm/lib/history.py b/abm/lib/history.py index b19233b..1f13cbe 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -10,7 +10,7 @@ from bioblend.galaxy.objects import GalaxyInstance from lib.common import (Context, connect, find_history, parse_profile, print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header, try_for) + get_float_key, get_str_key, print_table_header, try_for, find_config) # # History related functions @@ -194,55 +194,29 @@ def _import(context: Context, args: list): def himport(context: Context, args: list): - def error_message(msg='Invalid command'): - print(f"ERROR: {msg}") - print(f"USAGE: {sys.argv[0]} history import SERVER HISTORY_ID JEHA_ID") - print(f" {sys.argv[0]} history import http://GALAXY_SERVER_URL") - print(f" {sys.argv[0]} history import [dna|rna]") - - wait = True - if '-n' in args: - args.remove('-n') - wait = False - if '--no-wait' in args: - args.remove('--no-wait') - wait = False + parser = argparse.ArgumentParser() + parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) + parser.add_argument('-f', '--file', help='Use the specified histories.yml file', required=False, default=None) + parser.add_argument('identifier', help='The history alias or URL to import', required=True) + argv = parser.parse_args(args) - if len(args) == 1: - if 'http' in args[0]: - url = args[0] + wait = not argv.no_wait + if argv.identifier.startswith('http'): + url = argv.identifier + else: + if argv.file is not None: + config = argv.file else: - datasets = None - config = f'{os.path.dirname(os.path.abspath(__file__))}/histories.yml' - # First load the histories.yml file that is packaged with abm - if os.path.exists(config): - with open(config, 'r') as f: - datasets = 
yaml.safe_load(f) - # Then load the user histories.yml, if any - userfile = os.path.join(Path.home(), ".abm", "histories.yml") - if os.path.exists(userfile): - if datasets is None: - datasets = {} - with open(userfile, 'r') as f: - userdata = yaml.safe_load(f) - for key, item in userdata.items(): - datasets[key] = item - if datasets is None: - error_message("No history URLs have been configured.") - return - if not args[0] in datasets: - error_message('Please specify a URL or name of the history to import') - return - url = datasets[args[0]] - elif len(args) == 3: - server, key, kube, master = parse_profile(args[0]) - if server is None: - error_message(f"Invalid server profile name: {args[0]}") + config = find_config("histories.yml") + if config is None: + print("ERROR: No histories.yml file found.") return - url = f"{server}history/export_archive?id={args[1]}&jeha_id={args[2]}" - else: - error_message() - return + with open(config, 'r') as f: + histories = yaml.safe_load(f) + if argv.identifier not in histories: + print(f"ERROR: No such history {argv.identifier}") + return + url = histories[argv.identifier] gi = connect(context) print(f"Importing history from {url}") diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 3a20885..9f6f2ac 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -8,7 +8,7 @@ import requests import yaml from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header + print_table_header, find_config from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri @@ -118,26 +118,28 @@ def import_from_url(context: Context, args: list): def import_from_config(context: Context, args: list): - print("Importing workflow from configuration") key = None install = True + config = None for arg in args: if arg in ['-n', '--no-tools']: print("Skipping tools") install = False + elif arg in ['-f', '--file']: + config = arg else: key = 
arg if key is None: print("ERROR: no workflow ID given") return - - userfile = os.path.join(Path.home(), ".abm", "workflows.yml") - if not os.path.exists(userfile): + if config is None: + config = find_config("workflows.yml") + if config is None: print("ERROR: this instance has not been configured to import workflows.") - print(f"Please configure {userfile} to enable workflow imports") + print(f"Please configure a workflows.yml file to enable imports") return - with open(userfile, 'r') as f: + with open(config, 'r') as f: workflows = yaml.safe_load(f) if not key in workflows: print(f"ERROR: no such workflow: {key}") From 5e9ab1756743853cf92e775710bb97073a2ad057 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Tue, 4 Jun 2024 17:02:27 -0400 Subject: [PATCH 50/56] Allow multiple datasets to be imported at once. --- abm/lib/dataset.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 829dde4..2f1cb9a 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -167,13 +167,15 @@ def import_from_config(context: Context, args: list): parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) - parser.add_argument('key', help='the key of the dataset to import') + parser.add_argument('keys', help='the key of the dataset to import', nargs='+') gi = None - key = None history = None kwargs = {} argv = parser.parse_args(args) if argv.name is not None: + if len(argv.keys) > 1: + print("ERROR: cannot specify --name with multiple keys") + return kwargs['file_name'] = argv.name if argv.create and argv.history is not None: @@ -185,7 +187,6 @@ def import_from_config(context: Context, args: list): history = 
gi.histories.create_history(argv.key).get('id') if argv.history is not None: history = find_history(gi, argv.history) - key = argv.key if argv.file is not None: configfile = argv.file if not os.path.exists(configfile): @@ -197,18 +198,18 @@ def import_from_config(context: Context, args: list): print("ERROR: ABM has not been configured to import datasets.") print(f"Please create {configfile}") return - with open(configfile, 'r') as f: datasets = yaml.safe_load(f) - if not key in datasets: - print(f"ERROR: dataset {key} has not been defined.") - return - url = datasets[key] - if gi is None: gi = connect(context) - response = gi.tools.put_url(url, history, **kwargs) - print(json.dumps(response, indent=4)) + for key in argv.keys: + if not key in datasets: + print(f"ERROR: dataset {key} has not been defined.") + else: + url = datasets[key] + print(f"Importing {key} from {url}") + response = gi.tools.put_url(url, history, **kwargs) + print(json.dumps(response, indent=4)) def _import_from_url(gi, history, url, **kwargs): From 715098a37d795a874591e435e2daa5734fd98443 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Wed, 5 Jun 2024 13:25:58 -0400 Subject: [PATCH 51/56] Use local .abm directory for configurations if it exists. 
--- abm/lib/common.py | 24 ++++++++++++++++++++++++ abm/lib/dataset.py | 25 +++++++++++++++++-------- abm/lib/history.py | 2 +- abm/lib/menu.yml | 4 ++-- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/abm/lib/common.py b/abm/lib/common.py index 045ecd4..6ab69bc 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -407,6 +407,30 @@ def find_history(gi, name_or_id): return history[0]['id'] +def find_dataset(gi, history_id, name_or_id): + try: + dataset = gi.datasets.show_dataset(name=name_or_id) + return dataset['id'] + except: + pass + + try: + dataset = gi.datasets.show_dataset(name_or_id) + return dataset['id'] + except: + pass + return None + # print("Calling get_datasets") + # datasets = gi.datasets.get_datasets(history_id=history_id, name=name_or_id) + # if datasets is None: + # print("Not found") + # return None + # if len(datasets) == 0: + # print("No datasets found (len == 0)") + # return None + # return datasets[0]['id'] + + def find_config(name: str) -> str: if os.path.exists(".abm"): if os.path.exists(f".abm/{name}"): diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 2f1cb9a..3f8d7a1 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json, find_config) + find_history, print_json, find_config, find_dataset) def do_list(context: Context, argv: list): @@ -163,7 +163,7 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-c', '--create', help='create a new history for the dataset', action='store_true') + parser.add_argument('-c', '--create', help='create a new history for the dataset', required=False, default=None) parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) 
parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) @@ -178,14 +178,15 @@ def import_from_config(context: Context, args: list): return kwargs['file_name'] = argv.name - if argv.create and argv.history is not None: + if argv.create is not None and argv.history is not None: print("ERROR: cannot specify both --create and --history") return - if argv.create: + if argv.create is not None: gi = connect(context) - history = gi.histories.create_history(argv.key).get('id') + history = gi.histories.create_history(argv.create).get('id') if argv.history is not None: + gi = connect(context) history = find_history(gi, argv.history) if argv.file is not None: configfile = argv.file @@ -249,9 +250,17 @@ def rename(context: Context, args: list): print("ERROR: please provide the history ID, dataset ID, and new name.") return gi = connect(context) - response = gi.histories.update_dataset(args[0], args[1], name=args[2]) - result = {'state': response['state'], 'name': response['name']} - print(json.dumps(result, indent=4)) + hid = find_history(gi, args[0]) + if hid is None: + print("ERROR: no such history") + return + dsid = find_dataset(gi, hid, args[1]) + if dsid is None: + print("ERROR: no such dataset") + return + response = gi.histories.update_dataset(hid, dsid, name=args[2]) + # result = {'state': response['state'], 'name': response['name']} + print(json.dumps(response, indent=4)) def test(context: Context, args: list): diff --git a/abm/lib/history.py b/abm/lib/history.py index 1f13cbe..12cd35e 100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -197,7 +197,7 @@ def himport(context: Context, args: list): parser = argparse.ArgumentParser() parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) parser.add_argument('-f', '--file', help='Use the specified 
histories.yml file', required=False, default=None) - parser.add_argument('identifier', help='The history alias or URL to import', required=True) + parser.add_argument('identifier', help='The history alias or URL to import') argv = parser.parse_args(args) wait = not argv.no_wait diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 9ee7f3e..90f877f 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -89,8 +89,8 @@ help: download a dataset from the server - name: ['import', 'imp'] handler: dataset.import_from_config - params: KEY [--hs|--hist|--history HISTORY_ID | -c|--create "History name"] - help: imports a dataset to the server from a URL specified in the datasets.yml config file. + params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] KEY [KEY...]' + help: imports one or more datasets to the server from a URL specified in the datasets.yml config file. - name: ['list', 'ls'] handler: dataset.do_list help: lists all the datasets on the server From 774f956d6d063ef3fadc2e98fbcbca580c2f0afa Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Thu, 6 Jun 2024 14:37:21 -0400 Subject: [PATCH 52/56] Use gi.jobs.get_job_metrics(job_id) to get the metrics for a job before writing the metrics file. 
--- abm/lib/benchmark.py | 1 + abm/lib/common.py | 3 ++- abm/lib/experiment.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index b9962a2..f9c5655 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -456,6 +456,7 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict): jobs = gi.jobs.get_jobs(history_id=hid) for job in jobs: data = gi.jobs.show_job(job['id'], full_details=True) + data['job_metrics'] = gi.jobs.get_job_metrics(job['id']) metrics = { 'run': run, 'cloud': cloud, diff --git a/abm/lib/common.py b/abm/lib/common.py index f75fe3a..a6d113d 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -287,7 +287,8 @@ def find_executable(name): "galaxy_slots", # "memory.failcnt", "memory.limit_in_bytes", - "memory.max_usage_in_bytes", + "memory.peak", + #"memory.max_usage_in_bytes", # "memory.memsw.limit_in_bytes", # "memory.memsw.max_usage_in_bytes", # "memory.oom_control.oom_kill_disable", diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index 54a1707..ae0addc 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -233,7 +233,8 @@ def summarize(context: Context, args: list): 'runtime_seconds', 'cpuacct.usage', 'memory.limit_in_bytes', - 'memory.max_usage_in_bytes', + 'memory.peak' + #'memory.max_usage_in_bytes', ] # ,'memory.soft_limit_in_bytes'] From 1a86f2f7b34ed9f94b2015a9f9a175be547309e2 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:11:27 -0400 Subject: [PATCH 53/56] Allow --history to be specified by name --- abm/lib/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 3f8d7a1..373a002 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -118,6 +118,7 @@ def upload(context: Context, args: list): return if gi is None: gi = connect(context) + history = find_history(gi, history) if name: _import_from_url(gi, history, url, file_name=name) else: From 
6de42f9ecc5176cd16d6b4e8b54069a4b3bd91fb Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:11:54 -0400 Subject: [PATCH 54/56] Document --name option when uploading/downloading datasets --- abm/lib/menu.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml index 90f877f..eab2d00 100644 --- a/abm/lib/menu.yml +++ b/abm/lib/menu.yml @@ -81,7 +81,7 @@ menu: - name: ['upload', 'up'] handler: dataset.upload - params: PATH [-id HISTORY_ID | -c "History name"] + params: PATH [--history "History name_or_id" | -c|--create "History name"] [-m|--name "Dataset name"] help: upload a dataset to the server from the specified URL - name: ['download', 'dl'] handler: dataset.download @@ -89,7 +89,7 @@ help: download a dataset from the server - name: ['import', 'imp'] handler: dataset.import_from_config - params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] KEY [KEY...]' + params: '[--hs|--hist|--history HISTORY_ID | -c|--create "History name"] [-n|--name "Dataset name"] KEY [KEY...]' help: imports one or more datasets to the server from a URL specified in the datasets.yml config file. 
- name: ['list', 'ls'] handler: dataset.do_list From 71f9aea26bb77aa7650dec0bee8209446a0a688a Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:12:23 -0400 Subject: [PATCH 55/56] Update requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7284f55..bc7653c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ bioblend pyyaml planemo -cloudlaunch-cli \ No newline at end of file +cloudlaunch-cli From da7f6f4222c7b7bae720c85e978b347e352511b9 Mon Sep 17 00:00:00 2001 From: Keith Suderman Date: Fri, 12 Jul 2024 22:27:01 -0400 Subject: [PATCH 56/56] Format code with Black and iSort --- abm/lib/benchmark.py | 7 ++++--- abm/lib/common.py | 26 ++++++++++++++++++++------ abm/lib/dataset.py | 29 ++++++++++++++++++++++++----- abm/lib/experiment.py | 7 +++++-- abm/lib/helm.py | 3 ++- abm/lib/history.py | 30 +++++++++++++++++++++++------- abm/lib/invocation.py | 6 ++++-- abm/lib/job.py | 5 +++-- abm/lib/workflow.py | 5 +++-- 9 files changed, 88 insertions(+), 30 deletions(-) diff --git a/abm/lib/benchmark.py b/abm/lib/benchmark.py index f9c5655..a1a1b69 100644 --- a/abm/lib/benchmark.py +++ b/abm/lib/benchmark.py @@ -187,7 +187,9 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): # print(f"Getting dataset for {key} = {item[key]}") value = _get_dataset_data(gi, item[key]) if value is None: - print(f"ERROR: Unable to find dataset {item[key]}") + print( + f"ERROR: Unable to find dataset {item[key]}" + ) return if size in value: size += value['size'] @@ -231,7 +233,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str): else: raise Exception(f'Invalid input value') print(f"Running workflow {wfid} in history {new_history_name}") - f = lambda : gi.workflows.invoke_workflow( + f = lambda: gi.workflows.invoke_workflow( wfid, inputs=inputs, history_name=new_history_name ) invocation = try_for(f, 3) @@ 
-640,4 +642,3 @@ def test(context: Context, args: list): print("Calling _get_dataset_data") dsdata = _get_dataset_data(gi, dsid) pprint(dsdata) - diff --git a/abm/lib/common.py b/abm/lib/common.py index 397f086..dd5ff2f 100644 --- a/abm/lib/common.py +++ b/abm/lib/common.py @@ -2,9 +2,8 @@ import os import subprocess import sys -from pathlib import Path - from math import ceil +from pathlib import Path import bioblend.galaxy import lib @@ -59,11 +58,17 @@ class Context: API_KEY : a user's API key to make API calls on the Galaxy instance KUBECONFIG: : the kubeconfig file needed to make changes via Helm """ + def __init__(self, *args): if len(args) == 1: arg = args[0] if type(arg) == str: - self.GALAXY_SERVER, self.API_KEY, self.KUBECONFIG, self.MASTER_KEY = parse_profile(arg) + ( + self.GALAXY_SERVER, + self.API_KEY, + self.KUBECONFIG, + self.MASTER_KEY, + ) = parse_profile(arg) elif type(arg) == dict: self.GALAXY_SERVER = arg['GALAXY_SERVER'] self.API_KEY = arg['API_KEY'] @@ -128,7 +133,9 @@ def _set_active_profile(profile_name: str): :param profile_name: :return: """ - lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile(profile_name) + lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG, lib.MASTER_KEY = parse_profile( + profile_name + ) return lib.GALAXY_SERVER != None @@ -290,7 +297,7 @@ def find_executable(name): # "memory.failcnt", "memory.limit_in_bytes", "memory.peak", - #"memory.max_usage_in_bytes", + # "memory.max_usage_in_bytes", # "memory.memsw.limit_in_bytes", # "memory.memsw.max_usage_in_bytes", # "memory.oom_control.oom_kill_disable", @@ -304,6 +311,7 @@ def find_executable(name): # "uname" ] + def print_table_header(): """ Prints the table header suitable for inclusion in CSV files. 
@@ -314,6 +322,8 @@ def print_table_header(): history_name_cache = dict() + + def get_history_name(gi, hid: str) -> str: if hid in history_name_cache: return history_name_cache[hid] @@ -444,6 +454,7 @@ def find_config(name: str) -> str: def _get_dataset_data(gi, name_or_id): print(f"Getting dataset data for {name_or_id}") + def make_result(data): return { 'id': data['id'], @@ -488,17 +499,20 @@ def _make_dataset_element(name, value): # print(f"Making dataset element for {name} = {value}({type(value)})") return dataset_collections.HistoryDatasetElement(name=name, id=value) + def get_float_key(column: int): def get_key(row: list): if row[column] == '': return -1 return float(row[column]) + return get_key + def get_str_key(column: int): # print(f"Getting string key for column {column}") def get_key(row: list): # print(f"Sorting by column {column} key {row[column]}") return row[column] - return get_key + return get_key diff --git a/abm/lib/dataset.py b/abm/lib/dataset.py index 373a002..9ac3811 100644 --- a/abm/lib/dataset.py +++ b/abm/lib/dataset.py @@ -7,7 +7,7 @@ import yaml from bioblend.galaxy import dataset_collections from common import (Context, _get_dataset_data, _make_dataset_element, connect, - find_history, print_json, find_config, find_dataset) + find_config, find_dataset, find_history, print_json) def do_list(context: Context, argv: list): @@ -164,10 +164,29 @@ def collection(context: Context, args: list): def import_from_config(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-c', '--create', help='create a new history for the dataset', required=False, default=None) - parser.add_argument('-f', '--file', help='use instead of the datasets.yml', required=False, default=None) - parser.add_argument('--history', help='add datasets to the given history', required=False, default=None) - parser.add_argument('-n', '--name', help='set the name of the dataset', required=False, default=None) + parser.add_argument( + '-c', + 
'--create', + help='create a new history for the dataset', + required=False, + default=None, + ) + parser.add_argument( + '-f', + '--file', + help='use instead of the datasets.yml', + required=False, + default=None, + ) + parser.add_argument( + '--history', + help='add datasets to the given history', + required=False, + default=None, + ) + parser.add_argument( + '-n', '--name', help='set the name of the dataset', required=False, default=None + ) parser.add_argument('keys', help='the key of the dataset to import', nargs='+') gi = None history = None diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py index ae0addc..1e35d6e 100644 --- a/abm/lib/experiment.py +++ b/abm/lib/experiment.py @@ -11,7 +11,8 @@ import benchmark import helm import yaml -from common import Context, load_profiles, print_markdown_table, get_str_key, get_float_key +from common import (Context, get_float_key, get_str_key, load_profiles, + print_markdown_table) INVOCATIONS_DIR = "invocations" METRICS_DIR = "metrics" @@ -221,7 +222,9 @@ def summarize(context: Context, args: list): # cpu = '' if len(row[11]) == 0 else f"{float(row[11])/10**9:4.1f}" memory = '' if len(row[13]) == 0 else f"{float(row[13])/GB:4.3f}" # memory = float(row[13]) / GB - print(f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |") + print( + f"| {row[0]} | {row[5].split(' ')[0]} |{row[2]} | {row[6]} | {row[7]} | {runtime} | {memory} |" + ) else: for row in table: print(separator.join([str(x) for x in row])) diff --git a/abm/lib/helm.py b/abm/lib/helm.py index da92bc3..b1cfa20 100644 --- a/abm/lib/helm.py +++ b/abm/lib/helm.py @@ -146,7 +146,8 @@ def wait_until_ready(namespace: str, env: dict): for deployment in deployments: print( run( - f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", env + f"{kubectl} rollout status deployment -n {namespace} {deployment} --watch", + env, ) ) diff --git a/abm/lib/history.py b/abm/lib/history.py index 12cd35e..bc78e95 
100644 --- a/abm/lib/history.py +++ b/abm/lib/history.py @@ -8,9 +8,10 @@ import yaml from bioblend.galaxy.objects import GalaxyInstance -from lib.common import (Context, connect, find_history, parse_profile, - print_json, summarize_metrics, print_markdown_table, - get_float_key, get_str_key, print_table_header, try_for, find_config) +from lib.common import (Context, connect, find_config, find_history, + get_float_key, get_str_key, parse_profile, print_json, + print_markdown_table, print_table_header, + summarize_metrics, try_for) # # History related functions @@ -19,6 +20,7 @@ # The number of times a failed job will be restarted. RESTART_MAX = 3 + def longest_name(histories: list): longest = 0 for history in histories: @@ -195,8 +197,20 @@ def _import(context: Context, args: list): def himport(context: Context, args: list): parser = argparse.ArgumentParser() - parser.add_argument('-n', '--no-wait', action='store_true', help='Do not wait for the import to complete', default=False) - parser.add_argument('-f', '--file', help='Use the specified histories.yml file', required=False, default=None) + parser.add_argument( + '-n', + '--no-wait', + action='store_true', + help='Do not wait for the import to complete', + default=False, + ) + parser.add_argument( + '-f', + '--file', + help='Use the specified histories.yml file', + required=False, + default=None, + ) parser.add_argument('identifier', help='The history alias or URL to import') argv = parser.parse_args(args) @@ -373,14 +387,16 @@ def wait(context: Context, args: list): wait_for(gi, history_id) -def kill_all_jobs(gi: GalaxyInstance, job_list:list): +def kill_all_jobs(gi: GalaxyInstance, job_list: list): cancel_states = ['new', 'running', 'paused'] for job in job_list: if job['state'] in cancel_states: print(f"Cancelling job {job['tool_id']}") gi.jobs.cancel_job(job['id']) else: - print(f"Job {job['id']} for tool {job['tool_id']} is in state {job['state']}") + print( + f"Job {job['id']} for tool {job['tool_id']} is 
in state {job['state']}" + ) def wait_for(gi: GalaxyInstance, history_id: str): diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py index 84e3906..c76e200 100644 --- a/abm/lib/invocation.py +++ b/abm/lib/invocation.py @@ -1,6 +1,8 @@ import argparse -from common import Context, connect, print_json, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header, print_yaml + +from common import (Context, connect, get_float_key, get_str_key, print_json, + print_markdown_table, print_table_header, print_yaml, + summarize_metrics) def doList(context: Context, args: list): diff --git a/abm/lib/job.py b/abm/lib/job.py index 4973209..92ae693 100644 --- a/abm/lib/job.py +++ b/abm/lib/job.py @@ -1,8 +1,9 @@ +import argparse import datetime import json import logging import time -import argparse + from .common import Context, connect, find_history, print_json log = logging.getLogger('abm') @@ -61,7 +62,7 @@ def wait(context: Context, args: list): timeout = params.timeout job_id = params.job_id gi = connect(context) - start_time = time.time() # we only interested in precision to the second + start_time = time.time() # we only interested in precision to the second waiting = True while waiting: job = gi.jobs.show_job(job_id, full_details=False) diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py index 9f6f2ac..e14bbf9 100644 --- a/abm/lib/workflow.py +++ b/abm/lib/workflow.py @@ -7,8 +7,9 @@ import requests import yaml -from common import Context, connect, summarize_metrics, print_markdown_table, get_float_key, get_str_key, \ - print_table_header, find_config +from common import (Context, connect, find_config, get_float_key, get_str_key, + print_markdown_table, print_table_header, + summarize_metrics) from planemo.galaxy.workflows import install_shed_repos from planemo.runnable import for_path, for_uri