Merge pull request #243 from galaxyproject/242-results-to-markdown

Markdown tables Closes #242
galaxyproject · Dec 12, 2023 · 3ce1924 · 3ce1924
2 parents 03b0109 + bfe272b
commit 3ce1924
Show file tree

Hide file tree

Showing 7 changed files with 91 additions and 24 deletions.
diff --git a/abm/VERSION b/abm/VERSION
@@ -1 +1 @@
-2.8.0-dev.6
+2.9.0-dev.1
diff --git a/abm/lib/common.py b/abm/lib/common.py
@@ -194,6 +194,7 @@ def find_executable(name):
 
 
 def summarize_metrics(gi, jobs: list):
+    table = []
     header = [
         "id",
         "history_id",
@@ -221,24 +222,46 @@ def summarize_metrics(gi, jobs: list):
         # "swaptotal",
         # "uname"
     ]
-
-    print(','.join(header))
+    table.append(header)
+    # print(','.join(header))
     for job in jobs:
         job_metrics = gi.jobs.get_metrics(job['id'])
         row = []
+        toolid = job.get('tool_id', 'unknown')
+        if '/' in toolid:
+            parts = toolid.split('/')
+            toolid = f'{parts[-2]}/{parts[-1]}'
         metrics = metrics_to_dict(job_metrics, header)
         metrics['id'] = job.get('id', 'unknown')
         metrics['history_id'] = job.get('history_id', 'unknown')
         metrics['history_name'] = job.get('history_name', 'unknown')
         metrics['state'] = job.get('state', 'unknown')
-        metrics['tool_id'] = job.get('tool_id', 'unknown')
+        metrics['tool_id'] = toolid
         metrics['invocation_id'] = job.get('invocation_id', 'unknown')
         for key in header:
             if key in metrics:
                 row.append(metrics[key])
             else:
                 row.append('')
-        print(','.join(row), end='\n')
+        # print(','.join(row), end='\n')
+        table.append(row)
+    return table
+
+
+def print_markdown_table(table: list) -> None:
+    print('| Tool ID | History | State | CPU (sec) | Memory (GB) | Runtime (sec)|')
+    print('|---|---|---|---:|---:|---:|')
+    GB = 1024 * 1024 * 1024
+    for row in table[1:]:
+        history = row[2]
+        state = row[3]
+        tool_id = row[4]
+        cpu = float(row[7]) / 10**9
+        memory = float(row[11]) / GB
+        runtime = float(row[15])
+        # line = ' | '.join( row[i] for i in [0,2,3,4,7,11,15])
+        # print(f'| {line} |')
+        print(f'| {tool_id} | {history} | {state} | {cpu:5.1f} | {memory:3.3f} | {runtime:5.1f} |')
 
 
 def metrics_to_dict(metrics: list, accept: list):

diff --git a/abm/lib/experiment.py b/abm/lib/experiment.py
@@ -9,7 +9,7 @@
 import benchmark
 import helm
 import yaml
-from common import Context, load_profiles
+from common import Context, load_profiles, print_markdown_table
 
 INVOCATIONS_DIR = "invocations"
 METRICS_DIR = "metrics"
@@ -114,31 +114,37 @@ def summarize(context: Context, args: list):
     :param args[0]: The path to the directory containing metrics files
     :return: None
     """
+    markdown = False
     separator = None
     input_dirs = []
     make_row = make_table_row
     header_row = "Run,Cloud,Job Conf,Workflow,History,Inputs,Tool,Tool Version,State,Slots,Memory,Runtime (Sec),CPU,Memory Limit (Bytes),Memory Max usage (Bytes)"
     for arg in args:
         if arg in ['-t', '--tsv']:
-            if separator is not None:
+            if separator is not None or markdown:
                 print('ERROR: The output format is specified more than once')
                 return
             print('tsv')
             separator = '\t'
         elif arg in ['-c', '--csv']:
-            if separator is not None:
+            if separator is not None or markdown:
                 print('ERROR: The output format is specified more than once')
                 return
             separator = ','
             print('csv')
         elif arg in ['-m', '--model']:
-            if separator is not None:
+            if separator is not None or markdown:
                 print('ERROR: The output format is specified more than once')
                 return
             print('making a model')
             separator = ','
             make_row = make_model_row
             header_row = "job_id,tool_id,tool_version,state,memory.max_usage_in_bytes,cpuacct.usage,process_count,galaxy_slots,runtime_seconds,ref_data_size,input_data_size_1,input_data_size_2"
+        elif arg == '--markdown':
+            if separator is not None or markdown:
+                print('ERROR: The output format is specified more than once')
+                return
+            markdown = True
         else:
             # print(f"Input dir {arg}")
             input_dirs.append(arg)
@@ -149,7 +155,11 @@ def summarize(context: Context, args: list):
     if separator is None:
         separator = ','
 
-    print(header_row)
+    if markdown:
+        print("|Run|Job Conf|Tool|Tool Version|State|Runtime (Sec)|CPU|Max Memory|")
+        print("|---|---|---|---|---|---|---|---|")
+    else:
+        print(header_row)
     for input_dir in input_dirs:
         for file in os.listdir(input_dir):
             input_path = os.path.join(input_dir, file)
@@ -162,7 +172,11 @@ def summarize(context: Context, args: list):
                     # print('Ignoring upload tool')
                     continue
                 row = make_row(data)
-                print(separator.join([str(x) for x in row]))
+                if markdown:
+                    line = ' | '.join(row[i] for i in [0,2,6,7,8,11,12,14])
+                    print(f'| {line} |')
+                else:
+                    print(separator.join([str(x) for x in row]))
             except Exception as e:
                 # Silently fail to allow the remainder of the table to be generated.
                 print(f"Unable to process {input_path}")

diff --git a/abm/lib/history.py b/abm/lib/history.py
@@ -8,7 +8,7 @@
 import yaml
 from bioblend.galaxy.objects import GalaxyInstance
 from lib.common import (Context, connect, find_history, parse_profile,
-                        print_json, summarize_metrics)
+                        print_json, summarize_metrics, print_markdown_table)
 
 #
 # History related functions
@@ -339,6 +339,11 @@ def tag(context: Context, args: list):
 
 
 def summarize(context: Context, args: list):
+    markdown = False
+    if '--markdown' in args:
+        markdown = True
+        args.remove('--markdown')
+
     if len(args) == 0:
         print("ERROR: Provide one or more history ID values.")
         return
@@ -368,7 +373,12 @@ def summarize(context: Context, args: list):
         #             job['workflow_id'] = invocation['workflow_id']
         #         all_jobs.append(job)
     # summarize_metrics(gi, gi.jobs.get_jobs(history_id=args[0]))
-    summarize_metrics(gi, all_jobs)
+    table = summarize_metrics(gi, all_jobs)
+    if markdown:
+        print_markdown_table(table)
+    else:
+        for row in table:
+            print(','.join(row))
 
 
 def wait(context: Context, args: list):

diff --git a/abm/lib/invocation.py b/abm/lib/invocation.py
@@ -1,4 +1,4 @@
-from common import Context, connect, print_json, summarize_metrics
+from common import Context, connect, print_json, summarize_metrics, print_markdown_table
 
 
 def doList(context: Context, args: list):
@@ -25,6 +25,11 @@ def doList(context: Context, args: list):
 
 
 def summarize(context: Context, args: list):
+    markdown = False
+    if '--markdown' in args:
+        markdown = True
+        args.remove('--markdown')
+
     if len(args) == 0:
         print("ERROR: Provide one or more invocation ID values.")
         return
@@ -36,4 +41,9 @@ def summarize(context: Context, args: list):
         job['invocation_id'] = id
         job['workflow_id'] = ''
         all_jobs.append(job)
-    summarize_metrics(gi, all_jobs)
+    table = summarize_metrics(gi, all_jobs)
+    if markdown:
+        print_markdown_table(table)
+    else:
+        for row in table:
+            print(','.join(row))
diff --git a/abm/lib/menu.yml b/abm/lib/menu.yml
@@ -69,8 +69,8 @@
       handler: workflow.inputs
     - name: [summary, summarize]
       handler: workflow.summarize
-      help: generate a CSV with job metrics for all workflow runs
-      params: ID [ID ...]
+      help: generate a CSV or markdown table with job metrics for all workflow runs
+      params: "ID [ID ...] [--markdown]"
     - name: ['test']
       handler: workflow.test
       help: run some test code
@@ -158,8 +158,8 @@
       help: show detailed information about a history
     - name: [summarize, summary, table]
       handler: history.summarize
-      params: "ID [ID...]"
-      help: Generate a CSV table with runtime metrics for all jobs in the history.
+      params: "ID [ID...] [--markdown]"
+      help: Generate a CSV or markdown table with runtime metrics for all jobs in the history.
     - name: [publish, pub]
       handler: history.publish
       help: publish the given history
@@ -250,7 +250,7 @@
     - name: [summarize, summary]
       help: summarize metrics to a CSV or TSV file.
       handler: experiment.summarize
-      params: "[-c, --csv, -t, --tsv]"
+      params: "[-c, --csv, -t, --tsv, --markdown]"
     - name: [test]
       help: playground code
       handler: experiment.test
@@ -263,8 +263,8 @@
       handler: invocation.doList
       params: "[-w|--workflow ID] [-h|--history ID]"
     - name: [summarize]
-      help: generate a CSV of job metrics for an invocation
-      params: ID
+      help: generate a CSV or markdown table of job metrics for an invocation
+      params: "ID [--markdown]"
       handler: invocation.summarize
 - name: [helm]
   help: execute a helm command

diff --git a/abm/lib/workflow.py b/abm/lib/workflow.py
@@ -7,7 +7,7 @@
 import planemo
 import requests
 import yaml
-from common import Context, connect, summarize_metrics
+from common import Context, connect, summarize_metrics, print_markdown_table
 from planemo.galaxy.workflows import install_shed_repos
 from planemo.runnable import for_path, for_uri
 
@@ -236,6 +236,11 @@ def rename(context: Context, args: list):
 
 
 def summarize(context: Context, args: list):
+    markdown = False
+    if '--markdown' in args:
+        markdown = True
+        args.remove('--markdown')
+
     if len(args) == 0:
         print("ERROR: Provide one or more workflow ID values.")
         return
@@ -250,4 +255,9 @@ def summarize(context: Context, args: list):
             job['invocation_id'] = id
             job['workflow_id'] = wid
             all_jobs.append(job)
-    summarize_metrics(gi, all_jobs)
+    table = summarize_metrics(gi, all_jobs)
+    if markdown:
+        print_markdown_table(table)
+    else:
+        for row in table:
+            print(','.join(row))