Skip to content

Commit

Permalink
Merge pull request #65 from galaxyproject/dev
Browse files Browse the repository at this point in the history
Final 1.x release
  • Loading branch information
ksuderman authored Jan 2, 2022
2 parents 1673b6f + 2fe024d commit b53fc0b
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 43 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Benchmarking Scripts
# Automated Benchmarking
An opinionated Python Bioblend script for automating benchmarking tasks in Galaxy.


Expand All @@ -7,8 +7,8 @@ An opinionated Python Bioblend script for automating benchmarking tasks in Galax

1. Clone this repository.
```bash
git clone https://github.com/ksuderman/bioblend-scripts.git
cd bioblend-scripts
git clone https://github.com/galaxyproject/gxabm.git
cd gxabm
```
1. Create a virtual env and install the required libraries
```bash
Expand Down
6 changes: 3 additions & 3 deletions abm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from lib import job, dataset, workflow, history, library, folder, benchmark, helm, kubectl, config

log = logging.getLogger('abm')
log.setLevel(logging.ERROR)
log.setLevel(logging.INFO)

VERSION = '1.4.0'
VERSION = '1.4.1'

BOLD = '\033[1m'
CLEAR = '\033[0m'
Expand Down Expand Up @@ -118,7 +118,7 @@ def alias(shortcut, fullname):

def parse_menu():
log.debug('parse_menu')
menu_config = f'{os.path.dirname(__file__)}/lib/menu.yml'
menu_config = f'{os.path.dirname(os.path.abspath(__file__))}/lib/menu.yml'
if not os.path.exists(menu_config):
print(f"ERROR: Unable to load the menu configuration from {menu_config}")
sys.exit(1)
Expand Down
30 changes: 30 additions & 0 deletions config/dna2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
- workflow_id: d8beac5a0d816eee
output_history_base_name: DNA 29C 116G-MEM
runs:
- history_name: HG00171
inputs:
- name: FASTQ Dataset
dataset_id: 3ef6e7918b01e665
- history_name: HG00152
inputs:
- name: FASTQ Dataset
dataset_id: daf58ed87714bfc3
- history_name: HG00599
inputs:
- name: FASTQ Dataset
dataset_id: 1c8ebd93f5710bc6
- history_name: HG01167
inputs:
- name: FASTQ Dataset
dataset_id: c49cc12509f57ec1
- history_name: HG003
inputs:
- name: FASTQ Dataset
dataset_id: 845199e085d0abbe
- workflow_id: b94314cb9cb46380
output_history_base_name: PairedDNA 29C 116G-MEM
runs:
- history_name: SRS9540951
inputs:
- name: FASTQ Dataset
dataset_id: 4b0ef3aaf341f57b
71 changes: 49 additions & 22 deletions lib/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,49 @@ def run(args: list):

profiles = load_profiles()
num_runs = config['runs']
for n in range(num_runs):
for cloud in config['cloud']:
if cloud not in profiles:
print(f"WARNING: No profile found for {cloud}")
continue
if not set_active_profile(cloud):
print(f"ERROR: Unable to set the profile for {cloud}")
continue
if lib.KUBECONFIG is None:
print(f"ERROR: No kubeconfig set for {cloud}")
continue
print("------------------------")
print(f"Benchmarking run #{n+1}")
for cloud in config['cloud']:
if cloud not in profiles:
print(f"WARNING: no profile for instance {cloud}")
print(f"Benchmarking: {cloud}")
for conf in config['job_configs']:
job_conf_path = f"rules/{conf}.yml"
if not helm.update([job_conf_path]):
print(f"WARNING: job conf not found {conf}")
continue
if not set_active_profile(cloud):
print(f"WARNING: unable to set {cloud} as the active profile")
if lib.KUBECONFIG is None:
print(f"WARNING: no kubeconfig for instance {cloud}")
continue
for job_conf in config['job_configs']:
job_conf_path = f"rules/{job_conf}.yml"
if not helm.update([job_conf_path]):
print(f"WARNING: job conf not found {job_conf}")
continue
history_name_prefix = f"Run {n} {job_conf}"
for n in range(num_runs):
history_name_prefix = f"{n} {cloud} {conf}"
for workflow_conf in config['workflow_conf']:
workflow.run([workflow_conf, history_name_prefix])

# for n in range(num_runs):
# print("------------------------")
# print(f"Benchmarking run #{n+1}")
# for cloud in config['cloud']:
# if cloud not in profiles:
# print(f"WARNING: no profile for instance {cloud}")
# continue
# if not set_active_profile(cloud):
# print(f"WARNING: unable to set {cloud} as the active profile")
# if lib.KUBECONFIG is None:
#             print(f"WARNING: no kubeconfig for instance {cloud}")
# continue
# for job_conf in config['job_configs']:
# job_conf_path = f"rules/{job_conf}.yml"
# if not helm.update([job_conf_path]):
# print(f"WARNING: job conf not found {job_conf}")
# continue
# history_name_prefix = f"Run {n} {job_conf}"
# for workflow_conf in config['workflow_conf']:
# workflow.run([workflow_conf, history_name_prefix])


def test(args: list):
print(common.GALAXY_SERVER)
Expand All @@ -73,15 +95,19 @@ def summarize(args: list):
:return: None
"""
row = [''] * 12
print("Workflow,History,Server,Tool ID,State,Slots,Memory,Runtime,CPU,Memory Limit,Memory Max usage,Memory Soft Limit")
for file in os.listdir(METRICS_DIR):
input_path = os.path.join(METRICS_DIR, file)
with open(input_path, 'r') as f:
data = json.load(f)
row[0] = data['workflow_id']
row[1] = data['history_id']
row[2] = data['server'] if data['server'] is not None else 'https://iu1.usegvl.org/galaxy'
row[3] = data['metrics']['tool_id']
row[4] = data['metrics']['state']
row[0] = data['run']
row[1] = data['cloud']
row[2] = data['conf']
row[3] = data['workflow_id']
row[4] = data['history_id']
row[5] = data['server'] if data['server'] is not None else 'https://iu1.usegvl.org/galaxy'
row[6] = data['metrics']['tool_id']
row[7] = data['metrics']['state']
add_metrics_to_row(data['metrics']['job_metrics'], row)
print(','.join(row))

Expand All @@ -91,7 +117,8 @@ def add_metrics_to_row(metrics_list: list, row: list):
for job_metrics in metrics_list:
if job_metrics['name'] in accept_metrics:
index = accept_metrics.index(job_metrics['name'])
row[index + 5] = job_metrics['raw_value']
row[index + 8] = job_metrics['raw_value']
# row.append(job_metrics['raw_value'])



Expand Down
3 changes: 1 addition & 2 deletions lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,8 @@ def connect():


def set_active_profile(profile_name: str):
print(f"Parsing profile for {profile_name}")
# print(f"Parsing profile for {profile_name}")
lib.GALAXY_SERVER, lib.API_KEY, lib.KUBECONFIG = parse_profile(profile_name)
print(lib.KUBECONFIG)
return lib.GALAXY_SERVER != None


Expand Down
41 changes: 28 additions & 13 deletions lib/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,11 @@ def run(args: list):
print(f"Wrote invocation data to {output_path}")
invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False)
print("Waiting for jobs")
if len(args) > 1:
for parts in args[1].split():
invocations['run'] = parts[0]
invocations['cloud'] = parts[1]
invocations['job_conf'] = parts[2]
wait_for_jobs(gi, invocations)
print("Benchmarking run complete")
return True
Expand Down Expand Up @@ -387,20 +392,30 @@ def wait_for_jobs(gi: GalaxyInstance, invocations: dict):
"""
wfid = invocations['workflow_id']
hid = invocations['history_id']
run = invocations['run']
cloud = invocations['cloud']
conf = invocations['job_conf']
for step in invocations['steps']:
job_id = step['job_id']
if job_id is not None:
print(f"Waiting for job {job_id} on {lib.GALAXY_SERVER}")
status = gi.jobs.wait_for_job(job_id, 86400, 10, False)
data = gi.jobs.show_job(job_id, full_details=True)
metrics = {
'workflow_id': wfid,
'history_id': hid,
'metrics': data,
'status': status,
'server': lib.GALAXY_SERVER
}
output_path = os.path.join(METRICS_DIR, f"{job_id}.json")
with open(output_path, "w") as f:
json.dump(metrics, f, indent=4)
print(f"Wrote metrics to {output_path}")
try:
# TDOD Should retry if anything throws an exception.
status = gi.jobs.wait_for_job(job_id, 86400, 10, False)
data = gi.jobs.show_job(job_id, full_details=True)
metrics = {
'run': run,
'cloud': cloud,
'job_conf': conf,
'workflow_id': wfid,
'history_id': hid,
'metrics': data,
'status': status,
'server': lib.GALAXY_SERVER
}
output_path = os.path.join(METRICS_DIR, f"{job_id}.json")
with open(output_path, "w") as f:
json.dump(metrics, f, indent=4)
print(f"Wrote metrics to {output_path}")
except Exception as e:
print(f"ERROR: {e}")

0 comments on commit b53fc0b

Please sign in to comment.