Skip to content

Commit

Permalink
Merge pull request #304 from galaxyproject/dev
Browse files Browse the repository at this point in the history
Preparing for v2.9.0
  • Loading branch information
ksuderman authored Jul 13, 2024
2 parents 0c9f5eb + 24b7b5b commit 2b1de88
Show file tree
Hide file tree
Showing 31 changed files with 908 additions and 499 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021 Galaxy Project
Copyright (c) 2024 Galaxy Project

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
.PHONY: dist
help:
@echo
@echo "GOALS"
@echo " clean - deletes the dist directory and egg-info"
@echo " dist - creates the distribution package (wheel)"
@echo " format - runs Black and isort"
@echo " test-deploy - deploys to test.pypi.org"
@echo " deploy - deploys to pypi.org"
@echo " release - creates a GitHub release package"
@echo

dist:
python3 setup.py sdist bdist_wheel

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ The `kubectl` program is only required when bootstrapping a new Galaxy instance,

### Credentials

You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to interact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. You can use the `profile-sample.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing).
You will need an [API key](https://training.galaxyproject.org/training-material/faqs/galaxy/preferences_admin_api_key.html) for every Galaxy instance you would like to interact with. You will also need the *kubeconfig* file for each Kubernetes cluster. The `abm` script loads the Galaxy server URLs, API keys, and the location of the *kubeconfig* files from a Yaml configuration file that it expects to find in `$HOME/.abm/profile.yml` or `.abm-profile.yml` in the current directory. You can use the `samples/profile.yml` file as a starting point and it includes the URLs for all Galaxy instances we have used to date (December 22, 2021 as of this writing).

:bulb: It is now possible (>=2.0.0) to create Galaxy users and their API keys directly with `abm`.

Expand Down
4 changes: 2 additions & 2 deletions abm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
The Automated Benchmarking Tool
Copyright 2023 The Galaxy Project. All rights reserved.
Copyright 2024 The Galaxy Project. All rights reserved.
"""

Expand Down Expand Up @@ -64,7 +64,7 @@ def command_list(commands: list):


def copyright():
print(f" Copyright 2023 The Galaxy Project. All Rights Reserved.\n")
print(f" Copyright 2024 The Galaxy Project. All Rights Reserved.\n")


def print_main_help(menu_data):
Expand Down
23 changes: 21 additions & 2 deletions abm/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@

sys.path.append(os.path.dirname(os.path.realpath(__file__)))

# from common import parse_profile

# Where the workflow invocation data returned by Galaxy will be saved.
INVOCATIONS_DIR = "invocations"
# Where workflow runtime metrics will be saved.
METRICS_DIR = "metrics"

# Global instance of a YAML parser so we can reuse it if needed.
parser = None


# Keys used in various dictionaries.
class Keys:
NAME = 'name'
RUNS = 'runs'
Expand All @@ -22,3 +24,20 @@ class Keys:
COLLECTION = 'collection'
HISTORY_BASE_NAME = 'output_history_base_name'
HISTORY_NAME = 'history_name'


# def get_master_api_key():
# '''
# Get the master API key from the environment or configuration file.
# '''
# if 'GALAXY_MASTER_API_KEY' in os.environ:
# return os.environ['GALAXY_MASTER_API_KEY']
# config_path = os.path.expanduser("~/.abm/config.yml")
# if not os.path.exists(config_path):
# raise RuntimeError(f"ERROR: Configuration file not found: {config_path}")
# with open(config_path, 'r') as f:
# config = yaml.safe_load(f)
# key = config.get('GALAXY_MASTER_API_KEY', None)
# if key == None:
# raise RuntimeError("ERROR: GALAXY_MASTER_API_KEY not found in config.yml")
# return key
129 changes: 101 additions & 28 deletions abm/lib/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,18 @@
from bioblend.galaxy import GalaxyInstance, dataset_collections
from lib import INVOCATIONS_DIR, METRICS_DIR, Keys
from lib.common import (Context, _get_dataset_data, _make_dataset_element,
connect, print_json)
connect, print_json, try_for)
from lib.history import wait_for

log = logging.getLogger('abm')


def run_cli(context: Context, args: list):
"""
Runs a single workflow defined by *args[0]*
Command line handler to run a single benchmark.
:param args: a list that contains:
args[0] - the path to the benchmark configuration file
args[1] - the prefix to use when creating the new history in Galaxy
args[2] - the name of the experiment, if part of one. This is used to
generate output folder names.
    :param context: a context object that defines how to connect to the Galaxy server.
:param args: parameters from the command line
    :return: True if the workflows completed successfully. False otherwise.
"""
Expand All @@ -43,11 +40,15 @@ def run_cli(context: Context, args: list):


def run(context: Context, workflow_path, history_prefix: str, experiment: str):
# if len(args) > 1:
# history_prefix = args[1]
# if len(args) > 2:
# experiment = args[2].replace(' ', '_').lower()
"""
Does the actual work of running a benchmark.
    :param context: a context object that defines how to connect to the Galaxy server.
:param workflow_path: path to the ABM workflow file. (benchmark really). NOTE this is NOT the Galaxy .ga file.
:param history_prefix: a prefix value used when generating new history names.
:param experiment: the name of the experiment (arbitrary string). Used to generate new history names.
:return: True if the workflow run completed successfully. False otherwise.
"""
if os.path.exists(INVOCATIONS_DIR):
if not os.path.isdir(INVOCATIONS_DIR):
print('ERROR: Can not save invocation status, directory name in use.')
Expand Down Expand Up @@ -76,7 +77,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
workflows = parse_workflow(workflow_path)
if not workflows:
print(f"Unable to load any workflow definitions from {workflow_path}")
return
return False

print(f"Found {len(workflows)} workflow definitions")
for workflow in workflows:
Expand Down Expand Up @@ -144,11 +145,13 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
dsid = find_collection_id(gi, dsname)
dsdata = _get_dataset_data(gi, dsid)
if dsdata is None:
raise Exception(
f"ERROR: unable to resolve {dsname} to a dataset."
)
dsid = dsdata['id']
dssize = dsdata['size']
# raise Exception(
# f"ERROR: unable to resolve {dsname} to a dataset."
# )
dssize = 0
else:
dsid = dsdata['id']
dssize = dsdata['size']
input_data_size.append(dssize)
print(f"Input collection ID: {dsname} [{dsid}] {dssize}")
inputs[input[0]] = {'id': dsid, 'src': 'hdca', 'size': dssize}
Expand All @@ -173,7 +176,7 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
histories = gi.histories.get_histories(name=spec['history'])
if len(histories) == 0:
print(f"ERROR: History {spec['history']} not foune")
return
return False
hid = histories[0]['id']
pairs = 0
paired_list = spec['paired']
Expand All @@ -183,7 +186,13 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
for key in item.keys():
# print(f"Getting dataset for {key} = {item[key]}")
value = _get_dataset_data(gi, item[key])
size += value['size']
if value is None:
print(
f"ERROR: Unable to find dataset {item[key]}"
)
return
if size in value:
size += value['size']
elements.append(
_make_dataset_element(key, value['id'])
)
Expand Down Expand Up @@ -224,16 +233,20 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):
else:
raise Exception(f'Invalid input value')
print(f"Running workflow {wfid} in history {new_history_name}")
invocation = gi.workflows.invoke_workflow(
f = lambda: gi.workflows.invoke_workflow(
wfid, inputs=inputs, history_name=new_history_name
)
invocation = try_for(f, 3)
id = invocation['id']
# invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False)
f = lambda: gi.invocations.wait_for_invocation(id, 86400, 10, False)
try:
invocations = gi.invocations.wait_for_invocation(id, 86400, 10, False)
except:
invocations = try_for(f, 2)
except Exception as e:
print(f"Exception waiting for invocations")
pprint(invocation)
sys.exc_info()
raise e
print("Waiting for jobs")
if history_prefix is not None:
parts = history_prefix.split()
Expand Down Expand Up @@ -265,6 +278,14 @@ def run(context: Context, workflow_path, history_prefix: str, experiment: str):


def translate(context: Context, args: list):
"""
    Translates the human readable names of datasets and workflows into the Galaxy
ID that is unique to each server.
    :param context: the context object used to connect to the Galaxy server
:param args: [0] the path to the benchmarking YAML file to translate
:return: Nothing. Prints the translated workflow file to stdout.
"""
if len(args) == 0:
print('ERROR: no workflow configuration specified')
return
Expand Down Expand Up @@ -307,6 +328,14 @@ def translate(context: Context, args: list):


def validate(context: Context, args: list):
"""
Checks to see if the workflow and all datasets defined in the benchmark can
be found on the server.
:param context: the context object used to connect to the Galaxy instance
:param args: [0] the benchmark YAML file to be validated.
:return:
"""
if len(args) == 0:
print('ERROR: no workflow configuration specified')
return
Expand Down Expand Up @@ -412,10 +441,10 @@ def validate(context: Context, args: list):


def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict):
"""Blocks until all jobs defined in the *invocations* to complete.
"""Blocks until all jobs defined in *invocations* are complete (in a terminal state).
:param gi: The *GalaxyInstance** running the jobs
:param invocations:
:param invocations: a dictionary containing information about the jobs invoked
:return:
"""
wfid = invocations['workflow_id']
Expand All @@ -429,6 +458,7 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict):
jobs = gi.jobs.get_jobs(history_id=hid)
for job in jobs:
data = gi.jobs.show_job(job['id'], full_details=True)
data['job_metrics'] = gi.jobs.get_job_metrics(job['id'])
metrics = {
'run': run,
'cloud': cloud,
Expand Down Expand Up @@ -485,6 +515,11 @@ def wait_for_jobs(context, gi: GalaxyInstance, invocations: dict):


def parse_workflow(workflow_path: str):
"""
Loads the benchmark YAML file.
:param workflow_path: the path to the file to be loaded.
:return: a dictionary containing the benchmark.
"""
if not os.path.exists(workflow_path):
print(f'ERROR: could not find workflow file {workflow_path}')
return None
Expand All @@ -503,6 +538,14 @@ def parse_workflow(workflow_path: str):


def find_workflow_id(gi, name_or_id):
"""
Resolves the human-readable name for a workflow into the unique ID on the
Galaxy instance.
:param gi: the connection object to the Galaxy instance
:param name_or_id: the name of the workflow
:return: The Galaxy workflow ID or None if the workflow could not be located
"""
try:
wf = gi.workflows.show_workflow(name_or_id)
return wf['id']
Expand All @@ -519,7 +562,14 @@ def find_workflow_id(gi, name_or_id):


def find_dataset_id(gi, name_or_id):
# print(f"Finding dataset {name_or_id}")
"""
    Resolves the human-readable name of a dataset into the unique ID on the
Galaxy instance
:param gi: the connection object to the Galaxy instance
:param name_or_id: the name of the dataset.
:return: the Galaxy dataset ID or None if the dataset could not be located.
"""
try:
ds = gi.datasets.show_dataset(name_or_id)
return ds['id']
Expand All @@ -544,6 +594,14 @@ def find_dataset_id(gi, name_or_id):


def find_collection_id(gi, name):
"""
Resolves a human-readable collection name into the unique Galaxy ID.
:param gi: the connection object to the Galaxy instance
:param name: the name of the collection to resolve
:return: The unique Galaxy ID of the collection or None if the collection
can not be located.
"""
kwargs = {'limit': 10000, 'offset': 0}
datasets = gi.datasets.get_datasets(**kwargs)
if len(datasets) == 0:
Expand All @@ -565,7 +623,22 @@ def find_collection_id(gi, name):


def test(context: Context, args: list):
id = 'c90fffcf98b31cd3'
"""
Allows running testing code from the command line.
:param context: a connection object to a Galaxy instance
:param args: varies
:return: varies, typically None.
"""
# id = 'c90fffcf98b31cd3'
# gi = connect(context)
# inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input')
# pprint(inputs)

gi = connect(context)
inputs = gi.workflows.get_workflow_inputs(id, 'PE fastq input')
pprint(inputs)
print("Calling find_collection_id")
dsid = find_collection_id(gi, args[0])
print(f"Collection ID: {dsid}")
print("Calling _get_dataset_data")
dsdata = _get_dataset_data(gi, dsid)
pprint(dsdata)
4 changes: 3 additions & 1 deletion abm/lib/cloudlaunch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from cloudlaunch_cli.main import create_api_client
from common import Context

# DEPRECATED - Cloudlaunch is no longer used to manage Galaxy clusters.

BOLD = '\033[1m'
CLEAR = '\033[0m'

Expand Down Expand Up @@ -40,7 +42,7 @@ def h1(text):
'''


def list(context: Context, args: list):
def do_list(context: Context, args: list):
archived = False
filter = None
status = lambda t: t.instance_status if t.instance_status else t.status
Expand Down
Loading

0 comments on commit 2b1de88

Please sign in to comment.