From d43b826666db22c19332b8741cb3bcb932c1248c Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 5 Apr 2024 11:31:11 +0200 Subject: [PATCH 1/4] Run make config-rebuild --- doc/source/admin/galaxy_options.rst | 41 +++++++++++-- lib/galaxy/config/sample/galaxy.yml.sample | 67 +++++++++++++--------- 2 files changed, 75 insertions(+), 33 deletions(-) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 2100c81f8cae..f3021dc25d48 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -1453,7 +1453,7 @@ This option has no effect if the file specified by object_store_config_file exists. Otherwise, if this option is set, it overrides any other objectstore settings. - The syntax, available instrumenters, and documentation of their + The syntax, available storage plugins, and documentation of their options is explained in detail in the object store sample configuration file, `object_store_conf.sample.yml` :Default: ``None`` @@ -2606,8 +2606,20 @@ :Description: The upload store is a temporary directory in which files uploaded - by the tus middleware or server will be placed. Defaults to - new_file_path if not set. + by the tus middleware or server for user uploads will be placed. + Defaults to new_file_path if not set. +:Default: ``None`` +:Type: str + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``tus_upload_store_job_files`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + The upload store is a temporary directory in which files uploaded + by the tus middleware or server for remote job files (Pulsar) will + be placed. Defaults to tus_upload_store if not set. :Default: ``None`` :Type: str @@ -4030,6 +4042,23 @@ :Type: str +~~~~~~~~~~~~~~~~~~~~~ +``oidc_scope_prefix`` +~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Sets the prefix for OIDC scopes specific to this Galaxy instance. 
+ If an API call is made against this Galaxy instance using an OIDC + bearer token, any scopes must be prefixed with this value e.g. + https://galaxyproject.org/api. More concretely, to request all + permissions that the user has, the scope would have to be + specified as ":*". e.g "https://galaxyproject.org/api:*". + Currently, only * is recognised as a valid scope, and future + iterations may provide more fine-grained scopes. +:Default: ``https://galaxyproject.org/api`` +:Type: str + + ~~~~~~~~~~~~~~~~~~~~ ``auth_config_file`` ~~~~~~~~~~~~~~~~~~~~ @@ -5439,9 +5468,9 @@ :Type: str -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``help_forum_tool_panel_integration_enabled`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``enable_help_forum_tool_panel_integration`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :Description: Enable the integration of the Galaxy Help Forum in the tool panel. diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 96c2a84e0bda..21b325bdb490 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -1,21 +1,21 @@ # Galaxy is configured by default to be usable in a single-user development # environment. To tune the application for a multi-user production # environment, see the documentation at: -# +# # https://docs.galaxyproject.org/en/master/admin/production.html -# +# # Throughout this sample configuration file, except where stated otherwise, # uncommented values override the default if left unset, whereas commented # values are set to the default value. Relative paths are relative to the root # Galaxy directory. -# +# # Examples of many of these options are explained in more detail in the Galaxy # Community Hub. -# +# # https://galaxyproject.org/admin/config -# +# # Config hackers are encouraged to check there before asking for help. -# +# # Configuration for Gravity process manager. 
# ``uwsgi:`` section will be ignored if Galaxy is started via Gravity commands (e.g ``./run.sh``, ``galaxy`` or ``galaxyctl``). gravity: @@ -181,7 +181,7 @@ gravity: # enable: false # gx-it-proxy version - # version: '>=0.0.5' + # version: '>=0.0.6' # Public-facing IP of the proxy # ip: localhost @@ -248,6 +248,14 @@ gravity: # Must match ``tus_upload_store`` setting in ``galaxy:`` section. # upload_dir: + # Value of tusd -hooks-http option + # + # The default is suitable for using tusd for Galaxy uploads and should not be changed unless you are using tusd for + # other purposes such as Pulsar staging. + # + # The value of galaxy_infrastructure_url is automatically prepended if the option starts with a `/` + # hooks_http: /api/upload/hooks + # Comma-separated string of enabled tusd hooks. # # Leave at the default value to require authorization at upload creation time. @@ -333,15 +341,9 @@ gravity: # names. # environment: {} - # Configure dynamic handlers in this section. Below is a simple example + # Configure dynamic handlers in this section. # See https://docs.galaxyproject.org/en/latest/admin/scaling.html#dynamically-defined-handlers for details. - #handlers: - # handler: - # processes: 3 - # pools: - # - job-handlers - # - workflow-schedulers - + # handlers: {} galaxy: # The directory that will be prepended to relative paths in options @@ -1030,7 +1032,7 @@ galaxy: # This option has no effect if the file specified by # object_store_config_file exists. Otherwise, if this option is set, # it overrides any other objectstore settings. - # The syntax, available instrumenters, and documentation of their + # The syntax, available storage plugins, and documentation of their # options is explained in detail in the object store sample # configuration file, `object_store_conf.sample.yml` #object_store_config: null @@ -1067,6 +1069,13 @@ galaxy: # for that object store entry. 
#object_store_cache_size: -1 + # Set this to true to indicate in the UI that a user's object store + # selection isn't simply a "preference" that job destinations often + # respect but in fact will always be respected. This should be set to + # true to simplify the UI as long as job destinations never override + # 'object_store_id's for jobs. + #object_store_always_respect_user_selection: false + # What Dataset attribute is used to reference files in an ObjectStore # implementation, this can be 'uuid' or 'id'. The default will depend # on how the object store is configured, starting with 20.05 Galaxy @@ -1304,9 +1313,6 @@ galaxy: # The value of this option will be resolved with respect to # . #interactivetools_map: interactivetools_map.sqlite - # Note: the following config should still be used due to lack of - # support of data_dir resolution in gx-it-proxy and gravity: - #interactivetools_map: database/interactivetools_map.sqlite # Prefix to use in the formation of the subdomain or path for # interactive tools @@ -1547,10 +1553,15 @@ galaxy: #nginx_upload_job_files_path: null # The upload store is a temporary directory in which files uploaded by - # the tus middleware or server will be placed. Defaults to - # new_file_path if not set. + # the tus middleware or server for user uploads will be placed. + # Defaults to new_file_path if not set. #tus_upload_store: null + # The upload store is a temporary directory in which files uploaded by + # the tus middleware or server for remote job files (Pulsar) will be + # placed. Defaults to tus_upload_store if not set. + #tus_upload_store_job_files: null + # Galaxy can upload user files in chunks without using nginx. Enable # the chunk uploader by specifying a chunk size larger than 0. The # chunk size is specified in bytes (default: 10MB). @@ -2189,12 +2200,13 @@ galaxy: # . #oidc_backends_config_file: oidc_backends_config.xml - # Sets the prefix for OIDC scopes specific to this Galaxy instance. 
- # If an API call is made against this Galaxy instance using an OIDC bearer token, - # any scopes must be prefixed with this value e.g. https://galaxyproject.org/api. - # More concretely, to request all permissions that the user has, the scope - # would have to be specified as ":*". e.g "https://galaxyproject.org/api:*". - # Currently, only * is recognised as a valid scope, and future iterations may + # Sets the prefix for OIDC scopes specific to this Galaxy instance. If + # an API call is made against this Galaxy instance using an OIDC + # bearer token, any scopes must be prefixed with this value e.g. + # https://galaxyproject.org/api. More concretely, to request all + # permissions that the user has, the scope would have to be specified + # as ":*". e.g "https://galaxyproject.org/api:*". Currently, + # only * is recognised as a valid scope, and future iterations may # provide more fine-grained scopes. #oidc_scope_prefix: https://galaxyproject.org/api @@ -2897,3 +2909,4 @@ galaxy: # Enable the integration of the Galaxy Help Forum in the tool panel. # This requires the help_forum_api_url to be set. #enable_help_forum_tool_panel_integration: false + From eed755a3f72f5d1cea05a085c16f56ac1f10a8c5 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 5 Apr 2024 16:16:03 +0200 Subject: [PATCH 2/4] Reorder enable_celery_tasks in config schema This is the main flag and it makes sense to set it before the rest of the configuration. 
--- doc/source/admin/galaxy_options.rst | 24 ++++++++++----------- lib/galaxy/config/sample/galaxy.yml.sample | 10 ++++----- lib/galaxy/config/schemas/config_schema.yml | 17 ++++++++------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index f3021dc25d48..04ad0c5a1d5b 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -5145,6 +5145,18 @@ :Type: str +~~~~~~~~~~~~~~~~~~~~~~~ +``enable_celery_tasks`` +~~~~~~~~~~~~~~~~~~~~~~~ + +:Description: + Offload long-running tasks to a Celery task queue. Activate this + only if you have setup a Celery worker for Galaxy. For details, + see https://docs.galaxyproject.org/en/master/admin/production.html +:Default: ``false`` +:Type: bool + + ~~~~~~~~~~~~~~~ ``celery_conf`` ~~~~~~~~~~~~~~~ @@ -5166,18 +5178,6 @@ :Type: any -~~~~~~~~~~~~~~~~~~~~~~~ -``enable_celery_tasks`` -~~~~~~~~~~~~~~~~~~~~~~~ - -:Description: - Offload long-running tasks to a Celery task queue. Activate this - only if you have setup a Celery worker for Galaxy. For details, - see https://docs.galaxyproject.org/en/master/admin/production.html -:Default: ``false`` -:Type: bool - - ~~~~~~~~~~~~~~~~~~~~~~~~~~ ``celery_user_rate_limit`` ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 21b325bdb490..532f582920ed 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -2757,6 +2757,11 @@ galaxy: # commented out line below). #amqp_internal_connection: sqlalchemy+sqlite:///./database/control.sqlite?isolation_level=IMMEDIATE + # Offload long-running tasks to a Celery task queue. Activate this + # only if you have setup a Celery worker for Galaxy. For details, see + # https://docs.galaxyproject.org/en/master/admin/production.html + #enable_celery_tasks: false + # Configuration options passed to Celery. 
# To refer to a task by name, use the template `galaxy.foo` where # `foo` is the function name of the task defined in the @@ -2774,11 +2779,6 @@ galaxy: # galaxy.fetch_data: galaxy.external # galaxy.set_job_metadata: galaxy.external - # Offload long-running tasks to a Celery task queue. Activate this - # only if you have setup a Celery worker for Galaxy. For details, see - # https://docs.galaxyproject.org/en/master/admin/production.html - #enable_celery_tasks: false - # If set to a non-0 value, upper limit on number of tasks that can be # executed per user per second. #celery_user_rate_limit: 0.0 diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index ea25af4fc4c4..04e0e41c6f5b 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -3755,6 +3755,15 @@ mapping: will automatically create and use a separate sqlite database located in your /database folder (indicated in the commented out line below). + enable_celery_tasks: + type: bool + default: false + required: false + desc: | + Offload long-running tasks to a Celery task queue. + Activate this only if you have setup a Celery worker for Galaxy. + For details, see https://docs.galaxyproject.org/en/master/admin/production.html + celery_conf: type: any required: false @@ -3776,14 +3785,6 @@ mapping: For details, see Celery documentation at https://docs.celeryq.dev/en/stable/userguide/configuration.html. - enable_celery_tasks: - type: bool - default: false - required: false - desc: | - Offload long-running tasks to a Celery task queue. - Activate this only if you have setup a Celery worker for Galaxy. 
- For details, see https://docs.galaxyproject.org/en/master/admin/production.html celery_user_rate_limit: type: float From 9fb43e58b2802edaefefe33033f206f52e6848ee Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 5 Apr 2024 17:08:28 +0200 Subject: [PATCH 3/4] Update production docs to mention Celery --- doc/source/admin/production.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/source/admin/production.md b/doc/source/admin/production.md index e5aa2a814129..d7237abfb834 100644 --- a/doc/source/admin/production.md +++ b/doc/source/admin/production.md @@ -179,3 +179,20 @@ Finally, if you are using Galaxy <= release_2014.06.02, we recommend that you in ### Make the proxy handle uploads and downloads By default, Galaxy receives file uploads as a stream from the proxy server and then writes this file to disk. Likewise, it sends files as a stream to the proxy server. This occupies the GIL in that Galaxy process and will decrease responsiveness for other operations in that process. To solve this problem, you can configure your proxy server to serve downloads directly, involving Galaxy only for the task of authorizing that the user has permission to read the dataset. If using nginx as the proxy, you can configure it to receive uploaded files and write them to disk itself, only notifying Galaxy of the upload once it's completed. All the details on how to configure these can be found on the [Apache](apache.md) and [nginx](nginx.md) proxy instruction pages. + +### Use Celery for asynchronous tasks + +Galaxy can use [Celery](https://docs.celeryq.dev/en/stable/index.html) to handle asynchronous tasks. This is useful for offloading tasks that are usually time-consuming and that would otherwise block the Galaxy process. 
Some use cases include: + +- Setting metadata on datasets +- Purging datasets +- Exporting histories or other data +- Running periodic tasks + +The list of tasks that are currently handled by `Celery` can be found in `lib/galaxy/celery/tasks.py`. + +To enable Celery in your instance you need to follow some additional steps: + +- Set `enable_celery_tasks: true` in the Galaxy config. +- Configure the `result_backend` under `celery_conf` to store the results of the tasks. For example, you can use [`redis` as the backend](https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#broker-redis). If you are using `redis`, make sure to install the `redis` dependency in your Galaxy environment with `pip install redis`. You can find more information on how to configure other backends in the [Celery documentation](https://docs.celeryq.dev/en/stable/userguide/tasks.html#task-result-backends). +- Configure one or more workers to handle the tasks. You can find more information on how to configure workers in the [Celery documentation](https://docs.celeryq.dev/en/stable/userguide/workers.html). If you are using [Gravity](https://github.com/galaxyproject/gravity) it will simplify the process of setting up Celery workers. 
From 2f4bc1a60091f0e91253c7d4da29d87b79968db4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Fri, 5 Apr 2024 17:09:28 +0200 Subject: [PATCH 4/4] Update config docs to mention the celery result_backend --- doc/source/admin/galaxy_options.rst | 9 ++++++--- lib/galaxy/config/sample/galaxy.yml.sample | 8 ++++++-- lib/galaxy/config/schemas/config_schema.yml | 8 ++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 04ad0c5a1d5b..441d5d2d85ad 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -5151,8 +5151,11 @@ :Description: Offload long-running tasks to a Celery task queue. Activate this - only if you have setup a Celery worker for Galaxy. For details, - see https://docs.galaxyproject.org/en/master/admin/production.html + only if you have setup a Celery worker for Galaxy and you have + configured the `celery_conf` option below. Specifically, you need + to set the `result_backend` option in the `celery_conf` option to + a valid Celery result backend URL. For details, see + https://docs.galaxyproject.org/en/master/admin/production.html#use-celery-for-asynchronous-tasks :Default: ``false`` :Type: bool @@ -5174,7 +5177,7 @@ disabled on a per-task basis at this time.) For details, see Celery documentation at https://docs.celeryq.dev/en/stable/userguide/configuration.html. 
-:Default: ``{'task_routes': {'galaxy.fetch_data': 'galaxy.external', 'galaxy.set_job_metadata': 'galaxy.external'}}`` +:Default: ``{'result_backend': 'redis://127.0.0.1:6379/0', 'task_routes': {'galaxy.fetch_data': 'galaxy.external', 'galaxy.set_job_metadata': 'galaxy.external'}}`` :Type: any diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 532f582920ed..fffe8446af47 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -2758,8 +2758,11 @@ galaxy: #amqp_internal_connection: sqlalchemy+sqlite:///./database/control.sqlite?isolation_level=IMMEDIATE # Offload long-running tasks to a Celery task queue. Activate this - # only if you have setup a Celery worker for Galaxy. For details, see - # https://docs.galaxyproject.org/en/master/admin/production.html + # only if you have setup a Celery worker for Galaxy and you have + # configured the `celery_conf` option below. Specifically, you need to + # set the `result_backend` option in the `celery_conf` option to a + # valid Celery result backend URL. For details, see + # https://docs.galaxyproject.org/en/master/admin/production.html#use-celery-for-asynchronous-tasks #enable_celery_tasks: false # Configuration options passed to Celery. @@ -2775,6 +2778,7 @@ galaxy: # For details, see Celery documentation at # https://docs.celeryq.dev/en/stable/userguide/configuration.html. #celery_conf: + # result_backend: redis://127.0.0.1:6379/0 # task_routes: # galaxy.fetch_data: galaxy.external # galaxy.set_job_metadata: galaxy.external diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 04e0e41c6f5b..8178826bbbf7 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -3761,13 +3761,17 @@ mapping: required: false desc: | Offload long-running tasks to a Celery task queue. 
- Activate this only if you have setup a Celery worker for Galaxy. - For details, see https://docs.galaxyproject.org/en/master/admin/production.html + Activate this only if you have setup a Celery worker for Galaxy and you have + configured the `celery_conf` option below. Specifically, you need to set the + `result_backend` option in the `celery_conf` option to a valid Celery result + backend URL. + For details, see https://docs.galaxyproject.org/en/master/admin/production.html#use-celery-for-asynchronous-tasks celery_conf: type: any required: false default: + result_backend: redis://127.0.0.1:6379/0 task_routes: 'galaxy.fetch_data': 'galaxy.external' 'galaxy.set_job_metadata': 'galaxy.external'