From e2530b46a99f1dca716a1e0c60357fceaa2ff992 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 7 Feb 2023 16:50:16 -0500 Subject: [PATCH 01/21] private objectstores & dataset.sharable Setup abstractions to prevent sharing transient or private per-user objects in an objectstore. --- .../DatasetStorage/DatasetStorage.test.js | 8 +- .../Dataset/DatasetStorage/DatasetStorage.vue | 19 ++- .../ObjectStoreRestrictionSpan.test.js | 27 ++++ .../ObjectStoreRestrictionSpan.vue | 38 ++++++ .../LibraryFolder/TopToolbar/library-model.js | 2 +- lib/galaxy/job_execution/output_collect.py | 2 +- lib/galaxy/jobs/__init__.py | 4 +- lib/galaxy/jobs/runners/__init__.py | 10 +- lib/galaxy/model/__init__.py | 47 ++++++- lib/galaxy/model/security.py | 54 ++++++-- lib/galaxy/objectstore/__init__.py | 103 ++++++++++++-- lib/galaxy/objectstore/azure_blob.py | 1 + lib/galaxy/objectstore/cloud.py | 1 + lib/galaxy/objectstore/irods.py | 2 + lib/galaxy/objectstore/pithos.py | 2 + lib/galaxy/objectstore/s3.py | 2 + lib/galaxy/tools/actions/upload_common.py | 4 +- .../webapps/galaxy/api/history_contents.py | 8 ++ .../webapps/galaxy/controllers/dataset.py | 7 +- .../webapps/galaxy/services/datasets.py | 4 + .../galaxy/services/history_contents.py | 15 ++ .../api/test_dataset_collections.py | 2 +- lib/galaxy_test/api/test_libraries.py | 1 + lib/galaxy_test/base/populators.py | 33 ++++- .../objectstore/test_private_handling.py | 51 +++++++ test/unit/data/test_galaxy_mapping.py | 129 ++++++++++++++++-- test/unit/objectstore/test_objectstore.py | 57 +++++++- 27 files changed, 573 insertions(+), 60 deletions(-) create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue create mode 100644 test/integration/objectstore/test_private_handling.py diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js index 821126543925..7a35517177a7 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js @@ -10,14 +10,17 @@ const localVue = getLocalVue(); const TEST_STORAGE_API_RESPONSE_WITHOUT_ID = { object_store_id: null, + private: false, }; const TEST_STORAGE_API_RESPONSE_WITH_ID = { object_store_id: "foobar", + private: false, }; const TEST_STORAGE_API_RESPONSE_WITH_NAME = { object_store_id: "foobar", name: "my cool storage", description: "My cool **markdown**", + private: true, }; const TEST_DATASET_ID = "1"; const TEST_STORAGE_URL = `/api/datasets/${TEST_DATASET_ID}/storage`; @@ -46,9 +49,6 @@ describe("Dataset Storage", () => { wrapper = shallowMount(DatasetStorage, { propsData: { datasetId: TEST_DATASET_ID }, localVue, - stubs: { - "loading-span": true, - }, }); } @@ -102,6 +102,7 @@ describe("Dataset Storage", () => { expect(byIdSpan.length).toBe(1); const byNameSpan = wrapper.findAll(".display-os-by-name"); expect(byNameSpan.length).toBe(0); + expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeFalsy(); }); it("test dataset storage with object store name", async () => { @@ -116,6 +117,7 @@ describe("Dataset Storage", () => { expect(byIdSpan.length).toBe(0); const byNameSpan = wrapper.findAll(".display-os-by-name"); expect(byNameSpan.length).toBe(1); + expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeTruthy(); }); afterEach(() => { diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue index 8edb5fc3967c..fdfa2c1dce69 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue @@ -18,13 +18,17 @@

This dataset is stored in - - a Galaxy object store named {{ storageInfo.name }} + + a Galaxy object store named + {{ storageInfo.name }} - - a Galaxy object store with id {{ storageInfo.object_store_id }} + + a Galaxy object store with id + {{ storageInfo.object_store_id }} - the default configured Galaxy object store . + + the default configured Galaxy object store .

@@ -37,10 +41,12 @@ import { getAppRoot } from "onload/loadConfig"; import LoadingSpan from "components/LoadingSpan"; import MarkdownIt from "markdown-it"; import { errorMessageAsString } from "utils/simple-error"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; export default { components: { LoadingSpan, + ObjectStoreRestrictionSpan, }, props: { datasetId: { @@ -80,6 +86,9 @@ export default { } return rootSources[0].source_uri; }, + isPrivate() { + return this.storageInfo.private; + }, }, created() { const datasetId = this.datasetId; diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js new file mode 100644 index 000000000000..a022b92aa5c4 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js @@ -0,0 +1,27 @@ +import { shallowMount } from "@vue/test-utils"; +import { getLocalVue } from "tests/jest/helpers"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; + +const localVue = getLocalVue(); + +describe("ObjectStoreRestrictionSpan", () => { + let wrapper; + + it("should render info about private storage if isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: true }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("private"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); + + it("should render info about unrestricted storage if not isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: false }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("unrestricted"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); +}); diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue new file mode 100644 index 000000000000..29d313a72142 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue @@ -0,0 +1,38 @@ + + + + + diff --git a/client/src/components/Libraries/LibraryFolder/TopToolbar/library-model.js b/client/src/components/Libraries/LibraryFolder/TopToolbar/library-model.js index 3db3e4a2e6ef..c16ff708ab3f 100644 --- a/client/src/components/Libraries/LibraryFolder/TopToolbar/library-model.js +++ b/client/src/components/Libraries/LibraryFolder/TopToolbar/library-model.js @@ -172,7 +172,7 @@ var HistoryContents = Backbone.Collection.extend({ this.id = options.id; }, url: function () { - return `${this.urlRoot + this.id}/contents`; + return `${this.urlRoot + this.id}/contents?shareable=true`; }, model: HistoryItem, }); diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index 818c2a03d89c..22399d45073f 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -338,7 +338,7 @@ def add_library_dataset_to_folder(self, library_folder, ld): trans.app.security_agent.copy_library_permissions(trans, ld, ldda) # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.set_all_dataset_permissions( - ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user) + ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), flush=False, new=True ) library_folder.add_library_dataset(ld, genome_build=ldda.dbkey) trans.sa_session.add(library_folder) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 722af5fc6bb5..216b06a1b36c 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1599,6 +1599,8 @@ def _set_object_store_ids(self, job): object_store_populator = ObjectStorePopulator(self.app, job.user) object_store_id = self.get_destination_configuration("object_store_id", None) + require_shareable = job.requires_shareable_storage(self.app.security_agent) + if object_store_id: object_store_populator.object_store_id = object_store_id @@ -1610,7 +1612,7 @@ def _set_object_store_ids(self, job): # afterward. State below needs to happen the same way. for dataset_assoc in job.output_datasets + job.output_library_datasets: dataset = dataset_assoc.dataset - object_store_populator.set_object_store_id(dataset) + object_store_populator.set_object_store_id(dataset, require_shareable=require_shareable) job.object_store_id = object_store_populator.object_store_id self._setup_working_directory(job=job) diff --git a/lib/galaxy/jobs/runners/__init__.py b/lib/galaxy/jobs/runners/__init__.py index 2ec20b6ccf4f..20f40a159157 100644 --- a/lib/galaxy/jobs/runners/__init__.py +++ b/lib/galaxy/jobs/runners/__init__.py @@ -171,7 +171,15 @@ def run_next(self): def put(self, job_wrapper: "MinimalJobWrapper"): """Add a job to the queue (by job identifier), indicate that the job is ready to run.""" put_timer = ExecutionTimer() - queue_job = job_wrapper.enqueue() + try: + queue_job = job_wrapper.enqueue() + except Exception as e: + queue_job = False + # Required for exceptions thrown by object store incompatiblity. + # tested by test/integration/objectstore/test_private_handling.py + job_wrapper.fail(str(e), exception=e) + log.debug(f"Job [{job_wrapper.job_id}] failed to queue {put_timer}") + return if queue_job: self.mark_as_queued(job_wrapper) log.debug(f"Job [{job_wrapper.job_id}] queued {put_timer}") diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 616ddd755589..f3020498d5e2 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -175,6 +175,7 @@ # Tags that get automatically propagated from inputs to outputs when running jobs. AUTO_PROPAGATED_TAGS = ["name"] YIELD_PER_ROWS = 100 +CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-shareable dataset." if TYPE_CHECKING: @@ -1483,6 +1484,19 @@ def remap_objects(p, k, obj): job_attrs["params"] = params_dict return job_attrs + def requires_shareable_storage(self, security_agent): + # An easy optimization would be to calculate this in galaxy.tools.actions when the + # job is created and all the output permissions are already known. Having to reload + # these permissions in the job code shouldn't strictly be needed. + + requires_sharing = False + for dataset_assoc in self.output_datasets + self.output_library_datasets: + if not security_agent.dataset_is_private_to_a_user(dataset_assoc.dataset.dataset): + requires_sharing = True + break + + return requires_sharing + def to_dict(self, view="collection", system_details=False): if view == "admin_job_list": rval = super().to_dict(view="collection") @@ -3032,7 +3046,14 @@ def __filter_contents(self, content_class, **kwds): visible = galaxy.util.string_as_bool_or_none(kwds.get("visible", None)) if visible is not None: query = query.filter(content_class.visible == visible) + if "object_store_ids" in kwds: + if content_class == HistoryDatasetAssociation: + query = query.join(content_class.dataset).filter( + Dataset.table.c.object_store_id.in_(kwds.get("object_store_ids")) + ) + # else ignoring object_store_ids on HDCAs... if "ids" in kwds: + assert "object_store_ids" not in kwds ids = kwds["ids"] max_in_filter_length = kwds.get("max_in_filter_length", MAX_IN_FILTER_LENGTH) if len(ids) < max_in_filter_length: @@ -3538,14 +3559,27 @@ def is_new(self): def in_ready_state(self): return self.state in self.ready_states + @property + def shareable(self): + """Return True if placed into an objectstore not labeled as ``private``.""" + if self.external_filename: + return True + else: + object_store = self._assert_object_store_set() + return not object_store.is_private(self) + + def ensure_shareable(self): + if not self.shareable: + raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) + def get_file_name(self): if self.purged: log.warning(f"Attempt to get file name of purged dataset {self.id}") return "" if not self.external_filename: - assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" - if self.object_store.exists(self): - file_name = self.object_store.get_filename(self) + object_store = self._assert_object_store_set() + if object_store.exists(self): + file_name = object_store.get_filename(self) else: file_name = "" if not file_name and self.state not in (self.states.NEW, self.states.QUEUED): @@ -3566,6 +3600,10 @@ def set_file_name(self, filename): file_name = property(get_file_name, set_file_name) + def _assert_object_store_set(self): + assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" + return self.object_store + def get_extra_files_path(self): # Unlike get_file_name - external_extra_files_path is not backed by an # actual database column so if SA instantiates this object - the @@ -4601,6 +4639,9 @@ def to_library_dataset_dataset_association( """ Copy this HDA to a library optionally replacing an existing LDDA. """ + if not self.dataset.shareable: + raise Exception("Attempting to share a non-shareable dataset.") + if replace_dataset: # The replace_dataset param ( when not None ) refers to a LibraryDataset that # is being replaced with a new version. diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py index 881e3276bf47..7c698feac779 100644 --- a/lib/galaxy/model/security.py +++ b/lib/galaxy/model/security.py @@ -899,16 +899,23 @@ def set_all_dataset_permissions(self, dataset, permissions=None, new=False, flus # Make sure that DATASET_MANAGE_PERMISSIONS is associated with at least 1 role has_dataset_manage_permissions = False permissions = permissions or {} - for action, roles in permissions.items(): - if isinstance(action, Action): - if action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS and roles: - has_dataset_manage_permissions = True - break - elif action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS.action and roles: - has_dataset_manage_permissions = True - break + for _ in _walk_action_roles(permissions, self.permitted_actions.DATASET_MANAGE_PERMISSIONS): + has_dataset_manage_permissions = True + break if not has_dataset_manage_permissions: return "At least 1 role must be associated with manage permissions on this dataset." + + # If this is new, the objectstore likely hasn't been set yet - defer check until + # the job handler assigns it. + if not new and not dataset.shareable: + # ensure dataset not shared. + dataset_access_roles = [] + for _, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_ACCESS): + dataset_access_roles.extend(roles) + + if len(dataset_access_roles) != 1 or dataset_access_roles[0].type != self.model.Role.types.PRIVATE: + return galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE + flush_needed = False # Delete all of the current permissions on the dataset if not new: @@ -937,6 +944,12 @@ def set_dataset_permission(self, dataset, permission=None): Permission looks like: { Action.action : [ Role, Role ] } """ permission = permission or {} + + # if modifying access - ensure it is shareable. + for _ in _walk_action_roles(permission, self.permitted_actions.DATASET_ACCESS): + dataset.ensure_shareable() + break + flush_needed = False for action, roles in permission.items(): if isinstance(action, Action): @@ -976,6 +989,7 @@ def copy_dataset_permissions(self, src, dst): self.set_all_dataset_permissions(dst, self.get_permissions(src)) def privately_share_dataset(self, dataset, users=None): + dataset.ensure_shareable() intersect = None users = users or [] for user in users: @@ -1154,6 +1168,19 @@ def dataset_is_private_to_user(self, trans, dataset): else: return False + def dataset_is_private_to_a_user(self, dataset): + """ + If the Dataset object has exactly one access role and that is + the current user's private role then we consider the dataset private. + """ + access_roles = dataset.get_access_roles(self) + + if len(access_roles) != 1: + return False + else: + access_role = access_roles[0] + return access_role.type == self.model.Role.types.PRIVATE + def datasets_are_public(self, trans, datasets): """ Given a transaction object and a list of Datasets, return @@ -1188,6 +1215,8 @@ def datasets_are_public(self, trans, datasets): def make_dataset_public(self, dataset): # A dataset is considered public if there are no "access" actions associated with it. Any # other actions ( 'manage permissions', 'edit metadata' ) are irrelevant. + dataset.ensure_shareable() + flush_needed = False for dp in dataset.actions: if dp.action == self.permitted_actions.DATASET_ACCESS.action: @@ -1635,3 +1664,12 @@ def set_dataset_permissions(self, hda, user, site): hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization(hda=hda, user=user, site=site) self.sa_session.add(hdadaa) self.sa_session.flush() + + +def _walk_action_roles(permissions, query_action): + for action, roles in permissions.items(): + if isinstance(action, Action): + if action == query_action and roles: + yield action, roles + elif action == query_action.action and roles: + yield action, roles diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 17708ecc7db2..a4628b926ac2 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -43,6 +43,7 @@ NO_SESSION_ERROR_MESSAGE = ( "Attempted to 'create' object store entity in configuration with no database session present." ) +DEFAULT_PRIVATE = False log = logging.getLogger(__name__) @@ -105,6 +106,9 @@ def create( This method will create a proper directory structure for the file if the directory does not already exist. + + The method returns the concrete objectstore the supplied object is stored + in. """ raise NotImplementedError() @@ -244,6 +248,19 @@ def get_concrete_store_description_markdown(self, obj): yet been set, this may return ``None``. """ + @abc.abstractmethod + def is_private(self, obj): + """Return True iff supplied object is stored in private ConcreteObjectStore.""" + + def object_store_ids(self, private=None): + """Return IDs of all concrete object stores - either private ones or non-private ones. + + This should just return an empty list for non-DistributedObjectStore object stores, + i.e. concrete objectstores and the HierarchicalObjectStore since these do not + use the object_store_id column for objects (Galaxy Datasets). + """ + return [] + @abc.abstractmethod def get_store_usage_percent(self): """Return the percentage indicating how full the store is.""" @@ -329,10 +346,11 @@ def to_dict(self): extra_dirs = [] for extra_dir_type, extra_dir_path in self.extra_dirs.items(): extra_dirs.append({"type": extra_dir_type, "path": extra_dir_path}) + store_type = self.store_type return { "config": config_to_dict(self.config), "extra_dirs": extra_dirs, - "type": self.store_type, + "type": store_type, } def _get_object_id(self, obj): @@ -389,6 +407,16 @@ def get_store_usage_percent(self): def get_store_by(self, obj, **kwargs): return self._invoke("get_store_by", obj, **kwargs) + def is_private(self, obj): + return self._invoke("is_private", obj) + + @classmethod + def parse_private_from_config_xml(clazz, config_xml): + private = DEFAULT_PRIVATE + if config_xml is not None: + private = asbool(config_xml.attrib.get("private", DEFAULT_PRIVATE)) + return private + class ConcreteObjectStore(BaseObjectStore): """Subclass of ObjectStore for stores that don't delegate (non-nested). @@ -416,9 +444,12 @@ def __init__(self, config, config_dict=None, **kwargs): self.store_by = config_dict.get("store_by", None) or getattr(config, "object_store_store_by", "id") self.name = config_dict.get("name", None) self.description = config_dict.get("description", None) + # Annotate this as true to prevent sharing of data. + self.private = config_dict.get("private", DEFAULT_PRIVATE) def to_dict(self): rval = super().to_dict() + rval["private"] = self.private rval["store_by"] = self.store_by rval["name"] = self.name rval["description"] = self.description @@ -433,6 +464,9 @@ def _get_concrete_store_description_markdown(self, obj): def _get_store_by(self, obj): return self.store_by + def _is_private(self, obj): + return self.private + class DiskObjectStore(ConcreteObjectStore): """ @@ -445,7 +479,7 @@ class DiskObjectStore(ConcreteObjectStore): >>> file_path=tempfile.mkdtemp() >>> obj = Bunch(id=1) >>> s = DiskObjectStore(Bunch(umask=0o077, jobs_directory=file_path, new_file_path=file_path, object_store_check_old_style=False), dict(files_dir=file_path)) - >>> s.create(obj) + >>> o = s.create(obj) >>> s.exists(obj) True >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' @@ -492,6 +526,7 @@ def parse_xml(clazz, config_xml): extra_dirs.append({"type": e.get("type"), "path": e.get("path")}) config_dict["extra_dirs"] = extra_dirs + config_dict["private"] = BaseObjectStore.parse_private_from_config_xml(config_xml) return config_dict def to_dict(self): @@ -631,6 +666,7 @@ def _create(self, obj, **kwargs): if not dir_only: open(path, "w").close() # Should be rb? umask_fix_perms(path, self.config.umask, 0o666) + return self def _empty(self, obj, **kwargs): """Override `ObjectStore`'s stub by checking file size on disk.""" @@ -767,7 +803,8 @@ def file_ready(self, obj, **kwargs): def _create(self, obj, **kwargs): """Create a backing file in a random backend.""" - random.choice(list(self.backends.values())).create(obj, **kwargs) + objectstore = random.choice(list(self.backends.values())) + return objectstore.create(obj, **kwargs) def _empty(self, obj, **kwargs): """For the first backend that has this `obj`, determine if it is empty.""" @@ -806,6 +843,9 @@ def _get_concrete_store_name(self, obj): def _get_concrete_store_description_markdown(self, obj): return self._call_method("_get_concrete_store_description_markdown", obj, None, False) + def _is_private(self, obj): + return self._call_method("_is_private", obj, ObjectNotFound, True) + def _get_store_by(self, obj): return self._call_method("_get_store_by", obj, None, False) @@ -980,23 +1020,27 @@ def __filesystem_monitor(self, sleeper: Sleeper): def _create(self, obj, **kwargs): """The only method in which obj.object_store_id may be None.""" - if obj.object_store_id is None or not self._exists(obj, **kwargs): - if obj.object_store_id is None or obj.object_store_id not in self.backends: + object_store_id = obj.object_store_id + if object_store_id is None or not self._exists(obj, **kwargs): + if object_store_id is None or object_store_id not in self.backends: try: - obj.object_store_id = random.choice(self.weighted_backend_ids) + object_store_id = random.choice(self.weighted_backend_ids) + obj.object_store_id = object_store_id except IndexError: raise ObjectInvalid( f"objectstore.create, could not generate obj.object_store_id: {obj}, kwargs: {kwargs}" ) log.debug( - "Selected backend '%s' for creation of %s %s", obj.object_store_id, obj.__class__.__name__, obj.id + "Selected backend '%s' for creation of %s %s", object_store_id, obj.__class__.__name__, obj.id ) else: log.debug( "Using preferred backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id) + % (object_store_id, obj.__class__.__name__, obj.id) ) - self.backends[obj.object_store_id].create(obj, **kwargs) + return self.backends[object_store_id].create(obj, **kwargs) + else: + return self.backends[object_store_id] def _call_method(self, method, obj, default, default_is_exception, **kwargs): object_store_id = self.__get_store_id_for(obj, **kwargs) @@ -1032,6 +1076,14 @@ def __get_store_id_for(self, obj, **kwargs): return id return None + def object_store_ids(self, private=None): + object_store_ids = [] + for backend_id, backend in self.backends.items(): + object_store_ids.extend(backend.object_store_ids(private=private)) + if backend.private is private or private is None: + object_store_ids.append(backend_id) + return object_store_ids + class HierarchicalObjectStore(NestedObjectStore): @@ -1049,22 +1101,33 @@ def __init__(self, config, config_dict, fsmon=False): super().__init__(config, config_dict) backends: Dict[int, ObjectStore] = {} + is_private = config_dict.get("private", DEFAULT_PRIVATE) for order, backend_def in enumerate(config_dict["backends"]): + backend_is_private = backend_def.get("private") + if backend_is_private is not None: + assert ( + is_private == backend_is_private + ), "The private attribute must be defined on the HierarchicalObjectStore and not contained concrete objectstores." backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) self.backends = backends + self.private = is_private @classmethod def parse_xml(clazz, config_xml): backends_list = [] + is_private = BaseObjectStore.parse_private_from_config_xml(config_xml) for backend in sorted(config_xml.find("backends"), key=lambda b: int(b.get("order"))): store_type = backend.get("type") objectstore_class, _ = type_to_object_store_class(store_type) backend_config_dict = objectstore_class.parse_xml(backend) + backend_config_dict["private"] = is_private backend_config_dict["type"] = store_type backends_list.append(backend_config_dict) - return {"backends": backends_list} + config_dict = {"backends": backends_list} + config_dict["private"] = is_private + return config_dict def to_dict(self): as_dict = super().to_dict() @@ -1073,6 +1136,7 @@ def to_dict(self): backend_as_dict = backend.to_dict() backends.append(backend_as_dict) as_dict["backends"] = backends + as_dict["private"] = self.private return as_dict def _exists(self, obj, **kwargs): @@ -1084,7 +1148,13 @@ def _exists(self, obj, **kwargs): def _create(self, obj, **kwargs): """Call the primary object store.""" - self.backends[0].create(obj, **kwargs) + return self.backends[0].create(obj, **kwargs) + + def _is_private(self, obj): + # Unlink the DistributedObjectStore - the HierarchicalObjectStore does not use + # object_store_id - so all the contained object stores need to define is_private + # the same way. + return self.private def type_to_object_store_class(store: str, fsmon: bool = False) -> Tuple[Type[BaseObjectStore], Dict[str, Any]]: @@ -1250,16 +1320,19 @@ def __init__(self, has_object_store, user): self.object_store_id = None self.user = user - def set_object_store_id(self, data): - self.set_dataset_object_store_id(data.dataset) + def set_object_store_id(self, data, require_shareable=False): + self.set_dataset_object_store_id(data.dataset, require_shareable=require_shareable) - def set_dataset_object_store_id(self, dataset): + def set_dataset_object_store_id(self, dataset, require_shareable=True): # Create an empty file immediately. The first dataset will be # created in the "default" store, all others will be created in # the same store as the first. dataset.object_store_id = self.object_store_id try: - self.object_store.create(dataset) + ensure_non_private = require_shareable + concrete_store = self.object_store.create(dataset, ensure_non_private=ensure_non_private) + if concrete_store.private and require_shareable: + raise Exception("Attempted to create shared output datasets in objectstore with sharing disabled") except ObjectInvalid: raise Exception("Unable to create output dataset: object store is full") self.object_store_id = dataset.object_store_id # these will be the same thing after the first output diff --git a/lib/galaxy/objectstore/azure_blob.py b/lib/galaxy/objectstore/azure_blob.py index f90fc565458e..db8a047f553e 100644 --- a/lib/galaxy/objectstore/azure_blob.py +++ b/lib/galaxy/objectstore/azure_blob.py @@ -77,6 +77,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. diff --git a/lib/galaxy/objectstore/cloud.py b/lib/galaxy/objectstore/cloud.py index 15f7ef987461..e4a9ed5bc14e 100644 --- a/lib/galaxy/objectstore/cloud.py +++ b/lib/galaxy/objectstore/cloud.py @@ -608,6 +608,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_os(rel_path, from_string="") + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index cf0f8a7770d7..3dde6e991a1d 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -111,6 +111,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": DiskObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -599,6 +600,7 @@ def _create(self, obj, **kwargs): open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_irods(rel_path, from_string="") log.debug("irods_pt _create: %s", ipt_timer) + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/pithos.py b/lib/galaxy/objectstore/pithos.py index f28baf6c31de..9eaa113bc887 100644 --- a/lib/galaxy/objectstore/pithos.py +++ b/lib/galaxy/objectstore/pithos.py @@ -77,6 +77,7 @@ def parse_config_xml(config_xml): log.error(msg) raise Exception(msg) r["extra_dirs"] = [{k: e.get(k) for k in attrs} for e in extra_dirs] + r["private"] = ConcreteObjectStore.parse_private_from_config_xml(config_xml) if "job_work" not in (d["type"] for d in r["extra_dirs"]): msg = f'No value for {tag}:type="job_work" in XML tree' log.error(msg) @@ -297,6 +298,7 @@ def _create(self, obj, **kwargs): new_file = os.path.join(self.staging_path, rel_path) open(new_file, "w").close() self.pithos.upload_from_string(rel_path, "") + return self def _empty(self, obj, **kwargs): """ diff --git a/lib/galaxy/objectstore/s3.py b/lib/galaxy/objectstore/s3.py index cca72815dc8d..52432b3bf6bf 100644 --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -105,6 +105,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -624,6 +625,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_os(rel_path, from_string="") + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py index 412e83d0b548..cee7aef04fc2 100644 --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -140,7 +140,7 @@ def __new_history_upload(trans, uploaded_dataset, history=None, state=None): trans.sa_session.flush() history.add_dataset(hda, genome_build=uploaded_dataset.dbkey, quota=False) permissions = trans.app.security_agent.history_get_default_permissions(history) - trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions) + trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False) trans.sa_session.flush() return hda @@ -211,7 +211,7 @@ def __new_library_upload(trans, cntrller, uploaded_dataset, library_bunch, tag_h else: # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.set_all_dataset_permissions( - ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user) + ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), new=True ) folder.add_library_dataset(ld, genome_build=uploaded_dataset.dbkey) trans.sa_session.add(folder) diff --git a/lib/galaxy/webapps/galaxy/api/history_contents.py b/lib/galaxy/webapps/galaxy/api/history_contents.py index add89f3c1981..472cb3e47cc7 100644 --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -198,6 +198,11 @@ def get_legacy_index_query_params( description="Whether to return visible or hidden datasets only. Leave unset for both.", deprecated=True, ), + shareable: Optional[bool] = Query( + default=None, + title="Shareable", + description="Whether to return only shareable or not shareable datasets. Leave unset for both.", + ), ) -> LegacyHistoryContentsIndexParams: """This function is meant to be used as a dependency to render the OpenAPI documentation correctly""" @@ -207,6 +212,7 @@ def get_legacy_index_query_params( details=details, deleted=deleted, visible=visible, + shareable=shareable, ) @@ -216,6 +222,7 @@ def parse_legacy_index_query_params( details: Optional[str] = None, deleted: Optional[bool] = None, visible: Optional[bool] = None, + shareable: Optional[bool] = None, **_, # Additional params are ignored ) -> LegacyHistoryContentsIndexParams: """Parses (legacy) query parameters for the history contents `index` operation @@ -242,6 +249,7 @@ def parse_legacy_index_query_params( ids=id_list, deleted=deleted, visible=visible, + shareable=shareable, dataset_details=dataset_details, ) except ValidationError as e: diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py index fe1578907b93..671a731b34fb 100644 --- a/lib/galaxy/webapps/galaxy/controllers/dataset.py +++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py @@ -327,7 +327,12 @@ def get_edit(self, trans, dataset_id=None, **kwd): permission_disable = True permission_inputs = list() if trans.user: - if data.dataset.actions: + if not data.dataset.shareable: + permission_message = "The dataset is stored on private storage to you and cannot be shared." + permission_inputs.append( + {"name": "not_shareable", "type": "hidden", "label": permission_message, "readonly": True} + ) + elif data.dataset.actions: in_roles = {} for action, roles in trans.app.security_agent.get_permissions(data.dataset).items(): in_roles[action.action] = [trans.security.encode_id(role.id) for role in roles] diff --git a/lib/galaxy/webapps/galaxy/services/datasets.py b/lib/galaxy/webapps/galaxy/services/datasets.py index 201820078164..7f7ee2293d70 100644 --- a/lib/galaxy/webapps/galaxy/services/datasets.py +++ b/lib/galaxy/webapps/galaxy/services/datasets.py @@ -116,6 +116,9 @@ class DatasetStorageDetails(Model): ) hashes: List[dict] = Field(description="The file contents hashes associated with the supplied dataset instance.") sources: List[dict] = Field(description="The file sources associated with the supplied dataset instance.") + shareable: bool = Field( + description="Is this dataset shareable.", + ) class DatasetInheritanceChainEntry(Model): @@ -378,6 +381,7 @@ def show_storage( sources = [s.to_dict() for s in dataset.sources] return DatasetStorageDetails( object_store_id=object_store_id, + shareable=dataset.shareable, name=name, description=description, percent_used=percent_used, diff --git a/lib/galaxy/webapps/galaxy/services/history_contents.py b/lib/galaxy/webapps/galaxy/services/history_contents.py index c388bae518cf..8c6952cda05c 100644 --- a/lib/galaxy/webapps/galaxy/services/history_contents.py +++ b/lib/galaxy/webapps/galaxy/services/history_contents.py @@ -61,6 +61,7 @@ User, ) from galaxy.model.security import GalaxyRBACAgent +from galaxy.objectstore import BaseObjectStore from galaxy.schema import ( FilterQueryParams, SerializationParams, @@ -142,6 +143,11 @@ class LegacyHistoryContentsIndexParams(Model): dataset_details: Optional[DatasetDetailsType] deleted: Optional[bool] visible: Optional[bool] + shareable: Optional[bool] = Field( + default=None, + title="Sharable", + description="Whether to return only shareable or not shareable datasets. Leave unset for both.", + ) class HistoryContentsIndexJobsSummaryParams(Model): @@ -252,6 +258,7 @@ class HistoriesContentsService(ServiceBase, ServesExportStores, ConsumesModelSto def __init__( self, security: IdEncodingHelper, + object_store: BaseObjectStore, history_manager: histories.HistoryManager, history_contents_manager: HistoryContentsManager, hda_manager: hdas.HDAManager, @@ -281,6 +288,7 @@ def __init__( self.item_operator = HistoryItemOperator(self.hda_manager, self.hdca_manager, self.dataset_collection_manager) self.short_term_storage_allocator = short_term_storage_allocator self.genomes_manager = genomes_manager + self.object_store = object_store def index( self, @@ -918,6 +926,13 @@ def __index_legacy( ids = legacy_params_dict.get("ids") if ids: legacy_params_dict["ids"] = self.decode_ids(ids) + + object_store_ids = None + shareable = legacy_params.shareable + if shareable is not None: + object_store_ids = self.object_store.object_store_ids(private=not shareable) + if object_store_ids: + legacy_params_dict["object_store_ids"] = object_store_ids contents = history.contents_iter(**legacy_params_dict) items = [ self._serialize_legacy_content_item(trans, content, legacy_params_dict.get("dataset_details")) diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py index db93e5d2702a..168713509f57 100644 --- a/lib/galaxy_test/api/test_dataset_collections.py +++ b/lib/galaxy_test/api/test_dataset_collections.py @@ -185,7 +185,7 @@ def test_list_list_list_download(self): @requires_new_user def test_hda_security(self): with self.dataset_populator.test_history(require_new=False) as history_id: - element_identifiers = self.dataset_collection_populator.pair_identifiers(history_id) + element_identifiers = self.dataset_collection_populator.pair_identifiers(history_id, wait=True) self.dataset_populator.make_private(history_id, element_identifiers[0]["id"]) with self._different_user(): history_id = self.dataset_populator.new_history() diff --git a/lib/galaxy_test/api/test_libraries.py b/lib/galaxy_test/api/test_libraries.py index ffc680204c38..9fb218ee338c 100644 --- a/lib/galaxy_test/api/test_libraries.py +++ b/lib/galaxy_test/api/test_libraries.py @@ -599,4 +599,5 @@ def _create_dataset_in_folder_in_library(self, library_name, content="1 2 3", wa hda_id = self.dataset_populator.new_dataset(history_id, content=content, wait=wait)["id"] payload = {"from_hda_id": hda_id, "create_type": "file", "folder_id": folder_id} ld = self._post(f"libraries/{folder_id}/contents", payload) + ld.raise_for_status() return ld diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index a73be6f9085f..07ff1bb09139 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -409,7 +409,10 @@ def new_dataset_request( run_response = self.tools_post(payload) else: payload = self.fetch_payload(history_id, content=content, **kwds) - run_response = self.fetch(payload, wait=wait) + fetch_kwds = dict(wait=wait) + if "assert_ok" in kwds: + fetch_kwds["assert_ok"] = kwds["assert_ok"] + run_response = self.fetch(payload, **fetch_kwds) if wait: self.wait_for_tool_run(history_id, run_response, assert_ok=kwds.get("assert_ok", True)) return run_response @@ -1105,10 +1108,24 @@ def make_private(self, history_id: str, dataset_id: str) -> dict: "access": [role_id], "manage": [role_id], } + response = self.update_permissions_raw(history_id, dataset_id, payload) + response.raise_for_status() + return response.json() + + def make_public_raw(self, history_id: str, dataset_id: str) -> Response: + role_id = self.user_private_role_id() + payload = { + "access": json.dumps([]), + "manage": json.dumps([role_id]), + } + response = self.update_permissions_raw(history_id, dataset_id, payload) + return response + + def update_permissions_raw(self, history_id: str, dataset_id: str, payload: dict) -> Response: url = f"histories/{history_id}/contents/{dataset_id}/permissions" update_response = self._put(url, payload, admin=True, json=True) - assert update_response.status_code == 200, update_response.content - return update_response.json() + update_response.raise_for_status() + return update_response def make_public(self, history_id: str) -> dict: using_requirement("new_published_objects") @@ -2697,8 +2714,8 @@ def __create_payload_collection(self, history_id: str, identifiers_func, collect payload = dict(history_id=history_id, collection_type=collection_type, **kwds) return payload - def pair_identifiers(self, history_id: str, contents=None): - hda1, hda2 = self.__datasets(history_id, count=2, contents=contents) + def pair_identifiers(self, history_id: str, contents=None, wait: bool = False): + hda1, hda2 = self.__datasets(history_id, count=2, contents=contents, wait=wait) element_identifiers = [ dict(name="forward", src="hda", id=hda1["id"]), @@ -2734,10 +2751,12 @@ def __create(self, payload, wait=False): else: return self.dataset_populator.fetch(payload, wait=wait) - def __datasets(self, history_id: str, count: int, contents=None): + def __datasets(self, history_id: str, count: int, contents=None, wait: bool = False): datasets = [] for i in range(count): - new_kwds = {} + new_kwds = { + "wait": wait, + } if contents: new_kwds["content"] = contents[i] datasets.append(self.dataset_populator.new_dataset(history_id, **new_kwds)) diff --git a/test/integration/objectstore/test_private_handling.py b/test/integration/objectstore/test_private_handling.py new file mode 100644 index 000000000000..48d6e97c44d1 --- /dev/null +++ b/test/integration/objectstore/test_private_handling.py @@ -0,0 +1,51 @@ +"""Integration tests for mixing store_by.""" + +import string + +from galaxy_test.base import api_asserts +from ._base import BaseObjectStoreIntegrationTestCase + +PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE = string.Template( + """ + + + + + +""" +) + +TEST_INPUT_FILES_CONTENT = "1 2 3" + + +class PrivatePreventsSharingObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = True + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + """Test each object store configures files correctly.""" + with self.dataset_populator.test_history() as history_id: + hda = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True) + content = self.dataset_populator.get_history_dataset_content(history_id, hda["id"]) + assert content.startswith(TEST_INPUT_FILES_CONTENT) + response = self.dataset_populator.make_public_raw(history_id, hda["id"]) + assert response.status_code != 200 + api_asserts.assert_error_message_contains(response, "Attempting to share a non-shareable dataset.") + + +class PrivateCannotWritePublicDataObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = False + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + with self.dataset_populator.test_history() as history_id: + response = self.dataset_populator.new_dataset_request( + history_id, content=TEST_INPUT_FILES_CONTENT, wait=True, assert_ok=False + ) + job = response.json()["jobs"][0] + final_state = self.dataset_populator.wait_for_job(job["id"]) + assert final_state == "error" diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 8ea27b19b04e..5b6ae2daa821 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -35,6 +35,7 @@ not os.environ.get("GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE"), reason="GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE not set", ) +PRIVATE_OBJECT_STORE_ID = "my_private_data" class BaseModelTestCase(TestCase): @@ -153,8 +154,12 @@ def assert_display_name_converts_to_unicode(item, name): assert history.get_display_name() == "Hello₩◎ґʟⅾ" def test_hda_to_library_dataset_dataset_association(self): - u = model.User(email="mary@example.com", password="password") - hda = model.HistoryDatasetAssociation(name="hda_name") + model = self.model + u = self.model.User(email="mary@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation( + name="hda_name", create_dataset=True, history=h1, sa_session=model.session + ) self.persist(hda) trans = collections.namedtuple("trans", "user") target_folder = model.LibraryFolder(name="library_folder") @@ -180,6 +185,24 @@ def test_hda_to_library_dataset_dataset_association(self): assert new_ldda.library_dataset.expired_datasets[0] == ldda assert target_folder.item_count == 1 + def test_hda_to_library_dataset_dataset_association_fails_if_private(self): + model = self.model + u = model.User(email="mary2@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation( + name="hda_name", create_dataset=True, history=h1, sa_session=model.session + ) + hda.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self.persist(hda) + trans = collections.namedtuple("trans", "user") + target_folder = model.LibraryFolder(name="library_folder") + with pytest.raises(Exception) as exec_info: + hda.to_library_dataset_dataset_association( + trans=trans(user=u), + target_folder=target_folder, + ) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + def test_tags(self): TAG_NAME = "Test Tag" my_tag = model.Tag(name=TAG_NAME) @@ -588,8 +611,8 @@ def test_history_contents(self): self.persist(u, h1, expunge=False) d1 = self.new_hda(h1, name="1") - d2 = self.new_hda(h1, name="2", visible=False) - d3 = self.new_hda(h1, name="3", deleted=True) + d2 = self.new_hda(h1, name="2", visible=False, object_store_id="foobar") + d3 = self.new_hda(h1, name="3", deleted=True, object_store_id="three_store") d4 = self.new_hda(h1, name="4", visible=False, deleted=True) self.session().flush() @@ -603,8 +626,11 @@ def contents_iter_names(**kwds): assert contents_iter_names() == ["1", "2", "3", "4"] assert contents_iter_names(deleted=False) == ["1", "2"] assert contents_iter_names(visible=True) == ["1", "3"] + assert contents_iter_names(visible=True, object_store_ids=["three_store"]) == ["3"] assert contents_iter_names(visible=False) == ["2", "4"] assert contents_iter_names(deleted=True, visible=False) == ["4"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar"]) == ["2"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar2"]) == [] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id]) == ["1", "2", "3", "4"] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id], max_in_filter_length=1) == ["1", "2", "3", "4"] @@ -960,6 +986,77 @@ def test_next_hid(self): h._next_hid(n=3) assert h.hid_counter == 5 + def test_cannot_make_private_objectstore_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_public") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + self._make_owned(security_agent, u_from, d1) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_objectstore_dataset_shared(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_shared") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.privately_share_dataset(d1.dataset, [u_to]) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_set_dataset_permisson_on_private(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_set_permissions_on_private") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + role = security_agent.get_private_user_role(u_to, auto_create=True) + access_action = security_agent.permitted_actions.DATASET_ACCESS.action + + with pytest.raises(Exception) as exec_info: + security_agent.set_dataset_permission(d1.dataset, {access_action: [role]}) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, u_other = self._three_users("cannot_make_private_dataset_public") + + h = self.model.History(name="History for Annotation", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.make_dataset_public(d1.dataset) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + def _three_users(self, suffix): email_from = f"user_{suffix}e1@example.com" email_to = f"user_{suffix}e2@example.com" @@ -976,18 +1073,26 @@ def _make_private(self, security_agent, user, hda): access_action = security_agent.permitted_actions.DATASET_ACCESS.action manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {access_action: [role], manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) def _make_owned(self, security_agent, user, hda): role = security_agent.get_private_user_role(user, auto_create=True) manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) + + def _set_permissions(self, security_agent, dataset, permissions): + # TODO: refactor set_all_dataset_permissions to actually throw an exception :| + error = security_agent.set_all_dataset_permissions(dataset, permissions) + if error: + raise Exception(error) def new_hda(self, history, **kwds): - return history.add_dataset( - model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) - ) + object_store_id = kwds.pop("object_store_id", None) + hda = self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) + if object_store_id is not None: + hda.dataset.object_store_id = object_store_id + return history.add_dataset(hda) @skip_if_not_postgres_base @@ -1044,3 +1149,9 @@ def get_store_by(self, *args, **kwds): def update_from_file(self, *arg, **kwds): pass + + def is_private(self, object): + if object.object_store_id == PRIVATE_OBJECT_STORE_ID: + return True + else: + return False diff --git a/test/unit/objectstore/test_objectstore.py b/test/unit/objectstore/test_objectstore.py index fa6ed55a1b5f..f880f4f2af41 100644 --- a/test/unit/objectstore/test_objectstore.py +++ b/test/unit/objectstore/test_objectstore.py @@ -307,8 +307,26 @@ def test_concrete_name_without_objectstore_id(): assert files1_name is None +MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG = """ + + + + + + + + + + + + + + +""" + + MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG = """ - + @@ -326,12 +344,45 @@ def test_concrete_name_without_objectstore_id(): def test_mixed_store_by(): + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["store_by"] == "id" + assert as_dict["backends"][1]["store_by"] == "uuid" + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): as_dict = object_store.to_dict() assert as_dict["backends"][0]["store_by"] == "id" assert as_dict["backends"][1]["store_by"] == "uuid" +def test_mixed_private(): + # Distributed object store can combine private and non-private concrete objectstores + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + ids = object_store.object_store_ids() + print(ids) + assert len(ids) == 2 + + ids = object_store.object_store_ids(private=True) + assert len(ids) == 1 + assert ids[0] == "files2" + + ids = object_store.object_store_ids(private=False) + assert len(ids) == 1 + assert ids[0] == "files1" + + as_dict = object_store.to_dict() + assert not as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + assert object_store.private + assert as_dict["private"] is True + + DISTRIBUTED_TEST_CONFIG = """ @@ -486,7 +537,7 @@ def test_config_parse_pithos(): assert len(extra_dirs) == 2 -S3_TEST_CONFIG = """ +S3_TEST_CONFIG = """ @@ -498,6 +549,7 @@ def test_config_parse_pithos(): S3_TEST_CONFIG_YAML = """ type: s3 +private: true auth: access_key: access_moo secret_key: secret_cow @@ -521,6 +573,7 @@ def test_config_parse_pithos(): def test_config_parse_s3(): for config_str in [S3_TEST_CONFIG, S3_TEST_CONFIG_YAML]: with TestConfig(config_str, clazz=UnitializeS3ObjectStore) as (directory, object_store): + assert object_store.private assert object_store.access_key == "access_moo" assert object_store.secret_key == "secret_cow" From 50ffad2b845aae49f9a350d9f8c03085bc9fe326 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sat, 11 Jun 2022 12:20:15 -0400 Subject: [PATCH 02/21] Rename test_selection to be more specific. --- ...lection.py => test_selection_with_resources_parameters.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename test/integration/objectstore/{test_selection.py => test_selection_with_resources_parameters.py} (97%) diff --git a/test/integration/objectstore/test_selection.py b/test/integration/objectstore/test_selection_with_resources_parameters.py similarity index 97% rename from test/integration/objectstore/test_selection.py rename to test/integration/objectstore/test_selection_with_resources_parameters.py index bb008229d8bb..f918cdea474c 100644 --- a/test/integration/objectstore/test_selection.py +++ b/test/integration/objectstore/test_selection_with_resources_parameters.py @@ -1,4 +1,4 @@ -"""Integration tests for object stores.""" +"""Test selecting an object store with resource parameters configured in job configuration.""" import os import string @@ -48,7 +48,7 @@ ) -class TestObjectStoreSelectionIntegration(BaseObjectStoreIntegrationTestCase): +class TestObjectStoreSelectionWithResourceParameterIntegration(BaseObjectStoreIntegrationTestCase): # populated by config_object_store files_default_path: str files_static_path: str From 8e0a01e59d48fd93bd89a2ea91fd6f01ecf4e71a Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 11 Sep 2020 20:20:18 -0400 Subject: [PATCH 03/21] get_quota in SQL --- lib/galaxy/quota/__init__.py | 97 +++++++++++++++++++++--------------- test/unit/data/test_quota.py | 3 +- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/lib/galaxy/quota/__init__.py b/lib/galaxy/quota/__init__.py index 7c119f6bdc0c..ae074228f0dc 100644 --- a/lib/galaxy/quota/__init__.py +++ b/lib/galaxy/quota/__init__.py @@ -1,6 +1,8 @@ """Galaxy Quotas""" import logging +from sqlalchemy.sql import text + import galaxy.util log = logging.getLogger(__name__) @@ -98,51 +100,64 @@ def get_quota(self, user): quotas. """ if not user: - return self.default_unregistered_quota - quotas = [] - for group in [uga.group for uga in user.groups]: - for quota in [gqa.quota for gqa in group.quotas]: - if quota not in quotas: - quotas.append(quota) - for quota in [uqa.quota for uqa in user.quotas]: - if quota not in quotas: - quotas.append(quota) - use_default = True - max = 0 - adjustment = 0 - rval = 0 - for quota in quotas: - if quota.deleted: - continue - if quota.operation == "=" and quota.bytes == -1: - rval = None - break - elif quota.operation == "=": - use_default = False - if quota.bytes > max: - max = quota.bytes - elif quota.operation == "+": - adjustment += quota.bytes - elif quota.operation == "-": - adjustment -= quota.bytes - if use_default: - max = self.default_registered_quota - if max is None: - rval = None - if rval is not None: - rval = max + adjustment - if rval <= 0: - rval = 0 - return rval + return self._default_unregistered_quota + query = text( + """ +SELECT ( + COALESCE(MAX(CASE WHEN union_quota.operation = '=' + THEN union_quota.bytes + ELSE NULL + END), + (SELECT default_quota.bytes + FROM quota as default_quota + LEFT JOIN default_quota_association on default_quota.id = default_quota_association.quota_id + WHERE default_quota_association.type == 'registered' + AND default_quota.deleted != :is_true)) + + + (CASE WHEN SUM(CASE WHEN union_quota.operation = '=' AND union_quota.bytes = -1 + THEN 1 ELSE 0 + END) > 0 + THEN NULL + ELSE 0 END) + + + (COALESCE(SUM( + CASE WHEN union_quota.operation = '+' THEN union_quota.bytes + WHEN union_quota.operation = '-' THEN -1 * union_quota.bytes + ELSE 0 + END + ), 0)) + ) +FROM ( + SELECT user_quota.operation as operation, user_quota.bytes as bytes + FROM galaxy_user as user + LEFT JOIN user_quota_association as uqa on user.id = uqa.user_id + LEFT JOIN quota as user_quota on user_quota.id = uqa.quota_id + WHERE user_quota.deleted != :is_true + AND user.id = :user_id + UNION ALL + SELECT group_quota.operation as operation, group_quota.bytes as bytes + FROM galaxy_user as user + LEFT JOIN user_group_association as uga on user.id = uga.user_id + LEFT JOIN galaxy_group on galaxy_group.id = uga.group_id + LEFT JOIN group_quota_association as gqa on galaxy_group.id = gqa.group_id + LEFT JOIN quota as group_quota on group_quota.id = gqa.quota_id + WHERE group_quota.deleted != :is_true + AND user.id = :user_id +) as union_quota +""" + ) + conn = self.sa_session.connection() + with conn.begin(): + res = conn.execute(query, is_true=True, user_id=user.id).fetchone() + if res: + return res[0] + else: + return None @property - def default_unregistered_quota(self): + def _default_unregistered_quota(self): return self._default_quota(self.model.DefaultQuotaAssociation.types.UNREGISTERED) - @property - def default_registered_quota(self): - return self._default_quota(self.model.DefaultQuotaAssociation.types.REGISTERED) - def _default_quota(self, default_type): dqa = ( self.sa_session.query(self.model.DefaultQuotaAssociation) diff --git a/test/unit/data/test_quota.py b/test/unit/data/test_quota.py index 002253109dae..65421f8454b1 100644 --- a/test/unit/data/test_quota.py +++ b/test/unit/data/test_quota.py @@ -98,7 +98,8 @@ def _add_user_quota(self, user, quota): self.persist(quota, uqa, user) def _assert_user_quota_is(self, user, amount): - assert amount == self.quota_agent.get_quota(user) + actual_quota = self.quota_agent.get_quota(user) + assert amount == actual_quota, "Expected quota [%s], got [%s]" % (amount, actual_quota) if amount is None: user.total_disk_usage = 1000 job = model.Job() From f7a4992b4b20fa328fa82e6d57be4b3e3cf6bc27 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 7 Feb 2023 17:27:17 -0500 Subject: [PATCH 04/21] Implement quota tracking options per ObjectStore. --- .../DatasetStorage/DatasetStorage.test.js | 67 +--- .../Dataset/DatasetStorage/DatasetStorage.vue | 33 +- .../ObjectStore/DescribeObjectStore.test.js | 80 +++++ .../ObjectStore/DescribeObjectStore.vue | 70 ++++ .../ObjectStoreRestrictionSpan.test.js | 0 .../ObjectStoreRestrictionSpan.vue | 2 +- .../User/DiskUsage/Quota/QuotaUsageBar.vue | 29 +- .../DiskUsage/Quota/QuotaUsageProvider.js | 25 ++ lib/galaxy/jobs/__init__.py | 8 +- lib/galaxy/managers/configuration.py | 3 + lib/galaxy/managers/hdas.py | 5 +- lib/galaxy/managers/quotas.py | 6 +- lib/galaxy/managers/users.py | 36 +- lib/galaxy/model/__init__.py | 338 +++++++++++++++--- .../d0583094c8cd_add_quota_source_labels.py | 50 +++ lib/galaxy/model/migrations/util.py | 22 ++ lib/galaxy/objectstore/__init__.py | 134 ++++++- lib/galaxy/quota/__init__.py | 128 ++++--- lib/galaxy/quota/_schema.py | 10 + lib/galaxy/webapps/base/webapp.py | 2 +- lib/galaxy/webapps/galaxy/api/users.py | 34 +- lib/galaxy/webapps/galaxy/buildapp.py | 6 + .../webapps/galaxy/controllers/admin.py | 22 +- .../webapps/galaxy/controllers/dataset.py | 2 +- .../webapps/galaxy/controllers/history.py | 2 +- .../webapps/galaxy/services/datasets.py | 18 + lib/galaxy_test/base/populators.py | 27 +- scripts/cleanup_datasets/pgcleanup.py | 51 ++- scripts/set_user_disk_usage.py | 10 +- .../objectstore/test_private_handling.py | 9 +- .../objectstore/test_quota_limit.py | 72 ++++ ...est_selection_with_resource_parameters.py} | 31 +- test/integration/test_quota.py | 21 ++ test/unit/data/test_galaxy_mapping.py | 10 +- test/unit/data/test_quota.py | 319 +++++++++++++++-- test/unit/objectstore/test_objectstore.py | 41 +++ 36 files changed, 1434 insertions(+), 289 deletions(-) create mode 100644 client/src/components/ObjectStore/DescribeObjectStore.test.js create mode 100644 client/src/components/ObjectStore/DescribeObjectStore.vue rename client/src/components/{Dataset/DatasetStorage => ObjectStore}/ObjectStoreRestrictionSpan.test.js (100%) rename client/src/components/{Dataset/DatasetStorage => ObjectStore}/ObjectStoreRestrictionSpan.vue (93%) create mode 100644 client/src/components/User/DiskUsage/Quota/QuotaUsageProvider.js create mode 100644 lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py create mode 100644 test/integration/objectstore/test_quota_limit.py rename test/integration/objectstore/{test_selection_with_resources_parameters.py => test_selection_with_resource_parameters.py} (83%) diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js index 7a35517177a7..bfdedc98bda0 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js @@ -4,7 +4,6 @@ import { getLocalVue } from "tests/jest/helpers"; import flushPromises from "flush-promises"; import MockAdapter from "axios-mock-adapter"; import axios from "axios"; -import MarkdownIt from "markdown-it"; const localVue = getLocalVue(); @@ -12,32 +11,11 @@ const TEST_STORAGE_API_RESPONSE_WITHOUT_ID = { object_store_id: null, private: false, }; -const TEST_STORAGE_API_RESPONSE_WITH_ID = { - object_store_id: "foobar", - private: false, -}; -const TEST_STORAGE_API_RESPONSE_WITH_NAME = { - object_store_id: "foobar", - name: "my cool storage", - description: "My cool **markdown**", - private: true, -}; const TEST_DATASET_ID = "1"; const TEST_STORAGE_URL = `/api/datasets/${TEST_DATASET_ID}/storage`; -const TEST_RENDERED_MARKDOWN_AS_HTML = "

My cool markdown\n"; const TEST_ERROR_MESSAGE = "Opps all errors."; -// works fine without mocking but I guess it is more JS unit-y with the mock? -jest.mock("markdown-it"); -MarkdownIt.mockImplementation(() => { - return { - render(markdown) { - return TEST_RENDERED_MARKDOWN_AS_HTML; - }, - }; -}); - -describe("Dataset Storage", () => { +describe("DatasetStorage.vue", () => { let axiosMock; let wrapper; @@ -62,6 +40,7 @@ describe("Dataset Storage", () => { mount(); await wrapper.vm.$nextTick(); expect(wrapper.findAll("loading-span-stub").length).toBe(1); + expect(wrapper.findAll("describe-object-store-stub").length).toBe(0); }); it("test error rendering...", async () => { @@ -78,46 +57,8 @@ describe("Dataset Storage", () => { it("test dataset storage with object store without id", async () => { await mountWithResponse(TEST_STORAGE_API_RESPONSE_WITHOUT_ID); expect(wrapper.findAll("loading-span-stub").length).toBe(0); - expect(wrapper.vm.descriptionRendered).toBeNull(); - const header = wrapper.findAll("h2"); - expect(header.length).toBe(1); - expect(header.at(0).text()).toBe("Dataset Storage"); - const byIdSpan = wrapper.findAll(".display-os-by-id"); - expect(byIdSpan.length).toBe(0); - const byNameSpan = wrapper.findAll(".display-os-by-name"); - expect(byNameSpan.length).toBe(0); - const byDefaultSpan = wrapper.findAll(".display-os-default"); - expect(byDefaultSpan.length).toBe(1); - }); - - it("test dataset storage with object store id", async () => { - await mountWithResponse(TEST_STORAGE_API_RESPONSE_WITH_ID); - expect(wrapper.findAll("loading-span-stub").length).toBe(0); - expect(wrapper.vm.storageInfo.object_store_id).toBe("foobar"); - expect(wrapper.vm.descriptionRendered).toBeNull(); - const header = wrapper.findAll("h2"); - expect(header.length).toBe(1); - expect(header.at(0).text()).toBe("Dataset Storage"); - const byIdSpan = wrapper.findAll(".display-os-by-id"); - expect(byIdSpan.length).toBe(1); - const byNameSpan = wrapper.findAll(".display-os-by-name"); - expect(byNameSpan.length).toBe(0); - expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeFalsy(); - }); - - it("test dataset storage with object store name", async () => { - await mountWithResponse(TEST_STORAGE_API_RESPONSE_WITH_NAME); - expect(wrapper.findAll("loading-span-stub").length).toBe(0); - expect(wrapper.vm.storageInfo.object_store_id).toBe("foobar"); - expect(wrapper.vm.descriptionRendered).toBe(TEST_RENDERED_MARKDOWN_AS_HTML); - const header = wrapper.findAll("h2"); - expect(header.length).toBe(1); - expect(header.at(0).text()).toBe("Dataset Storage"); - const byIdSpan = wrapper.findAll(".display-os-by-id"); - expect(byIdSpan.length).toBe(0); - const byNameSpan = wrapper.findAll(".display-os-by-name"); - expect(byNameSpan.length).toBe(1); - expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeTruthy(); + expect(wrapper.findAll("describe-object-store-stub").length).toBe(1); + expect(wrapper.vm.storageInfo.private).toEqual(false); }); afterEach(() => { diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue index fdfa2c1dce69..6c35e101caa0 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue @@ -16,21 +16,7 @@

-

- This dataset is stored in - - a Galaxy object store named - {{ storageInfo.name }} - - - a Galaxy object store with id - {{ storageInfo.object_store_id }} - - - the default configured Galaxy object store . -

-
+
@@ -38,15 +24,14 @@ diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js b/client/src/components/ObjectStore/ObjectStoreRestrictionSpan.test.js similarity index 100% rename from client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js rename to client/src/components/ObjectStore/ObjectStoreRestrictionSpan.test.js diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue b/client/src/components/ObjectStore/ObjectStoreRestrictionSpan.vue similarity index 93% rename from client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue rename to client/src/components/ObjectStore/ObjectStoreRestrictionSpan.vue index 29d313a72142..5f1bb689db60 100644 --- a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue +++ b/client/src/components/ObjectStore/ObjectStoreRestrictionSpan.vue @@ -1,5 +1,5 @@ diff --git a/client/src/components/User/DiskUsage/Quota/QuotaUsageProvider.js b/client/src/components/User/DiskUsage/Quota/QuotaUsageProvider.js new file mode 100644 index 000000000000..5679fa3b7c17 --- /dev/null +++ b/client/src/components/User/DiskUsage/Quota/QuotaUsageProvider.js @@ -0,0 +1,25 @@ +import axios from "axios"; +import { SingleQueryProvider } from "components/providers/SingleQueryProvider"; +import { getAppRoot } from "onload/loadConfig"; +import { rethrowSimple } from "utils/simple-error"; +import { QuotaUsage } from "./model"; + +/** + * Fetches the disk usage corresponding to one quota source label - + * or the default quota sources if the supplied label is null. + * @returns {} + */ +async function fetchQuotaSourceUsage({ quotaSourceLabel = null }) { + if (quotaSourceLabel == null) { + quotaSourceLabel = "__null__"; + } + const url = `${getAppRoot()}api/users/current/usage/${quotaSourceLabel}`; + try { + const { data } = await axios.get(url); + return new QuotaUsage(data); + } catch (e) { + rethrowSimple(e); + } +} + +export const QuotaSourceUsageProvider = SingleQueryProvider(fetchQuotaSourceUsage); diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 216b06a1b36c..57c973064a4a 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1895,13 +1895,17 @@ def fail(message=job.info, exception=None): # custom post process setup collected_bytes = 0 + quota_source_info = None # Once datasets are collected, set the total dataset size (includes extra files) for dataset_assoc in job.output_datasets: if not dataset_assoc.dataset.dataset.purged: + # assume all datasets in a job get written to the same objectstore + quota_source_info = dataset_assoc.dataset.dataset.quota_source_info collected_bytes += dataset_assoc.dataset.set_total_size() - if job.user: - job.user.adjust_total_disk_usage(collected_bytes) + user = job.user + if user and collected_bytes > 0 and quota_source_info is not None and quota_source_info.use: + user.adjust_total_disk_usage(collected_bytes, quota_source_info.label) # Certain tools require tasks to be completed after job execution # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ). diff --git a/lib/galaxy/managers/configuration.py b/lib/galaxy/managers/configuration.py index a1f8fba0d76f..fbc761f787a1 100644 --- a/lib/galaxy/managers/configuration.py +++ b/lib/galaxy/managers/configuration.py @@ -195,6 +195,9 @@ def _config_is_truthy(item, key, **context): "expose_user_email": _use_config, "enable_tool_source_display": _use_config, "enable_celery_tasks": _use_config, + "quota_source_labels": lambda item, key, **context: list( + self.app.object_store.get_quota_source_map().get_quota_source_labels() + ), "user_library_import_dir_available": lambda item, key, **context: bool(item.get("user_library_import_dir")), "welcome_directory": _use_config, "themes": _use_config, diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 7abbee7f3b7f..224f3c2afffc 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -215,8 +215,9 @@ def _purge(self, hda, flush=True): quota_amount_reduction = hda.quota_amount(user) super().purge(hda, flush=flush) # decrease the user's space used - if quota_amount_reduction: - user.adjust_total_disk_usage(-quota_amount_reduction) + quota_source_info = hda.dataset.quota_source_info + if quota_amount_reduction and quota_source_info.use: + user.adjust_total_disk_usage(-quota_amount_reduction, quota_source_info.label) # .... states def error_if_uploading(self, hda): diff --git a/lib/galaxy/managers/quotas.py b/lib/galaxy/managers/quotas.py index 886f06489bb4..33013474de6a 100644 --- a/lib/galaxy/managers/quotas.py +++ b/lib/galaxy/managers/quotas.py @@ -59,7 +59,11 @@ def create_quota(self, payload: dict, decode_id=None) -> Tuple[model.Quota, str] raise ActionInputError("Operation for an unlimited quota must be '='.") # Create the quota quota = model.Quota( - name=params.name, description=params.description, amount=create_amount, operation=params.operation + name=params.name, + description=params.description, + amount=create_amount, + operation=params.operation, + quota_source_label=params.quota_source_label, ) self.sa_session.add(quota) # If this is a default quota, create the DefaultQuotaAssociation diff --git a/lib/galaxy/managers/users.py b/lib/galaxy/managers/users.py index 0190386099f6..84188d906b1b 100644 --- a/lib/galaxy/managers/users.py +++ b/lib/galaxy/managers/users.py @@ -7,7 +7,12 @@ import re import time from datetime import datetime -from typing import Optional +from typing import ( + Any, + Dict, + List, + Optional, +) from markupsafe import escape from sqlalchemy import ( @@ -381,13 +386,13 @@ def sharing_roles(self, user): def default_permissions(self, user): return self.app.security_agent.user_get_default_permissions(user) - def quota(self, user, total=False): + def quota(self, user, total=False, quota_source_label=None): if total: - return self.app.quota_agent.get_quota_nice_size(user) - return self.app.quota_agent.get_percent(user=user) + return self.app.quota_agent.get_quota_nice_size(user, quota_source_label=quota_source_label) + return self.app.quota_agent.get_percent(user=user, quota_source_label=quota_source_label) - def quota_bytes(self, user): - return self.app.quota_agent.get_quota(user=user) + def quota_bytes(self, user, quota_source_label: Optional[str] = None): + return self.app.quota_agent.get_quota(user=user, quota_source_label=quota_source_label) def tags_used(self, user, tag_models=None): """ @@ -643,6 +648,25 @@ def add_serializers(self): } ) + def serialize_disk_usage(self, user: model.User) -> List[Dict[str, Any]]: + rval = user.dictify_usage(self.app.object_store) + for usage in rval: + quota_source_label = usage["quota_source_label"] + usage["quota_percent"] = self.user_manager.quota(user, quota_source_label=quota_source_label) + usage["quota"] = self.user_manager.quota(user, total=True, quota_source_label=quota_source_label) + usage["quota_bytes"] = self.user_manager.quota_bytes(user, quota_source_label=quota_source_label) + usage["nice_total_disk_usage"] = util.nice_size(usage["total_disk_usage"]) + return rval + + def serialize_disk_usage_for(self, user: model.User, label: Optional[str]) -> Dict[str, Any]: + usage = user.dictify_usage_for(label) + quota_source_label = usage["quota_source_label"] + usage["quota_percent"] = self.user_manager.quota(user, quota_source_label=quota_source_label) + usage["quota"] = self.user_manager.quota(user, total=True, quota_source_label=quota_source_label) + usage["quota_bytes"] = self.user_manager.quota_bytes(user, quota_source_label=quota_source_label) + usage["nice_total_disk_usage"] = util.nice_size(usage["total_disk_usage"]) + return usage + class UserDeserializer(base.ModelDeserializer): """ diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index f3020498d5e2..65f4447f62b2 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -27,6 +27,7 @@ List, NamedTuple, Optional, + Set, Tuple, Type, TYPE_CHECKING, @@ -51,6 +52,7 @@ and_, asc, BigInteger, + bindparam, Boolean, Column, DateTime, @@ -526,6 +528,109 @@ def stderr(self, stderr): raise NotImplementedError("Attempt to set stdout, must set tool_stderr or job_stderr") +UNIQUE_DATASET_USER_USAGE = """ +WITH per_user_histories AS +( + SELECT id + FROM history + WHERE user_id = :id + AND NOT purged +), +per_hist_hdas AS ( + SELECT DISTINCT dataset_id + FROM history_dataset_association + WHERE NOT purged + AND history_id IN (SELECT id FROM per_user_histories) +) +SELECT COALESCE(SUM(COALESCE(dataset.total_size, dataset.file_size, 0)), 0) +FROM dataset +LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id +WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas) + AND library_dataset_dataset_association.id IS NULL + AND ( + {dataset_condition} + ) +""" + + +def calculate_user_disk_usage_statements(user_id, quota_source_map, for_sqlite=False): + """Standalone function so can be reused for postgres directly in pgcleanup.py.""" + statements = [] + default_quota_enabled = quota_source_map.default_quota_enabled + default_exclude_ids = quota_source_map.default_usage_excluded_ids() + default_cond = "dataset.object_store_id IS NULL" if default_quota_enabled else "" + exclude_cond = "dataset.object_store_id NOT IN :exclude_object_store_ids" if default_exclude_ids else "" + use_or = " OR " if (default_cond != "" and exclude_cond != "") else "" + default_usage_dataset_condition = "{default_cond} {use_or} {exclude_cond}".format( + default_cond=default_cond, + exclude_cond=exclude_cond, + use_or=use_or, + ) + default_usage = UNIQUE_DATASET_USER_USAGE.format(dataset_condition=default_usage_dataset_condition) + default_usage = ( + """ +UPDATE galaxy_user SET disk_usage = (%s) +WHERE id = :id +""" + % default_usage + ) + params = {"id": user_id} + if default_exclude_ids: + params["exclude_object_store_ids"] = default_exclude_ids + statements.append((default_usage, params)) + source = quota_source_map.ids_per_quota_source() + # TODO: Merge a lot of these settings together by generating a temp table for + # the object_store_id to quota_source_label into a temp table of values + for (quota_source_label, object_store_ids) in source.items(): + label_usage = UNIQUE_DATASET_USER_USAGE.format( + dataset_condition="dataset.object_store_id IN :include_object_store_ids" + ) + if for_sqlite: + # hacky alternative for older sqlite + statement = """ +WITH new (user_id, quota_source_label, disk_usage) AS ( + VALUES(:id, :label, ({label_usage})) +) +INSERT OR REPLACE INTO user_quota_source_usage (id, user_id, quota_source_label, disk_usage) +SELECT old.id, new.user_id, new.quota_source_label, new.disk_usage +FROM new + LEFT JOIN user_quota_source_usage AS old + ON new.user_id = old.user_id + AND new.quota_source_label = old.quota_source_label +""".format( + label_usage=label_usage + ) + else: + statement = """ +INSERT INTO user_quota_source_usage(user_id, quota_source_label, disk_usage) +VALUES(:user_id, :label, ({label_usage})) +ON CONFLICT +ON constraint uqsu_unique_label_per_user +DO UPDATE SET disk_usage = excluded.disk_usage +""".format( + label_usage=label_usage + ) + statements.append( + (statement, {"id": user_id, "label": quota_source_label, "include_object_store_ids": object_store_ids}) + ) + + params = {"id": user_id} + source_labels = list(source.keys()) + if len(source_labels) > 0: + clean_old_statement = """ +DELETE FROM user_quota_source_usage +WHERE user_id = :id AND quota_source_label NOT IN :labels +""" + params["labels"] = source_labels + else: + clean_old_statement = """ +DELETE FROM user_quota_source_usage +WHERE user_id = :id AND quota_source_label IS NOT NULL +""" + statements.append((clean_old_statement, params)) + return statements + + class User(Base, Dictifiable, RepresentById): """ Data for a Galaxy user or admin and relations to their @@ -572,6 +677,7 @@ class User(Base, Dictifiable, RepresentById): "GalaxySession", back_populates="user", order_by=lambda: desc(GalaxySession.update_time) # type: ignore[has-type] ) quotas = relationship("UserQuotaAssociation", back_populates="user") + quota_source_usages = relationship("UserQuotaSourceUsage", back_populates="user") social_auth = relationship("UserAuthnzToken", back_populates="user") stored_workflow_menu_entries = relationship( "StoredWorkflowMenuEntry", @@ -728,14 +834,31 @@ def all_roles_exploiting_cache(self): roles.append(role) return roles - def get_disk_usage(self, nice_size=False): + def get_disk_usage(self, nice_size=False, quota_source_label=None): """ Return byte count of disk space used by user or a human-readable string if `nice_size` is `True`. """ - rval = 0 - if self.disk_usage is not None: - rval = self.disk_usage + if quota_source_label is None: + rval = 0 + if self.disk_usage is not None: + rval = self.disk_usage + else: + statement = """ +SELECT DISK_USAGE +FROM user_quota_source_usage +WHERE user_id = :user_id and quota_source_label = :label +""" + sa_session = object_session(self) + params = { + "user_id": self.id, + "label": quota_source_label, + } + row = sa_session.execute(statement, params).fetchone() + if row is not None: + rval = row[0] + else: + rval = 0 if nice_size: rval = galaxy.util.nice_size(rval) return rval @@ -748,9 +871,36 @@ def set_disk_usage(self, bytes): total_disk_usage = property(get_disk_usage, set_disk_usage) - def adjust_total_disk_usage(self, amount): + def adjust_total_disk_usage(self, amount, quota_source_label): + assert amount is not None if amount != 0: - self.disk_usage = func.coalesce(self.table.c.disk_usage, 0) + amount + if quota_source_label is None: + self.disk_usage = func.coalesce(self.table.c.disk_usage, 0) + amount + else: + # else would work on newer sqlite - 3.24.0 + sa_session = object_session(self) + if "sqlite" in sa_session.bind.dialect.name: + # hacky alternative for older sqlite + statement = """ +WITH new (user_id, quota_source_label) AS ( VALUES(:user_id, :label) ) +INSERT OR REPLACE INTO user_quota_source_usage (id, user_id, quota_source_label, disk_usage) +SELECT old.id, new.user_id, new.quota_source_label, COALESCE(old.disk_usage + :amount, :amount) +FROM new LEFT JOIN user_quota_source_usage AS old ON new.user_id = old.user_id AND NEW.quota_source_label = old.quota_source_label; +""" + else: + statement = """ +INSERT INTO user_quota_source_usage(user_id, disk_usage, quota_source_label) +VALUES(:user_id, :amount, :label) +ON CONFLICT + ON constraint uqsu_unique_label_per_user + DO UPDATE SET disk_usage = user_quota_source_usage.disk_usage + :amount +""" + params = { + "user_id": self.id, + "amount": int(amount), + "label": quota_source_label, + } + sa_session.execute(statement, params) @property def nice_total_disk_usage(self): @@ -759,53 +909,54 @@ def nice_total_disk_usage(self): """ return self.get_disk_usage(nice_size=True) - def calculate_disk_usage(self): + def calculate_disk_usage_default_source(self, object_store): """ Return byte count total of disk space used by all non-purged, non-library - HDAs in non-purged histories. + HDAs in non-purged histories assigned to default quota source. """ - # maintain a list so that we don't double count - return self._calculate_or_set_disk_usage(dryrun=True) + # only used in set_user_disk_usage.py + assert object_store is not None + quota_source_map = object_store.get_quota_source_map() + default_quota_enabled = quota_source_map.default_quota_enabled + default_cond = "dataset.object_store_id IS NULL OR" if default_quota_enabled else "" + default_usage_dataset_condition = ( + "{default_cond} dataset.object_store_id NOT IN :exclude_object_store_ids".format( + default_cond=default_cond, + ) + ) + default_usage = UNIQUE_DATASET_USER_USAGE.format(dataset_condition=default_usage_dataset_condition) + sql_calc = text(default_usage) + sql_calc = sql_calc.bindparams(bindparam("id"), bindparam("exclude_object_store_ids", expanding=True)) + params = {"id": self.id, "exclude_object_store_ids": quota_source_map.default_usage_excluded_ids()} + sa_session = object_session(self) + usage = sa_session.scalar(sql_calc, params) + return usage - def calculate_and_set_disk_usage(self): + def calculate_and_set_disk_usage(self, object_store): """ Calculates and sets user disk usage. """ - self._calculate_or_set_disk_usage(dryrun=False) + self._calculate_or_set_disk_usage(object_store=object_store) - def _calculate_or_set_disk_usage(self, dryrun=True): + def _calculate_or_set_disk_usage(self, object_store): """ Utility to calculate and return the disk usage. If dryrun is False, the new value is set immediately. """ - sql_calc = text( - """ - WITH per_user_histories AS - ( - SELECT id - FROM history - WHERE user_id = :id - AND NOT purged - ), - per_hist_hdas AS ( - SELECT DISTINCT dataset_id - FROM history_dataset_association - WHERE NOT purged - AND history_id IN (SELECT id FROM per_user_histories) - ) - SELECT SUM(COALESCE(dataset.total_size, dataset.file_size, 0)) - FROM dataset - LEFT OUTER JOIN library_dataset_dataset_association ON dataset.id = library_dataset_dataset_association.dataset_id - WHERE dataset.id IN (SELECT dataset_id FROM per_hist_hdas) - AND library_dataset_dataset_association.id IS NULL - """ - ) + assert object_store is not None + quota_source_map = object_store.get_quota_source_map() sa_session = object_session(self) - usage = sa_session.scalar(sql_calc, {"id": self.id}) - if not dryrun: - self.set_disk_usage(usage) + for_sqlite = "sqlite" in sa_session.bind.dialect.name + statements = calculate_user_disk_usage_statements(self.id, quota_source_map, for_sqlite) + for (sql, args) in statements: + statement = text(sql) + binds = [] + for key, _ in args.items(): + expand_binding = key.endswith("s") + binds.append(bindparam(key, expanding=expand_binding)) + statement = statement.bindparams(*binds) + sa_session.execute(statement, args) sa_session.flush() - return usage @staticmethod def user_template_environment(user): @@ -869,6 +1020,66 @@ def attempt_create_private_role(self): session.add(assoc) session.flush() + def dictify_usage(self, object_store=None) -> List[Dict[str, Any]]: + """Include object_store to include empty/unused usage info.""" + used_labels: Set[Union[str, None]] = set() + rval: List[Dict[str, Any]] = [ + { + "quota_source_label": None, + "total_disk_usage": float(self.disk_usage or 0), + } + ] + used_labels.add(None) + for quota_source_usage in self.quota_source_usages: + label = quota_source_usage.quota_source_label + rval.append( + { + "quota_source_label": label, + "total_disk_usage": float(quota_source_usage.disk_usage), + } + ) + used_labels.add(label) + + if object_store is not None: + for label in object_store.get_quota_source_map().ids_per_quota_source().keys(): + if label not in used_labels: + rval.append( + { + "quota_source_label": label, + "total_disk_usage": 0.0, + } + ) + + return rval + + def dictify_usage_for(self, quota_source_label: Optional[str]) -> Dict[str, Any]: + rval: Dict[str, Any] + if quota_source_label is None: + rval = { + "quota_source_label": None, + "total_disk_usage": float(self.disk_usage or 0), + } + else: + quota_source_usage = self.quota_source_usage_for(quota_source_label) + if quota_source_usage is None: + rval = { + "quota_source_label": quota_source_label, + "total_disk_usage": 0.0, + } + else: + rval = { + "quota_source_label": quota_source_label, + "total_disk_usage": float(quota_source_usage.disk_usage), + } + + return rval + + def quota_source_usage_for(self, quota_source_label: Optional[str]) -> Optional["UserQuotaSourceUsage"]: + for quota_source_usage in self.quota_source_usages: + if quota_source_usage.quota_source_label == quota_source_label: + return quota_source_usage + return None + class PasswordResetToken(Base): __tablename__ = "password_reset_token" @@ -2728,7 +2939,9 @@ def add_dataset(self, dataset, parent_id=None, genome_build=None, set_hid=True, dataset.hid = self._next_hid() add_object_to_object_session(dataset, self) if quota and is_dataset and self.user: - self.user.adjust_total_disk_usage(dataset.quota_amount(self.user)) + quota_source_info = dataset.dataset.quota_source_info + if quota_source_info.use: + self.user.adjust_total_disk_usage(dataset.quota_amount(self.user), quota_source_info.label) dataset.history = self if is_dataset and genome_build not in [None, "?"]: self.genome_build = genome_build @@ -2746,7 +2959,10 @@ def add_datasets( self.__add_datasets_optimized(datasets, genome_build=genome_build) if quota and self.user: disk_usage = sum(d.get_total_size() for d in datasets if is_hda(d)) - self.user.adjust_total_disk_usage(disk_usage) + if disk_usage: + quota_source_info = datasets[0].dataset.quota_source_info + if quota_source_info.use: + self.user.adjust_total_disk_usage(disk_usage, quota_source_info.label) sa_session.add_all(datasets) if flush: sa_session.flush() @@ -3146,6 +3362,20 @@ def __init__(self, name=None, description=None, type=types.SYSTEM, deleted=False self.deleted = deleted +class UserQuotaSourceUsage(Base, Dictifiable, RepresentById): + __tablename__ = "user_quota_source_usage" + __table_args__ = (UniqueConstraint("user_id", "quota_source_label", name="uqsu_unique_label_per_user"),) + + dict_element_visible_keys = ["disk_usage", "quota_source_label"] + + id = Column(Integer, primary_key=True) + user_id = Column(Integer, ForeignKey("galaxy_user.id"), index=True) + quota_source_label = Column(String(32), index=True) + # user had an index on disk_usage - does that make any sense? -John + disk_usage = Column(Numeric(15, 0), default=0, nullable=False) + user = relationship("User", back_populates="quota_source_usages") + + class UserQuotaAssociation(Base, Dictifiable, RepresentById): __tablename__ = "user_quota_association" @@ -3186,6 +3416,7 @@ def __init__(self, group, quota): class Quota(Base, Dictifiable, RepresentById): __tablename__ = "quota" + __table_args__ = (Index("ix_quota_quota_source_label", "quota_source_label"),) id = Column(Integer, primary_key=True) create_time = Column(DateTime, default=now) @@ -3195,11 +3426,12 @@ class Quota(Base, Dictifiable, RepresentById): bytes = Column(BigInteger) operation = Column(String(8)) deleted = Column(Boolean, index=True, default=False) + quota_source_label = Column(String(32), default=None) default = relationship("DefaultQuotaAssociation", back_populates="quota") groups = relationship("GroupQuotaAssociation", back_populates="quota") users = relationship("UserQuotaAssociation", back_populates="quota") - dict_collection_visible_keys = ["id", "name"] + dict_collection_visible_keys = ["id", "name", "quota_source_label"] dict_element_visible_keys = [ "id", "name", @@ -3210,10 +3442,11 @@ class Quota(Base, Dictifiable, RepresentById): "default", "users", "groups", + "quota_source_label", ] valid_operations = ("+", "-", "=") - def __init__(self, name=None, description=None, amount=0, operation="="): + def __init__(self, name=None, description=None, amount=0, operation="=", quota_source_label=None): self.name = name self.description = description if amount is None: @@ -3221,6 +3454,7 @@ def __init__(self, name=None, description=None, amount=0, operation="="): else: self.bytes = amount self.operation = operation + self.quota_source_label = quota_source_label def get_amount(self): if self.bytes == -1: @@ -3249,7 +3483,7 @@ class DefaultQuotaAssociation(Base, Dictifiable, RepresentById): id = Column(Integer, primary_key=True) create_time = Column(DateTime, default=now) update_time = Column(DateTime, default=now, onupdate=now) - type = Column(String(32), index=True, unique=True) + type = Column(String(32), index=True) quota_id = Column(Integer, ForeignKey("quota.id"), index=True) quota = relationship("Quota", back_populates="default") @@ -3592,6 +3826,16 @@ def get_file_name(self): # Make filename absolute return os.path.abspath(filename) + @property + def quota_source_label(self): + return self.quota_source_info.label + + @property + def quota_source_info(self): + object_store_id = self.object_store_id + quota_source_map = self.object_store.get_quota_source_map() + return quota_source_map.get_quota_source_info(object_store_id) + def set_file_name(self, filename): if not filename: self.external_filename = None @@ -4706,10 +4950,10 @@ def get_access_roles(self, security_agent): """ return self.dataset.get_access_roles(security_agent) - def purge_usage_from_quota(self, user): + def purge_usage_from_quota(self, user, quota_source_info): """Remove this HDA's quota_amount from user's quota.""" - if user: - user.adjust_total_disk_usage(-self.quota_amount(user)) + if user and quota_source_info.use: + user.adjust_total_disk_usage(-self.quota_amount(user), quota_source_info.label) def quota_amount(self, user): """ diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py b/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py new file mode 100644 index 000000000000..7284fef938b3 --- /dev/null +++ b/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py @@ -0,0 +1,50 @@ +"""add quota source labels + +Revision ID: d0583094c8cd +Revises: c39f1de47a04 +Create Date: 2022-06-09 12:24:44.329038 + +""" +from alembic import op +from sqlalchemy import ( + Column, + ForeignKey, + Integer, + Numeric, + String, +) + +from galaxy.model.migrations.util import ( + add_unique_constraint, + drop_column, + drop_unique_constraint, +) + +# revision identifiers, used by Alembic. +revision = "d0583094c8cd" +down_revision = "c39f1de47a04" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column("quota", Column("quota_source_label", String(32), default=None)) + + op.create_table( + "user_quota_source_usage", + Column("id", Integer, primary_key=True), + Column("user_id", Integer, ForeignKey("galaxy_user.id"), index=True), + Column("quota_source_label", String(32), index=True), + # user had an index on disk_usage - does that make any sense? -John + Column("disk_usage", Numeric(15, 0)), + ) + add_unique_constraint("uqsu_unique_label_per_user", "user_quota_source_usage", ["user_id", "quota_source_label"]) + drop_unique_constraint("ix_default_quota_association_type", "default_quota_association") + op.create_index("ix_quota_quota_source_label", "quota", ["quota_source_label"]) + + +def downgrade(): + add_unique_constraint("ix_default_quota_association_type", "default_quota_association", ["type"]) + op.drop_table("user_quota_source_usage") + op.drop_index("ix_quota_quota_source_label", "quota") + drop_column("quota", "quota_source_label") diff --git a/lib/galaxy/model/migrations/util.py b/lib/galaxy/model/migrations/util.py index 37d4eb2d95f5..fc5e38ff3d3d 100644 --- a/lib/galaxy/model/migrations/util.py +++ b/lib/galaxy/model/migrations/util.py @@ -1,4 +1,5 @@ import logging +from typing import List from alembic import ( context, @@ -17,6 +18,22 @@ def drop_column(table_name, column_name): batch_op.drop_column(column_name) +def add_unique_constraint(index_name: str, table_name: str, columns: List[str]): + if _is_sqlite(): + with op.batch_alter_table(table_name) as batch_op: + batch_op.create_unique_constraint(index_name, columns) + else: + op.create_unique_constraint(index_name, table_name, columns) + + +def drop_unique_constraint(index_name: str, table_name: str): + if _is_sqlite(): + with op.batch_alter_table(table_name) as batch_op: + batch_op.drop_constraint(index_name) + else: + op.drop_constraint(index_name, table_name) + + def column_exists(table_name, column_name): if context.is_offline_mode(): return _handle_offline_mode(f"column_exists({table_name}, {column_name})", False) @@ -34,3 +51,8 @@ def _handle_offline_mode(code, return_value): ) log.info(msg) return return_value + + +def _is_sqlite() -> bool: + bind = op.get_context().bind + return bool(bind and bind.engine.name == "sqlite") diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index a4628b926ac2..92100b035834 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -16,6 +16,8 @@ Any, Dict, List, + NamedTuple, + Optional, Tuple, Type, ) @@ -44,6 +46,8 @@ "Attempted to 'create' object store entity in configuration with no database session present." ) DEFAULT_PRIVATE = False +DEFAULT_QUOTA_SOURCE = None # Just track quota right on user object in Galaxy. +DEFAULT_QUOTA_ENABLED = True # enable quota tracking in object stores by default log = logging.getLogger(__name__) @@ -278,6 +282,10 @@ def get_store_by(self, obj): def to_dict(self) -> Dict[str, Any]: raise NotImplementedError + @abc.abstractmethod + def get_quota_source_map(self): + """Return QuotaSourceMap describing mapping of object store IDs to quota sources.""" + class BaseObjectStore(ObjectStore): store_by: str @@ -417,12 +425,17 @@ def parse_private_from_config_xml(clazz, config_xml): private = asbool(config_xml.attrib.get("private", DEFAULT_PRIVATE)) return private + def get_quota_source_map(self): + # I'd rather keep this abstract... but register_singleton wants it to be instantiable... + raise NotImplementedError() + class ConcreteObjectStore(BaseObjectStore): """Subclass of ObjectStore for stores that don't delegate (non-nested). - Currently only adds store_by functionality. Which doesn't make - sense for the delegating object stores. + Adds store_by and quota_source functionality. These attributes do not make + sense for the delegating object stores, they should describe files at actually + persisted, not how a file is routed to a persistence source. """ def __init__(self, config, config_dict=None, **kwargs): @@ -446,6 +459,11 @@ def __init__(self, config, config_dict=None, **kwargs): self.description = config_dict.get("description", None) # Annotate this as true to prevent sharing of data. self.private = config_dict.get("private", DEFAULT_PRIVATE) + # short label describing the quota source or null to use default + # quota source right on user object. + quota_config = config_dict.get("quota", {}) + self.quota_source = quota_config.get("source", DEFAULT_QUOTA_SOURCE) + self.quota_enabled = quota_config.get("enabled", DEFAULT_QUOTA_ENABLED) def to_dict(self): rval = super().to_dict() @@ -453,6 +471,10 @@ def to_dict(self): rval["store_by"] = self.store_by rval["name"] = self.name rval["description"] = self.description + rval["quota"] = { + "source": self.quota_source, + "enabled": self.quota_enabled, + } return rval def _get_concrete_store_name(self, obj): @@ -467,6 +489,13 @@ def _get_store_by(self, obj): def _is_private(self, obj): return self.private + def get_quota_source_map(self): + quota_source_map = QuotaSourceMap( + self.quota_source, + self.quota_enabled, + ) + return quota_source_map + class DiskObjectStore(ConcreteObjectStore): """ @@ -518,7 +547,12 @@ def parse_xml(clazz, config_xml): if name is not None: config_dict["name"] = name for e in config_xml: - if e.tag == "files_dir": + if e.tag == "quota": + config_dict["quota"] = { + "source": e.get("source", DEFAULT_QUOTA_SOURCE), + "enabled": asbool(e.get("enabled", DEFAULT_QUOTA_ENABLED)), + } + elif e.tag == "files_dir": config_dict["files_dir"] = e.get("path") elif e.tag == "description": config_dict["description"] = e.text @@ -899,6 +933,7 @@ def __init__(self, config, config_dict, fsmon=False): removing backends when they get too full. """ super().__init__(config, config_dict) + self._quota_source_map = None self.backends = {} self.weighted_backend_ids = [] @@ -1054,6 +1089,21 @@ def _call_method(self, method, obj, default, default_is_exception, **kwargs): else: return default + def get_quota_source_map(self): + if self._quota_source_map is None: + quota_source_map = QuotaSourceMap() + self._merge_quota_source_map(quota_source_map, self) + self._quota_source_map = quota_source_map + return self._quota_source_map + + @classmethod + def _merge_quota_source_map(clz, quota_source_map, object_store): + for backend_id, backend in object_store.backends.items(): + if isinstance(backend, DistributedObjectStore): + clz._merge_quota_source_map(quota_source_map, backend) + else: + quota_source_map.backends[backend_id] = backend.get_quota_source_map() + def __get_store_id_for(self, obj, **kwargs): if obj.object_store_id is not None: if obj.object_store_id in self.backends: @@ -1086,7 +1136,6 @@ def object_store_ids(self, private=None): class HierarchicalObjectStore(NestedObjectStore): - """ ObjectStore that defers to a list of backends. @@ -1108,10 +1157,20 @@ def __init__(self, config, config_dict, fsmon=False): assert ( is_private == backend_is_private ), "The private attribute must be defined on the HierarchicalObjectStore and not contained concrete objectstores." + backend_quota = backend_def.get("quota") + if backend_quota is not None: + # Make sure just was using defaults - because cannot override what is + # is setup by the HierarchicalObjectStore. + assert backend_quota.get("source", DEFAULT_QUOTA_SOURCE) == DEFAULT_QUOTA_SOURCE + assert backend_quota.get("enabled", DEFAULT_QUOTA_ENABLED) == DEFAULT_QUOTA_ENABLED + backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) self.backends = backends self.private = is_private + quota_config = config_dict.get("quota", {}) + self.quota_source = quota_config.get("source", DEFAULT_QUOTA_SOURCE) + self.quota_enabled = quota_config.get("enabled", DEFAULT_QUOTA_ENABLED) @classmethod def parse_xml(clazz, config_xml): @@ -1156,6 +1215,13 @@ def _is_private(self, obj): # the same way. return self.private + def get_quota_source_map(self): + quota_source_map = QuotaSourceMap( + self.quota_source, + self.quota_enabled, + ) + return quota_source_map + def type_to_object_store_class(store: str, fsmon: bool = False) -> Tuple[Type[BaseObjectStore], Dict[str, Any]]: objectstore_class: Type[BaseObjectStore] @@ -1306,6 +1372,66 @@ def config_to_dict(config): } +class QuotaSourceInfo(NamedTuple): + label: Optional[str] + use: bool + + +class QuotaSourceMap: + def __init__(self, source=DEFAULT_QUOTA_SOURCE, enabled=DEFAULT_QUOTA_ENABLED): + self.default_quota_source = source + self.default_quota_enabled = enabled + self.info = QuotaSourceInfo(self.default_quota_source, self.default_quota_enabled) + self.backends = {} + self._labels = None + + def get_quota_source_info(self, object_store_id): + if object_store_id in self.backends: + return self.backends[object_store_id].get_quota_source_info(object_store_id) + else: + return self.info + + def get_quota_source_label(self, object_store_id): + if object_store_id in self.backends: + return self.backends[object_store_id].get_quota_source_label(object_store_id) + else: + return self.default_quota_source + + def get_quota_source_labels(self): + if self._labels is None: + labels = set() + if self.default_quota_source: + labels.add(self.default_quota_source) + for backend in self.backends.values(): + labels = labels.union(backend.get_quota_source_labels()) + self._labels = labels + return self._labels + + def default_usage_excluded_ids(self): + exclude_object_store_ids = [] + for backend_id, backend_source_map in self.backends.items(): + if backend_source_map.default_quota_source is not None: + exclude_object_store_ids.append(backend_id) + elif not backend_source_map.default_quota_enabled: + exclude_object_store_ids.append(backend_id) + return exclude_object_store_ids + + def get_id_to_source_pairs(self): + pairs = [] + for backend_id, backend_source_map in self.backends.items(): + if backend_source_map.default_quota_source is not None and backend_source_map.default_quota_enabled: + pairs.append((backend_id, backend_source_map.default_quota_source)) + return pairs + + def ids_per_quota_source(self): + quota_sources: Dict[str, List[str]] = {} + for (object_id, quota_source_label) in self.get_id_to_source_pairs(): + if quota_source_label not in quota_sources: + quota_sources[quota_source_label] = [] + quota_sources[quota_source_label].append(object_id) + return quota_sources + + class ObjectStorePopulator: """Small helper for interacting with the object store and making sure all datasets from a job end up with the same object_store_id. diff --git a/lib/galaxy/quota/__init__.py b/lib/galaxy/quota/__init__.py index ae074228f0dc..b01cf47d5ae6 100644 --- a/lib/galaxy/quota/__init__.py +++ b/lib/galaxy/quota/__init__.py @@ -23,12 +23,12 @@ class QuotaAgent: # metaclass=abc.ABCMeta """ # TODO: make abstractmethod after they work better with mypy - def get_quota(self, user): + def get_quota(self, user, quota_source_label=None): """Return quota in bytes or None if no quota is set.""" - def get_quota_nice_size(self, user): + def get_quota_nice_size(self, user, quota_source_label=None): """Return quota as a human-readable string or 'unlimited' if no quota is set.""" - quota_bytes = self.get_quota(user) + quota_bytes = self.get_quota(user, quota_source_label=quota_source_label) if quota_bytes is not None: quota_str = galaxy.util.nice_size(quota_bytes) else: @@ -36,10 +36,10 @@ def get_quota_nice_size(self, user): return quota_str # TODO: make abstractmethod after they work better with mypy - def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False): + def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False, quota_source_label=None): """Return the percentage of any storage quota applicable to the user/transaction.""" - def get_usage(self, trans=None, user=False, history=False): + def get_usage(self, trans=None, user=False, history=False, quota_source_label=None): if trans: user = trans.user history = trans.history @@ -48,7 +48,14 @@ def get_usage(self, trans=None, user=False, history=False): assert history, "Could not determine anonymous user's history." usage = history.disk_size else: - usage = user.total_disk_usage + if quota_source_label is None: + usage = user.total_disk_usage + else: + quota_source_usage = user.quota_source_usage_for(quota_source_label) + if not quota_source_usage or quota_source_usage.disk_usage is None: + usage = 0.0 + else: + usage = quota_source_usage.disk_usage return usage def is_over_quota(self, app, job, job_destination): @@ -66,14 +73,14 @@ class NoQuotaAgent(QuotaAgent): def __init__(self): pass - def get_quota(self, user): + def get_quota(self, user, quota_source_label=None): return None @property def default_quota(self): return None - def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False): + def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False, quota_source_label=None): return None def is_over_quota(self, app, job, job_destination): @@ -87,7 +94,7 @@ def __init__(self, model): self.model = model self.sa_session = model.context - def get_quota(self, user): + def get_quota(self, user, quota_source_label=None): """ Calculated like so: @@ -100,7 +107,7 @@ def get_quota(self, user): quotas. """ if not user: - return self._default_unregistered_quota + return self._default_unregistered_quota(quota_source_label) query = text( """ SELECT ( @@ -111,8 +118,9 @@ def get_quota(self, user): (SELECT default_quota.bytes FROM quota as default_quota LEFT JOIN default_quota_association on default_quota.id = default_quota_association.quota_id - WHERE default_quota_association.type == 'registered' - AND default_quota.deleted != :is_true)) + WHERE default_quota_association.type = 'registered' + AND default_quota.deleted != :is_true + AND default_quota.quota_source_label {label_cond})) + (CASE WHEN SUM(CASE WHEN union_quota.operation = '=' AND union_quota.bytes = -1 THEN 1 ELSE 0 @@ -129,46 +137,60 @@ def get_quota(self, user): ) FROM ( SELECT user_quota.operation as operation, user_quota.bytes as bytes - FROM galaxy_user as user - LEFT JOIN user_quota_association as uqa on user.id = uqa.user_id + FROM galaxy_user as guser + LEFT JOIN user_quota_association as uqa on guser.id = uqa.user_id LEFT JOIN quota as user_quota on user_quota.id = uqa.quota_id WHERE user_quota.deleted != :is_true - AND user.id = :user_id + AND user_quota.quota_source_label {label_cond} + AND guser.id = :user_id UNION ALL SELECT group_quota.operation as operation, group_quota.bytes as bytes - FROM galaxy_user as user - LEFT JOIN user_group_association as uga on user.id = uga.user_id + FROM galaxy_user as guser + LEFT JOIN user_group_association as uga on guser.id = uga.user_id LEFT JOIN galaxy_group on galaxy_group.id = uga.group_id LEFT JOIN group_quota_association as gqa on galaxy_group.id = gqa.group_id LEFT JOIN quota as group_quota on group_quota.id = gqa.quota_id WHERE group_quota.deleted != :is_true - AND user.id = :user_id + AND group_quota.quota_source_label {label_cond} + AND guser.id = :user_id ) as union_quota -""" +""".format( + label_cond="IS NULL" if quota_source_label is None else " = :label" + ) ) conn = self.sa_session.connection() with conn.begin(): - res = conn.execute(query, is_true=True, user_id=user.id).fetchone() + res = conn.execute(query, is_true=True, user_id=user.id, label=quota_source_label).fetchone() if res: - return res[0] + return int(res[0]) if res[0] else None else: return None - @property - def _default_unregistered_quota(self): - return self._default_quota(self.model.DefaultQuotaAssociation.types.UNREGISTERED) + def _default_unregistered_quota(self, quota_source_label): + return self._default_quota(self.model.DefaultQuotaAssociation.types.UNREGISTERED, quota_source_label) - def _default_quota(self, default_type): - dqa = ( - self.sa_session.query(self.model.DefaultQuotaAssociation) - .filter(self.model.DefaultQuotaAssociation.type == default_type) - .first() + def _default_quota(self, default_type, quota_source_label): + label_condition = "IS NULL" if quota_source_label is None else "= :label" + query = text( + """ +SELECT bytes +FROM quota as default_quota +LEFT JOIN default_quota_association on default_quota.id = default_quota_association.quota_id +WHERE default_quota_association.type = :default_type + AND default_quota.deleted != :is_true + AND default_quota.quota_source_label {label_condition} +""".format( + label_condition=label_condition + ) ) - if not dqa: - return None - if dqa.quota.bytes < 0: - return None - return dqa.quota.bytes + + conn = self.sa_session.connection() + with conn.begin(): + res = conn.execute(query, is_true=True, label=quota_source_label, default_type=default_type).fetchone() + if res: + return res[0] + else: + return None def set_default_quota(self, default_type, quota): # Unset the current default(s) associated with this quota, if there are any @@ -180,20 +202,25 @@ def set_default_quota(self, default_type, quota): for gqa in quota.groups: self.sa_session.delete(gqa) # Find the old default, assign the new quota if it exists - dqa = ( + label = quota.quota_source_label + dqas = ( self.sa_session.query(self.model.DefaultQuotaAssociation) - .filter(self.model.DefaultQuotaAssociation.type == default_type) - .first() + .filter(self.model.DefaultQuotaAssociation.table.c.type == default_type) + .all() ) - if dqa: - dqa.quota = quota + target_default = None + for dqa in dqas: + if dqa.quota.quota_source_label == label and not dqa.quota.deleted: + target_default = dqa + if target_default: + target_default.quota = quota # Or create if necessary else: - dqa = self.model.DefaultQuotaAssociation(default_type, quota) - self.sa_session.add(dqa) + target_default = self.model.DefaultQuotaAssociation(default_type, quota) + self.sa_session.add(target_default) self.sa_session.flush() - def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False): + def get_percent(self, trans=None, user=False, history=False, usage=False, quota=False, quota_source_label=None): """ Return the percentage of any storage quota applicable to the user/transaction. """ @@ -203,13 +230,13 @@ def get_percent(self, trans=None, user=False, history=False, usage=False, quota= history = trans.history # if quota wasn't passed, attempt to get the quota if quota is False: - quota = self.get_quota(user) + quota = self.get_quota(user, quota_source_label=quota_source_label) # return none if no applicable quotas or quotas disabled if quota is None: return None # get the usage, if it wasn't passed if usage is False: - usage = self.get_usage(trans, user, history) + usage = self.get_usage(trans, user, history, quota_source_label=quota_source_label) try: return min((int(float(usage) / quota * 100), 100)) except ZeroDivisionError: @@ -239,10 +266,19 @@ def set_entity_quota_associations(self, quotas=None, users=None, groups=None, de self.sa_session.flush() def is_over_quota(self, app, job, job_destination): - quota = self.get_quota(job.user) + # Doesn't work because job.object_store_id until inside handler :_( + # quota_source_label = job.quota_source_label + if job_destination is not None: + object_store_id = job_destination.params.get("object_store_id", None) + object_store = app.object_store + quota_source_map = object_store.get_quota_source_map() + quota_source_label = quota_source_map.get_quota_source_info(object_store_id).label + else: + quota_source_label = None + quota = self.get_quota(job.user, quota_source_label=quota_source_label) if quota is not None: try: - usage = self.get_usage(user=job.user, history=job.history) + usage = self.get_usage(user=job.user, history=job.history, quota_source_label=quota_source_label) if usage > quota: return True except AssertionError: diff --git a/lib/galaxy/quota/_schema.py b/lib/galaxy/quota/_schema.py index e2881570216d..b56f4e084877 100644 --- a/lib/galaxy/quota/_schema.py +++ b/lib/galaxy/quota/_schema.py @@ -107,6 +107,11 @@ class QuotaBase(Model): description="The `encoded identifier` of the quota.", ) name: str = QuotaNameField + quota_source_label: Optional[str] = Field( + None, + title="Quota Source Label", + description="Quota source label", + ) class QuotaSummary(QuotaBase): @@ -183,6 +188,11 @@ class CreateQuotaParams(Model): " equivalent to ``no``." ), ) + quota_source_label: Optional[str] = Field( + default=None, + title="Quota Source Label", + description="If set, quota source label to apply this quota operation to. Otherwise, the default quota is used.", + ) in_users: Optional[List[str]] = Field( default=[], title="Users", diff --git a/lib/galaxy/webapps/base/webapp.py b/lib/galaxy/webapps/base/webapp.py index 45df16c83afe..1cb7f6525942 100644 --- a/lib/galaxy/webapps/base/webapp.py +++ b/lib/galaxy/webapps/base/webapp.py @@ -825,7 +825,7 @@ def _associate_user_history(self, user, prev_galaxy_session=None): # Increase the user's disk usage by the amount of the previous history's datasets if they didn't already # own it. for hda in history.datasets: - user.adjust_total_disk_usage(hda.quota_amount(user)) + user.adjust_total_disk_usage(hda.quota_amount(user), hda.dataset.quota_source_info.label) # Only set default history permissions if the history is from the previous session and anonymous set_permissions = True elif self.galaxy_session.current_history: diff --git a/lib/galaxy/webapps/galaxy/api/users.py b/lib/galaxy/webapps/galaxy/api/users.py index bd960968df24..016faee817c9 100644 --- a/lib/galaxy/webapps/galaxy/api/users.py +++ b/lib/galaxy/webapps/galaxy/api/users.py @@ -5,6 +5,7 @@ import json import logging import re +from typing import Optional from fastapi import ( Body, @@ -315,6 +316,37 @@ def _get_user_full(self, trans, user_id, **kwd): except Exception: raise exceptions.RequestParameterInvalidException("Invalid user id specified", id=user_id) + @expose_api + def usage(self, trans, user_id: str, **kwd): + """ + GET /api/users/{user_id}/usage + + Get user's disk usage broken down by quota source. + """ + user = self._get_user_full(trans, user_id, **kwd) + if user: + rval = self.user_serializer.serialize_disk_usage(user) + return rval + else: + return [] + + @expose_api + def usage_for(self, trans, user_id: str, label: str, **kwd): + """ + GET /api/users/{user_id}/usage/{label} + + Get user's disk usage for supplied quota source label. + """ + user = self._get_user_full(trans, user_id, **kwd) + effective_label: Optional[str] = label + if label == "__null__": + effective_label = None + if user: + rval = self.user_serializer.serialize_disk_usage_for(user, effective_label) + return rval + else: + return None + @expose_api def create(self, trans: GalaxyWebTransaction, payload: dict, **kwd): """ @@ -414,7 +446,7 @@ def anon_user_api_value(self, trans): if not trans.user and not trans.history: # Can't return info about this user, may not have a history yet. return {} - usage = trans.app.quota_agent.get_usage(trans) + usage = trans.app.quota_agent.get_usage(trans, history=trans.history) percent = trans.app.quota_agent.get_percent(trans=trans, usage=usage) return { "total_disk_usage": int(usage), diff --git a/lib/galaxy/webapps/galaxy/buildapp.py b/lib/galaxy/webapps/galaxy/buildapp.py index a9bcaba11314..79c2e9098ba9 100644 --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -583,6 +583,12 @@ def populate_api_routes(webapp, app): conditions=dict(method=["POST"]), ) + webapp.mapper.connect( + "/api/users/{user_id}/usage", action="usage", controller="users", conditions=dict(method=["GET"]) + ) + webapp.mapper.connect( + "/api/users/{user_id}/usage/{label}", action="usage_for", controller="users", conditions=dict(method=["GET"]) + ) webapp.mapper.resource_with_deleted("user", "users", path_prefix="/api") webapp.mapper.resource("visualization", "visualizations", path_prefix="/api") webapp.mapper.resource("plugins", "plugins", path_prefix="/api") diff --git a/lib/galaxy/webapps/galaxy/controllers/admin.py b/lib/galaxy/webapps/galaxy/controllers/admin.py index 43257341d665..542b2cc6c531 100644 --- a/lib/galaxy/webapps/galaxy/controllers/admin.py +++ b/lib/galaxy/webapps/galaxy/controllers/admin.py @@ -698,6 +698,9 @@ def create_quota(self, trans, payload=None, **kwd): if trans.request.method == "GET": all_users = [] all_groups = [] + labels = trans.app.object_store.get_quota_source_map().get_quota_source_labels() + label_options = [("Default Quota", None)] + label_options.extend([(label, label) for label in labels]) for user in ( trans.sa_session.query(trans.app.model.User) .filter(trans.app.model.User.table.c.deleted == false()) @@ -713,7 +716,7 @@ def create_quota(self, trans, payload=None, **kwd): default_options = [("No", "no")] for type_ in trans.app.model.DefaultQuotaAssociation.types: default_options.append((f"Yes, {type_}", type_)) - return { + rval = { "title": "Create Quota", "inputs": [ {"name": "name", "label": "Name"}, @@ -730,10 +733,23 @@ def create_quota(self, trans, payload=None, **kwd): "options": default_options, "help": "Warning: Any users or groups associated with this quota will be disassociated.", }, - build_select_input("in_groups", "Groups", all_groups, []), - build_select_input("in_users", "Users", all_users, []), ], } + if len(label_options) > 1: + rval["inputs"].append( + { + "name": "quota_source_label", + "label": "Apply quota to labeled object stores.", + "options": label_options, + } + ) + rval["inputs"].extend( + [ + build_select_input("in_groups", "Groups", all_groups, []), + build_select_input("in_users", "Users", all_users, []), + ] + ) + return rval else: try: quota, message = self.quota_manager.create_quota(payload, decode_id=trans.security.decode_id) diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py index 671a731b34fb..e75ba2e1dab9 100644 --- a/lib/galaxy/webapps/galaxy/controllers/dataset.py +++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py @@ -887,7 +887,7 @@ def _purge(self, trans, dataset_id): hda.deleted = True # HDA is purgeable # Decrease disk usage first - hda.purge_usage_from_quota(user) + hda.purge_usage_from_quota(user, hda.dataset.quota_source_info) # Mark purged hda.purged = True trans.sa_session.add(hda) diff --git a/lib/galaxy/webapps/galaxy/controllers/history.py b/lib/galaxy/webapps/galaxy/controllers/history.py index 5ef2faf41870..041c53a7573b 100644 --- a/lib/galaxy/webapps/galaxy/controllers/history.py +++ b/lib/galaxy/webapps/galaxy/controllers/history.py @@ -609,7 +609,7 @@ def purge_deleted_datasets(self, trans): for hda in trans.history.datasets: if not hda.deleted or hda.purged: continue - hda.purge_usage_from_quota(trans.user) + hda.purge_usage_from_quota(trans.user, hda.dataset.quota_source_info) hda.purged = True trans.sa_session.add(hda) trans.log_event(f"HDA id {hda.id} has been purged") diff --git a/lib/galaxy/webapps/galaxy/services/datasets.py b/lib/galaxy/webapps/galaxy/services/datasets.py index 7f7ee2293d70..bc199bd17854 100644 --- a/lib/galaxy/webapps/galaxy/services/datasets.py +++ b/lib/galaxy/webapps/galaxy/services/datasets.py @@ -98,6 +98,15 @@ class RequestDataType(str, Enum): in_use_state = "in_use_state" +class ConcreteObjectStoreQuotaSourceDetails(Model): + source: Optional[str] = Field( + description="The quota source label corresponding to the object store the dataset is stored in (or would be stored in)" + ) + enabled: bool = Field( + description="Whether the object store tracks quota on the data (independent of Galaxy's configuration)" + ) + + class DatasetStorageDetails(Model): object_store_id: Optional[str] = Field( description="The identifier of the destination ObjectStore for this dataset.", @@ -119,6 +128,7 @@ class DatasetStorageDetails(Model): shareable: bool = Field( description="Is this dataset shareable.", ) + quota: dict = Field(description="Information about quota sources around dataset storage.") class DatasetInheritanceChainEntry(Model): @@ -376,6 +386,13 @@ def show_storage( except FileNotFoundError: # uninitalized directory (emtpy) disk object store can cause this... percent_used = None + + quota_source = dataset.quota_source_info + quota = ConcreteObjectStoreQuotaSourceDetails( + source=quota_source.label, + enabled=quota_source.use, + ) + dataset_state = dataset.state hashes = [h.to_dict() for h in dataset.hashes] sources = [s.to_dict() for s in dataset.sources] @@ -388,6 +405,7 @@ def show_storage( dataset_state=dataset_state, hashes=hashes, sources=sources, + quota=quota, ) def show_inheritance_chain( diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 07ff1bb09139..e685c7fd0d4a 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -1077,6 +1077,17 @@ def user_private_role_id(self) -> str: assert "id" in role, role return role["id"] + def get_usage(self) -> List[Dict[str, Any]]: + usage_response = self.galaxy_interactor.get("users/current/usage") + usage_response.raise_for_status() + return usage_response.json() + + def get_usage_for(self, label: Optional[str]) -> Dict[str, Any]: + label_as_str = label if label is not None else "__null__" + usage_response = self.galaxy_interactor.get(f"users/current/usage/{label_as_str}") + usage_response.raise_for_status() + return usage_response.json() + def create_role(self, user_ids: list, description: Optional[str] = None) -> dict: using_requirement("admin") payload = { @@ -1090,14 +1101,14 @@ def create_role(self, user_ids: list, description: Optional[str] = None) -> dict def create_quota(self, quota_payload: dict) -> dict: using_requirement("admin") - quota_response = self._post("quotas", data=quota_payload, admin=True) - quota_response.raise_for_status() + quota_response = self._post("quotas", data=quota_payload, admin=True, json=True) + api_asserts.assert_status_code_is_ok(quota_response) return quota_response.json() def get_quotas(self) -> list: using_requirement("admin") quota_response = self._get("quotas", admin=True) - quota_response.raise_for_status() + api_asserts.assert_status_code_is_ok(quota_response) return quota_response.json() def make_private(self, history_id: str, dataset_id: str) -> dict: @@ -1109,14 +1120,14 @@ def make_private(self, history_id: str, dataset_id: str) -> dict: "manage": [role_id], } response = self.update_permissions_raw(history_id, dataset_id, payload) - response.raise_for_status() + api_asserts.assert_status_code_is_ok(response) return response.json() - def make_public_raw(self, history_id: str, dataset_id: str) -> Response: + def make_dataset_public_raw(self, history_id: str, dataset_id: str) -> Response: role_id = self.user_private_role_id() payload = { - "access": json.dumps([]), - "manage": json.dumps([role_id]), + "access": [], + "manage": [role_id], } response = self.update_permissions_raw(history_id, dataset_id, payload) return response @@ -1124,12 +1135,12 @@ def make_public_raw(self, history_id: str, dataset_id: str) -> Response: def update_permissions_raw(self, history_id: str, dataset_id: str, payload: dict) -> Response: url = f"histories/{history_id}/contents/{dataset_id}/permissions" update_response = self._put(url, payload, admin=True, json=True) - update_response.raise_for_status() return update_response def make_public(self, history_id: str) -> dict: using_requirement("new_published_objects") sharing_response = self._put(f"histories/{history_id}/publish") + api_asserts.assert_status_code_is_ok(sharing_response) assert sharing_response.status_code == 200 return sharing_response.json() diff --git a/scripts/cleanup_datasets/pgcleanup.py b/scripts/cleanup_datasets/pgcleanup.py index 5b28ecfea5ac..8e7e3eaef1bc 100755 --- a/scripts/cleanup_datasets/pgcleanup.py +++ b/scripts/cleanup_datasets/pgcleanup.py @@ -10,6 +10,7 @@ import inspect import logging import os +import re import string import sys import time @@ -26,6 +27,7 @@ import galaxy.config from galaxy.exceptions import ObjectNotFound +from galaxy.model import calculate_user_disk_usage_statements from galaxy.objectstore import build_object_store_from_config from galaxy.util.script import ( app_properties_from_args, @@ -76,6 +78,7 @@ class Action: directly.) """ + requires_objectstore = True update_time_sql = ", update_time = NOW() AT TIME ZONE 'utc'" force_retry_sql = " AND NOT purged" primary_key = None @@ -116,6 +119,9 @@ def __init__(self, app): self.__row_methods = [] self.__post_methods = [] self.__exit_methods = [] + if self.requires_objectstore: + self.object_store = build_object_store_from_config(self._config) + self._register_exit_method(self.object_store.shutdown) self._init() def __enter__(self): @@ -248,13 +254,14 @@ def _init(self): class RemovesObjects: """Base class for mixins that remove objects from object stores.""" + requires_objectstore = True + def _init(self): + super()._init() self.objects_to_remove = set() log.info("Initializing object store for action %s", self.name) - self.object_store = build_object_store_from_config(self._config) self._register_row_method(self.collect_removed_object_info) self._register_post_method(self.remove_objects) - self._register_exit_method(self.object_store.shutdown) def collect_removed_object_info(self, row): object_id = getattr(row, self.id_column, None) @@ -361,7 +368,10 @@ class RequiresDiskUsageRecalculation: To use, ensure your query returns a ``recalculate_disk_usage_user_id`` column. """ + requires_objectstore = True + def _init(self): + super()._init() self.__recalculate_disk_usage_user_ids = set() self._register_row_method(self.collect_recalculate_disk_usage_user_id) self._register_post_method(self.recalculate_disk_usage) @@ -381,30 +391,19 @@ def recalculate_disk_usage(self): """ log.info("Recalculating disk usage for users whose data were purged") for user_id in sorted(self.__recalculate_disk_usage_user_ids): - # TODO: h.purged = false should be unnecessary once all hdas in purged histories are purged. - sql = """ - UPDATE galaxy_user - SET disk_usage = ( - SELECT COALESCE(SUM(total_size), 0) - FROM ( SELECT d.total_size - FROM history_dataset_association hda - JOIN history h ON h.id = hda.history_id - JOIN dataset d ON hda.dataset_id = d.id - WHERE h.user_id = %(user_id)s - AND h.purged = false - AND hda.purged = false - AND d.purged = false - AND d.id NOT IN (SELECT dataset_id - FROM library_dataset_dataset_association) - GROUP BY d.id) AS sizes) - WHERE id = %(user_id)s - RETURNING disk_usage; - """ - args = {"user_id": user_id} - cur = self._update(sql, args, add_event=False) - for row in cur: - # disk_usage might be None (e.g. user has purged all data) - self.log.info("recalculate_disk_usage user_id %i to %s bytes" % (user_id, row.disk_usage)) + quota_source_map = self.object_store.get_quota_source_map() + statements = calculate_user_disk_usage_statements(user_id, quota_source_map) + + for (sql, args) in statements: + sql, _ = re.subn(r"\:([\w]+)", r"%(\1)s", sql) + new_args = {} + for key, val in args.items(): + if isinstance(val, list): + val = tuple(val) + new_args[key] = val + self._update(sql, new_args, add_event=False) + + self.log.info("recalculate_disk_usage user_id %i" % user_id) class RemovesMetadataFiles(RemovesObjects): diff --git a/scripts/set_user_disk_usage.py b/scripts/set_user_disk_usage.py index 7f0a2ac3e12c..b23b159b0ca8 100755 --- a/scripts/set_user_disk_usage.py +++ b/scripts/set_user_disk_usage.py @@ -44,18 +44,18 @@ def init(): return init_models_from_config(config, object_store=object_store), object_store, engine -def quotacheck(sa_session, users, engine): +def quotacheck(sa_session, users, engine, object_store): sa_session.refresh(user) current = user.get_disk_usage() print(user.username, "<" + user.email + ">:", end=" ") if not args.dryrun: # Apply new disk usage - user.calculate_and_set_disk_usage() + user.calculate_and_set_disk_usage(object_store) # And fetch new = user.get_disk_usage() else: - new = user.calculate_disk_usage() + new = user.calculate_disk_usage_default_source(object_store) print("old usage:", nice_size(current), "change:", end=" ") if new in (current, None): @@ -77,7 +77,7 @@ def quotacheck(sa_session, users, engine): print("Processing %i users..." % user_count) for i, user in enumerate(sa_session.query(model.User).enable_eagerloads(False).yield_per(1000)): print("%3i%%" % int(float(i) / user_count * 100), end=" ") - quotacheck(sa_session, user, engine) + quotacheck(sa_session, user, engine, object_store) print("100% complete") object_store.shutdown() sys.exit(0) @@ -88,5 +88,5 @@ def quotacheck(sa_session, users, engine): if not user: print("User not found") sys.exit(1) + quotacheck(sa_session, user, engine, object_store) object_store.shutdown() - quotacheck(sa_session, user, engine) diff --git a/test/integration/objectstore/test_private_handling.py b/test/integration/objectstore/test_private_handling.py index 48d6e97c44d1..f89d23d72cea 100644 --- a/test/integration/objectstore/test_private_handling.py +++ b/test/integration/objectstore/test_private_handling.py @@ -18,7 +18,7 @@ TEST_INPUT_FILES_CONTENT = "1 2 3" -class PrivatePreventsSharingObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): +class TestPrivatePreventsSharingObjectStoreIntegration(BaseObjectStoreIntegrationTestCase): @classmethod def handle_galaxy_config_kwds(cls, config): config["new_user_dataset_access_role_default_private"] = True @@ -30,12 +30,13 @@ def test_both_types(self): hda = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True) content = self.dataset_populator.get_history_dataset_content(history_id, hda["id"]) assert content.startswith(TEST_INPUT_FILES_CONTENT) - response = self.dataset_populator.make_public_raw(history_id, hda["id"]) - assert response.status_code != 200 + response = self.dataset_populator.make_dataset_public_raw(history_id, hda["id"]) + api_asserts.assert_status_code_is(response, 400) + api_asserts.assert_error_code_is(response, 400008) api_asserts.assert_error_message_contains(response, "Attempting to share a non-shareable dataset.") -class PrivateCannotWritePublicDataObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): +class TestPrivateCannotWritePublicDataObjectStoreIntegration(BaseObjectStoreIntegrationTestCase): @classmethod def handle_galaxy_config_kwds(cls, config): config["new_user_dataset_access_role_default_private"] = False diff --git a/test/integration/objectstore/test_quota_limit.py b/test/integration/objectstore/test_quota_limit.py new file mode 100644 index 000000000000..b26e002f3057 --- /dev/null +++ b/test/integration/objectstore/test_quota_limit.py @@ -0,0 +1,72 @@ +from ._base import BaseObjectStoreIntegrationTestCase +from .test_selection_with_resource_parameters import ( + DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE, + JOB_CONFIG_FILE, + JOB_RESOURCE_PARAMETERS_CONFIG_FILE, +) + + +class TestQuotaIntegration(BaseObjectStoreIntegrationTestCase): + @classmethod + def handle_galaxy_config_kwds(cls, config): + cls._configure_object_store(DISTRIBUTED_OBJECT_STORE_CONFIG_TEMPLATE, config) + config["job_config_file"] = JOB_CONFIG_FILE + config["job_resource_params_file"] = JOB_RESOURCE_PARAMETERS_CONFIG_FILE + config["enable_quotas"] = True + + def test_selection_limit(self): + with self.dataset_populator.test_history() as history_id: + + hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3\n4 5 6\n7 8 9\n") + self.dataset_populator.wait_for_history(history_id) + hda1_input = {"src": "hda", "id": hda1["id"]} + + quotas = self.dataset_populator.get_quotas() + assert len(quotas) == 0 + + payload = { + "name": "defaultquota1", + "description": "first default quota", + "amount": "1 bytes", + "operation": "=", + "default": "registered", + } + self.dataset_populator.create_quota(payload) + + payload = { + "name": "ebsquota1", + "description": "first ebs quota", + "amount": "100 MB", + "operation": "=", + "default": "registered", + "quota_source_label": "ebs", + } + self.dataset_populator.create_quota(payload) + + quotas = self.dataset_populator.get_quotas() + assert len(quotas) == 2 + + hda2 = self.dataset_populator.new_dataset(history_id, content="1 2 3\n4 5 6\n7 8 9\n") + self.dataset_populator.wait_for_history(history_id) + + hda2_now = self.dataset_populator.get_history_dataset_details(history_id, dataset=hda2, wait=False) + assert hda2_now["state"] == "paused" + + create_10_inputs = { + "input1": hda1_input, + "input2": hda1_input, + "__job_resource|__job_resource__select": "yes", + "__job_resource|how_store": "slow", + } + create10_response = self.dataset_populator.run_tool( + "create_10", + create_10_inputs, + history_id, + assert_ok=False, + ) + job_id = create10_response["jobs"][0]["id"] + self.dataset_populator.wait_for_job(job_id) + job_details = self.dataset_populator.get_job_details(job_id).json() + # This job isn't paused, it goes through because we used a different + # objectstore using job parameters. + assert job_details["state"] == "ok" diff --git a/test/integration/objectstore/test_selection_with_resources_parameters.py b/test/integration/objectstore/test_selection_with_resource_parameters.py similarity index 83% rename from test/integration/objectstore/test_selection_with_resources_parameters.py rename to test/integration/objectstore/test_selection_with_resource_parameters.py index f918cdea474c..63a5c1880245 100644 --- a/test/integration/objectstore/test_selection_with_resources_parameters.py +++ b/test/integration/objectstore/test_selection_with_resource_parameters.py @@ -33,11 +33,13 @@
+ + @@ -63,7 +65,8 @@ def handle_galaxy_config_kwds(cls, config): config["job_config_file"] = JOB_CONFIG_FILE config["job_resource_params_file"] = JOB_RESOURCE_PARAMETERS_CONFIG_FILE config["object_store_store_by"] = "uuid" - config["metadata_strategy"] = "celery_extended" + # Broken in dev https://github.com/galaxyproject/galaxy/pull/14055 + # config["metadata_strategy"] = "celery_extended" config["outputs_to_working_directory"] = True def _object_store_counts(self): @@ -87,7 +90,7 @@ def _assert_no_external_filename(self): for external_filename_tuple in self._app.model.session.query(Dataset.external_filename).all(): assert external_filename_tuple[0] is None - def test_tool_simple_constructs(self): + def test_objectstore_selection(self): with self.dataset_populator.test_history() as history_id: def _run_tool(tool_id, inputs): @@ -109,11 +112,23 @@ def _run_tool(tool_id, inputs): # One file uploaded, added to default object store ID. self._assert_file_counts(1, 0, 0, 0) + usage_list = self.dataset_populator.get_usage() + # assert len(usage_list) == 1 + assert usage_list[0]["quota_source_label"] is None + assert usage_list[0]["total_disk_usage"] == 6 + + usage = self.dataset_populator.get_usage_for(None) + assert usage["quota_source_label"] is None + assert usage["total_disk_usage"] == 6 # should create two files in static object store. _run_tool("multi_data_param", {"f1": hda1_input, "f2": hda1_input}) self._assert_file_counts(1, 2, 0, 0) + usage = self.dataset_populator.get_usage_for(None) + assert usage["quota_source_label"] is None + assert usage["total_disk_usage"] == 18 + # should create two files in ebs object store. create_10_inputs_1 = { "input1": hda1_input, @@ -122,6 +137,18 @@ def _run_tool(tool_id, inputs): _run_tool("create_10", create_10_inputs_1) self._assert_file_counts(1, 2, 10, 0) + usage = self.dataset_populator.get_usage_for("ebs") + assert usage["quota_source_label"] == "ebs" + assert usage["total_disk_usage"] == 21 + + usage_list = self.dataset_populator.get_usage() + # assert len(usage_list) == 2 + assert usage_list[0]["quota_source_label"] is None + assert usage_list[0]["total_disk_usage"] == 18 + ebs_usage = [u for u in usage_list if u["quota_source_label"] == "ebs"][0] + assert ebs_usage["quota_source_label"] == "ebs" + assert ebs_usage["total_disk_usage"] == 21, str(usage_list) + # should create 10 files in S3 object store. create_10_inputs_2 = { "__job_resource|__job_resource__select": "yes", diff --git a/test/integration/test_quota.py b/test/integration/test_quota.py index 26ec8965d5ec..fa9a31919796 100644 --- a/test/integration/test_quota.py +++ b/test/integration/test_quota.py @@ -3,6 +3,7 @@ class TestQuotaIntegration(integration_util.IntegrationTestCase): + dataset_populator: DatasetPopulator require_admin_user = True @classmethod @@ -163,6 +164,26 @@ def test_400_when_invalid_amount(self): create_response = self._post("quotas", data=payload, json=True) self._assert_status_code_is(create_response, 400) + def test_quota_source_label_basics(self): + quotas = self.dataset_populator.get_quotas() + prior_quotas_len = len(quotas) + + payload = { + "name": "defaultmylabeledquota1", + "description": "first default quota that is labeled", + "amount": "120MB", + "operation": "=", + "default": "registered", + "quota_source_label": "mylabel", + } + self.dataset_populator.create_quota(payload) + + quotas = self.dataset_populator.get_quotas() + assert len(quotas) == prior_quotas_len + 1 + + labels = [q["quota_source_label"] for q in quotas] + assert "mylabel" in labels + def _create_quota_with_name(self, quota_name: str, is_default: bool = False): payload = self._build_quota_payload_with_name(quota_name, is_default) create_response = self._post("quotas", data=payload, json=True) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 5b6ae2daa821..3b57d9bbc0cc 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -22,6 +22,7 @@ get_object_session, ) from galaxy.model.security import GalaxyRBACAgent +from galaxy.objectstore import QuotaSourceMap from galaxy.util.unittest import TestCase datatypes_registry = galaxy.datatypes.registry.Registry() @@ -497,7 +498,7 @@ def test_populated_optimized_list_list_not_populated(self): def test_default_disk_usage(self): u = model.User(email="disk_default@test.com", password="password") self.persist(u) - u.adjust_total_disk_usage(1) + u.adjust_total_disk_usage(1, None) u_id = u.id self.expunge() user_reload = self.model.session.query(model.User).get(u_id) @@ -1132,8 +1133,11 @@ def _workflow_from_steps(user, steps): class MockObjectStore: - def __init__(self): - pass + def __init__(self, quota_source_map=None): + self._quota_source_map = quota_source_map or QuotaSourceMap() + + def get_quota_source_map(self): + return self._quota_source_map def size(self, dataset): return 42 diff --git a/test/unit/data/test_quota.py b/test/unit/data/test_quota.py index 65421f8454b1..ebdc256c5b65 100644 --- a/test/unit/data/test_quota.py +++ b/test/unit/data/test_quota.py @@ -1,26 +1,103 @@ +import uuid + from galaxy import model +from galaxy.objectstore import ( + QuotaSourceInfo, + QuotaSourceMap, +) from galaxy.quota import DatabaseQuotaAgent -from .test_galaxy_mapping import BaseModelTestCase +from .test_galaxy_mapping import ( + BaseModelTestCase, + MockObjectStore, +) -class TestCalculateUsage(BaseModelTestCase): - def test_calculate_usage(self): - u = model.User(email="calc_usage@example.com", password="password") +class TestPurgeUsage(BaseModelTestCase): + def setUp(self): + super().setUp() + model = self.model + u = model.User(email="purge_usage@example.com", password="password") + u.disk_usage = 25 self.persist(u) - h = model.History(name="History for Usage", user=u) + h = model.History(name="History for Purging", user=u) self.persist(h) + self.u = u + self.h = h - d1 = model.HistoryDatasetAssociation( - extension="txt", history=h, create_dataset=True, sa_session=self.model.session + def _setup_dataset(self): + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=self.h, create_dataset=True, sa_session=self.model.session ) d1.dataset.total_size = 10 self.persist(d1) + return d1 + + def test_calculate_usage(self): + d1 = self._setup_dataset() + quota_source_info = QuotaSourceInfo(None, True) + d1.purge_usage_from_quota(self.u, quota_source_info) + self.persist(self.u) + assert int(self.u.disk_usage) == 15 + + def test_calculate_usage_untracked(self): + # test quota tracking off on the objectstore + d1 = self._setup_dataset() + quota_source_info = QuotaSourceInfo(None, False) + d1.purge_usage_from_quota(self.u, quota_source_info) + self.persist(self.u) + assert int(self.u.disk_usage) == 25 + + def test_calculate_usage_per_source(self): + self.u.adjust_total_disk_usage(124, "myquotalabel") + + # test quota tracking with a non-default quota label + d1 = self._setup_dataset() + quota_source_info = QuotaSourceInfo("myquotalabel", True) + d1.purge_usage_from_quota(self.u, quota_source_info) + self.persist(self.u) + assert int(self.u.disk_usage) == 25 - assert u.calculate_disk_usage() == 10 + usages = self.u.dictify_usage() + assert len(usages) == 2 + assert usages[1]["quota_source_label"] == "myquotalabel" + assert usages[1]["total_disk_usage"] == 114 + + +class TestCalculateUsage(BaseModelTestCase): + def setUp(self): + model = self.model + u = model.User(email="calc_usage%s@example.com" % str(uuid.uuid1()), password="password") + self.persist(u) + h = model.History(name="History for Calculated Usage", user=u) + self.persist(h) + self.u = u + self.h = h + + def _add_dataset(self, total_size, object_store_id=None): + model = self.model + d1 = model.HistoryDatasetAssociation( + extension="txt", history=self.h, create_dataset=True, sa_session=self.model.session + ) + d1.dataset.total_size = total_size + d1.dataset.object_store_id = object_store_id + self.persist(d1) + return d1 + + def test_calculate_usage(self): + model = self.model + u = self.u + h = self.h + + d1 = self._add_dataset(10) + + object_store = MockObjectStore() + assert u.calculate_disk_usage_default_source(object_store) == 10 assert u.disk_usage is None - u.calculate_and_set_disk_usage() - assert u.disk_usage == 10 + u.calculate_and_set_disk_usage(object_store) + assert u.calculate_disk_usage_default_source(object_store) == 10 + # method no longer updates user object + # assert u.disk_usage == 10 # Test dataset being in another history doesn't duplicate usage cost. h2 = model.History(name="Second usage history", user=u) @@ -32,7 +109,138 @@ def test_calculate_usage(self): d3 = model.HistoryDatasetAssociation(extension="txt", history=h, dataset=d1.dataset) self.persist(d3) - assert u.calculate_disk_usage() == 10 + assert u.calculate_disk_usage_default_source(object_store) == 10 + + def test_calculate_usage_disabled_quota(self): + u = self.u + + self._add_dataset(10, "not_tracked") + self._add_dataset(15, "tracked") + + quota_source_map = QuotaSourceMap() + not_tracked = QuotaSourceMap() + not_tracked.default_quota_enabled = False + quota_source_map.backends["not_tracked"] = not_tracked + + object_store = MockObjectStore(quota_source_map) + + assert u.calculate_disk_usage_default_source(object_store) == 15 + + def test_calculate_usage_alt_quota(self): + model = self.model + u = self.u + + self._add_dataset(10) + self._add_dataset(15, "alt_source_store") + + quota_source_map = QuotaSourceMap() + alt_source = QuotaSourceMap() + alt_source.default_quota_source = "alt_source" + quota_source_map.backends["alt_source_store"] = alt_source + + object_store = MockObjectStore(quota_source_map) + + u.calculate_and_set_disk_usage(object_store) + model.context.refresh(u) + usages = u.dictify_usage(object_store) + assert len(usages) == 2 + assert usages[0]["quota_source_label"] is None + assert usages[0]["total_disk_usage"] == 10 + + assert usages[1]["quota_source_label"] == "alt_source" + assert usages[1]["total_disk_usage"] == 15 + + usage = u.dictify_usage_for(None) + assert usage["quota_source_label"] is None + assert usage["total_disk_usage"] == 10 + + usage = u.dictify_usage_for("alt_source") + assert usage["quota_source_label"] == "alt_source" + assert usage["total_disk_usage"] == 15 + + usage = u.dictify_usage_for("unused_source") + assert usage["quota_source_label"] == "unused_source" + assert usage["total_disk_usage"] == 0 + + def test_calculate_usage_removes_unused_quota_labels(self): + model = self.model + u = self.u + + self._add_dataset(10) + self._add_dataset(15, "alt_source_store") + + quota_source_map = QuotaSourceMap() + alt_source = QuotaSourceMap() + alt_source.default_quota_source = "alt_source" + quota_source_map.backends["alt_source_store"] = alt_source + + object_store = MockObjectStore(quota_source_map) + + u.calculate_and_set_disk_usage(object_store) + model.context.refresh(u) + usages = u.dictify_usage() + assert len(usages) == 2 + assert usages[0]["quota_source_label"] is None + assert usages[0]["total_disk_usage"] == 10 + + assert usages[1]["quota_source_label"] == "alt_source" + assert usages[1]["total_disk_usage"] == 15 + + alt_source.default_quota_source = "new_alt_source" + u.calculate_and_set_disk_usage(object_store) + model.context.refresh(u) + usages = u.dictify_usage() + assert len(usages) == 2 + assert usages[0]["quota_source_label"] is None + assert usages[0]["total_disk_usage"] == 10 + + assert usages[1]["quota_source_label"] == "new_alt_source" + assert usages[1]["total_disk_usage"] == 15 + + def test_dictify_usage_unused_quota_labels(self): + model = self.model + u = self.u + + self._add_dataset(10) + self._add_dataset(15, "alt_source_store") + + quota_source_map = QuotaSourceMap() + alt_source = QuotaSourceMap() + alt_source.default_quota_source = "alt_source" + quota_source_map.backends["alt_source_store"] = alt_source + + unused_source = QuotaSourceMap() + unused_source.default_quota_source = "unused_source" + quota_source_map.backends["unused_source_store"] = unused_source + + object_store = MockObjectStore(quota_source_map) + u.calculate_and_set_disk_usage(object_store) + model.context.refresh(u) + usages = u.dictify_usage(object_store) + assert len(usages) == 3 + + def test_calculate_usage_default_storage_disabled(self): + model = self.model + u = self.u + + self._add_dataset(10) + self._add_dataset(15, "alt_source_store") + + quota_source_map = QuotaSourceMap(None, False) + alt_source = QuotaSourceMap("alt_source", True) + quota_source_map.backends["alt_source_store"] = alt_source + + object_store = MockObjectStore(quota_source_map) + + u.calculate_and_set_disk_usage(object_store) + model.context.refresh(u) + usages = u.dictify_usage(object_store) + assert len(usages) == 2 + assert usages[0]["quota_source_label"] is None + assert usages[0]["total_disk_usage"] == 0 + + assert usages[1]["quota_source_label"] == "alt_source" + assert usages[1]["total_disk_usage"] == 15 class TestQuota(BaseModelTestCase): @@ -86,6 +294,27 @@ def test_quota(self): self._add_group_quota(u, quota) self._assert_user_quota_is(u, None) + def test_labeled_quota(self): + model = self.model + u = model.User(email="labeled_quota@example.com", password="password") + self.persist(u) + + label1 = "coollabel1" + self._assert_user_quota_is(u, None, label1) + + quota = model.Quota(name="default registered labeled", amount=21, quota_source_label=label1) + self.quota_agent.set_default_quota( + model.DefaultQuotaAssociation.types.REGISTERED, + quota, + ) + + self._assert_user_quota_is(u, 21, label1) + + quota = model.Quota(name="user quota add labeled", amount=31, operation="+", quota_source_label=label1) + self._add_user_quota(u, quota) + + self._assert_user_quota_is(u, 52, label1) + def _add_group_quota(self, user, quota): group = model.Group() uga = model.UserGroupAssociation(user, group) @@ -97,18 +326,56 @@ def _add_user_quota(self, user, quota): user.quotas.append(uqa) self.persist(quota, uqa, user) - def _assert_user_quota_is(self, user, amount): - actual_quota = self.quota_agent.get_quota(user) - assert amount == actual_quota, "Expected quota [%s], got [%s]" % (amount, actual_quota) - if amount is None: - user.total_disk_usage = 1000 - job = model.Job() - job.user = user - assert not self.quota_agent.is_over_quota(None, job, None) - else: - job = model.Job() - job.user = user - user.total_disk_usage = amount - 1 - assert not self.quota_agent.is_over_quota(None, job, None) - user.total_disk_usage = amount + 1 - assert self.quota_agent.is_over_quota(None, job, None) + def _assert_user_quota_is(self, user, amount, quota_source_label=None): + actual_quota = self.quota_agent.get_quota(user, quota_source_label=quota_source_label) + assert amount == actual_quota, f"Expected quota [{amount}], got [{actual_quota}]" + if quota_source_label is None: + if amount is None: + user.total_disk_usage = 1000 + job = self.model.Job() + job.user = user + assert not self.quota_agent.is_over_quota(None, job, None) + else: + job = self.model.Job() + job.user = user + user.total_disk_usage = amount - 1 + assert not self.quota_agent.is_over_quota(None, job, None) + user.total_disk_usage = amount + 1 + assert self.quota_agent.is_over_quota(None, job, None) + + +class TestUsage(BaseModelTestCase): + def test_usage(self): + model = self.model + u = model.User(email="usage@example.com", password="password") + self.persist(u) + + u.adjust_total_disk_usage(123, None) + self.persist(u) + + assert u.get_disk_usage() == 123 + + def test_labeled_usage(self): + model = self.model + u = model.User(email="labeled.usage@example.com", password="password") + self.persist(u) + assert len(u.quota_source_usages) == 0 + + u.adjust_total_disk_usage(123, "foobar") + usages = u.dictify_usage() + assert len(usages) == 1 + + assert u.get_disk_usage() == 0 + assert u.get_disk_usage(quota_source_label="foobar") == 123 + self.model.context.refresh(u) + + usages = u.dictify_usage() + assert len(usages) == 2 + + u.adjust_total_disk_usage(124, "foobar") + self.model.context.refresh(u) + + usages = u.dictify_usage() + assert len(usages) == 2 + assert usages[1]["quota_source_label"] == "foobar" + assert usages[1]["total_disk_usage"] == 247 diff --git a/test/unit/objectstore/test_objectstore.py b/test/unit/objectstore/test_objectstore.py index f880f4f2af41..382bee88d708 100644 --- a/test/unit/objectstore/test_objectstore.py +++ b/test/unit/objectstore/test_objectstore.py @@ -387,11 +387,13 @@ def test_mixed_private(): + + @@ -405,6 +407,8 @@ def test_mixed_private(): type: distributed backends: - id: files1 + quota: + source: 1files type: disk weight: 2 files_dir: "${temp_directory}/files1" @@ -414,6 +418,8 @@ def test_mixed_private(): - type: job_work path: "${temp_directory}/job_working_directory1" - id: files2 + quota: + source: 2files type: disk weight: 1 files_dir: "${temp_directory}/files2" @@ -446,10 +452,45 @@ def test_distributed_store(): _assert_has_keys(as_dict, ["backends", "extra_dirs", "type"]) _assert_key_has_value(as_dict, "type", "distributed") + backends = as_dict["backends"] + assert len(backends) + assert backends[0]["quota"]["source"] == "1files" + assert backends[1]["quota"]["source"] == "2files" + extra_dirs = as_dict["extra_dirs"] assert len(extra_dirs) == 2 +HIERARCHICAL_MUST_HAVE_UNIFIED_QUOTA_SOURCE = """ + + + + + + + + + + + + + + + +""" + + +def test_hiercachical_backend_must_share_quota_source(): + the_exception = None + for config_str in [HIERARCHICAL_MUST_HAVE_UNIFIED_QUOTA_SOURCE]: + try: + with TestConfig(config_str) as (directory, object_store): + pass + except Exception as e: + the_exception = e + assert the_exception is not None + + # Unit testing the cloud and advanced infrastructure object stores is difficult, but # we can at least stub out initializing and test the configuration of these things from # XML and dicts. From d1a2eab8d33f8719ed6a8a7afc864a339fdb4cb9 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 21 Feb 2023 12:51:14 -0500 Subject: [PATCH 05/21] Fix unique constraint add/remove in quota source labels... --- .../versions_gxy/d0583094c8cd_add_quota_source_labels.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py b/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py index 7284fef938b3..867ac31aa43f 100644 --- a/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py +++ b/lib/galaxy/model/migrations/alembic/versions_gxy/d0583094c8cd_add_quota_source_labels.py @@ -17,7 +17,6 @@ from galaxy.model.migrations.util import ( add_unique_constraint, drop_column, - drop_unique_constraint, ) # revision identifiers, used by Alembic. @@ -39,12 +38,12 @@ def upgrade(): Column("disk_usage", Numeric(15, 0)), ) add_unique_constraint("uqsu_unique_label_per_user", "user_quota_source_usage", ["user_id", "quota_source_label"]) - drop_unique_constraint("ix_default_quota_association_type", "default_quota_association") + op.drop_index("ix_default_quota_association_type", "default_quota_association") op.create_index("ix_quota_quota_source_label", "quota", ["quota_source_label"]) def downgrade(): - add_unique_constraint("ix_default_quota_association_type", "default_quota_association", ["type"]) + op.create_index("ix_default_quota_association_type", "default_quota_association", ["type"], unique=True) op.drop_table("user_quota_source_usage") op.drop_index("ix_quota_quota_source_label", "quota") drop_column("quota", "quota_source_label") From c7f200c0798672b3c45a4c3b08b9ffce38428b9b Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 9 Feb 2023 12:58:12 -0500 Subject: [PATCH 06/21] Rebuild client typescript schema. --- client/src/schema/schema.ts | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts index ed8ed9a13730..aab777fad7dd 100644 --- a/client/src/schema/schema.ts +++ b/client/src/schema/schema.ts @@ -2083,6 +2083,11 @@ export interface components { * @default = */ operation?: components["schemas"]["QuotaOperation"]; + /** + * Quota Source Label + * @description If set, quota source label to apply this quota operation to. Otherwise, the default quota is used. + */ + quota_source_label?: string; }; /** * CreateQuotaResult @@ -2114,6 +2119,11 @@ export interface components { * @description The name of the quota. This must be unique within a Galaxy instance. */ name: string; + /** + * Quota Source Label + * @description Quota source label + */ + quota_source_label?: string; /** * URL * @deprecated @@ -2475,6 +2485,16 @@ export interface components { * @description The percentage indicating how full the store is. */ percent_used?: number; + /** + * Quota + * @description Information about quota sources around dataset storage. + */ + quota: Record; + /** + * Shareable + * @description Is this dataset shareable. + */ + shareable: boolean; /** * Sources * @description The file sources associated with the supplied dataset instance. @@ -6198,6 +6218,11 @@ export interface components { * @default = */ operation?: components["schemas"]["QuotaOperation"]; + /** + * Quota Source Label + * @description Quota source label + */ + quota_source_label?: string; /** * Users * @description A list of specific users associated with this quota. @@ -6236,6 +6261,11 @@ export interface components { * @description The name of the quota. This must be unique within a Galaxy instance. */ name: string; + /** + * Quota Source Label + * @description Quota source label + */ + quota_source_label?: string; /** * URL * @deprecated @@ -10039,6 +10069,7 @@ export interface operations { * @deprecated * @description Whether to return visible or hidden datasets only. Leave unset for both. */ + /** @description Whether to return only shareable or not shareable datasets. Leave unset for both. */ /** @description View to be passed to the serializer */ /** @description Comma-separated list of keys to be passed to the serializer */ /** @@ -10062,6 +10093,7 @@ export interface operations { types?: string[]; deleted?: boolean; visible?: boolean; + shareable?: boolean; view?: string; keys?: string; q?: string[]; @@ -10848,6 +10880,7 @@ export interface operations { * @deprecated * @description Whether to return visible or hidden datasets only. Leave unset for both. */ + /** @description Whether to return only shareable or not shareable datasets. Leave unset for both. */ /** @description View to be passed to the serializer */ /** @description Comma-separated list of keys to be passed to the serializer */ /** @@ -10871,6 +10904,7 @@ export interface operations { types?: string[]; deleted?: boolean; visible?: boolean; + shareable?: boolean; view?: string; keys?: string; q?: string[]; From dfd4fd1d8729ae1184ea0ac9ae4a3353f9300f38 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 14 Feb 2023 12:00:59 -0500 Subject: [PATCH 07/21] unit test fix --- client/src/components/User/DiskUsage/Quota/QuotaUsageBar.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/components/User/DiskUsage/Quota/QuotaUsageBar.vue b/client/src/components/User/DiskUsage/Quota/QuotaUsageBar.vue index 00667bc95404..d950eb174a30 100644 --- a/client/src/components/User/DiskUsage/Quota/QuotaUsageBar.vue +++ b/client/src/components/User/DiskUsage/Quota/QuotaUsageBar.vue @@ -5,7 +5,7 @@ import { DEFAULT_QUOTA_SOURCE_LABEL, QuotaUsage } from "./model/QuotaUsage"; interface QuotaUsageBarProps { quotaUsage: QuotaUsage; - embedded: boolean; + embedded?: boolean; } const props = withDefaults(defineProps(), { From 6d91292beb65adfbaa697c2ea56c8b5ee1f09e9f Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 14 Feb 2023 12:03:06 -0500 Subject: [PATCH 08/21] Lint fixes... --- lib/galaxy/model/__init__.py | 4 ++-- lib/galaxy/objectstore/__init__.py | 2 +- scripts/cleanup_datasets/pgcleanup.py | 2 +- test/integration/objectstore/test_quota_limit.py | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 65f4447f62b2..36a5cc1ed263 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -581,7 +581,7 @@ def calculate_user_disk_usage_statements(user_id, quota_source_map, for_sqlite=F source = quota_source_map.ids_per_quota_source() # TODO: Merge a lot of these settings together by generating a temp table for # the object_store_id to quota_source_label into a temp table of values - for (quota_source_label, object_store_ids) in source.items(): + for quota_source_label, object_store_ids in source.items(): label_usage = UNIQUE_DATASET_USER_USAGE.format( dataset_condition="dataset.object_store_id IN :include_object_store_ids" ) @@ -948,7 +948,7 @@ def _calculate_or_set_disk_usage(self, object_store): sa_session = object_session(self) for_sqlite = "sqlite" in sa_session.bind.dialect.name statements = calculate_user_disk_usage_statements(self.id, quota_source_map, for_sqlite) - for (sql, args) in statements: + for sql, args in statements: statement = text(sql) binds = [] for key, _ in args.items(): diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 92100b035834..4ea654984222 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -1425,7 +1425,7 @@ def get_id_to_source_pairs(self): def ids_per_quota_source(self): quota_sources: Dict[str, List[str]] = {} - for (object_id, quota_source_label) in self.get_id_to_source_pairs(): + for object_id, quota_source_label in self.get_id_to_source_pairs(): if quota_source_label not in quota_sources: quota_sources[quota_source_label] = [] quota_sources[quota_source_label].append(object_id) diff --git a/scripts/cleanup_datasets/pgcleanup.py b/scripts/cleanup_datasets/pgcleanup.py index 8e7e3eaef1bc..5894bc438e7a 100755 --- a/scripts/cleanup_datasets/pgcleanup.py +++ b/scripts/cleanup_datasets/pgcleanup.py @@ -394,7 +394,7 @@ def recalculate_disk_usage(self): quota_source_map = self.object_store.get_quota_source_map() statements = calculate_user_disk_usage_statements(user_id, quota_source_map) - for (sql, args) in statements: + for sql, args in statements: sql, _ = re.subn(r"\:([\w]+)", r"%(\1)s", sql) new_args = {} for key, val in args.items(): diff --git a/test/integration/objectstore/test_quota_limit.py b/test/integration/objectstore/test_quota_limit.py index b26e002f3057..ad2cd6b1ef38 100644 --- a/test/integration/objectstore/test_quota_limit.py +++ b/test/integration/objectstore/test_quota_limit.py @@ -16,7 +16,6 @@ def handle_galaxy_config_kwds(cls, config): def test_selection_limit(self): with self.dataset_populator.test_history() as history_id: - hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3\n4 5 6\n7 8 9\n") self.dataset_populator.wait_for_history(history_id) hda1_input = {"src": "hda", "id": hda1["id"]} From 4432497c9d0cf24ce6f7a1e0792b24ece36834af Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 14 Feb 2023 12:10:19 -0500 Subject: [PATCH 09/21] Drop nice_total_disk_usage from usage APIs. --- lib/galaxy/managers/users.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/galaxy/managers/users.py b/lib/galaxy/managers/users.py index 84188d906b1b..67639d2423e9 100644 --- a/lib/galaxy/managers/users.py +++ b/lib/galaxy/managers/users.py @@ -655,7 +655,6 @@ def serialize_disk_usage(self, user: model.User) -> List[Dict[str, Any]]: usage["quota_percent"] = self.user_manager.quota(user, quota_source_label=quota_source_label) usage["quota"] = self.user_manager.quota(user, total=True, quota_source_label=quota_source_label) usage["quota_bytes"] = self.user_manager.quota_bytes(user, quota_source_label=quota_source_label) - usage["nice_total_disk_usage"] = util.nice_size(usage["total_disk_usage"]) return rval def serialize_disk_usage_for(self, user: model.User, label: Optional[str]) -> Dict[str, Any]: @@ -664,7 +663,6 @@ def serialize_disk_usage_for(self, user: model.User, label: Optional[str]) -> Di usage["quota_percent"] = self.user_manager.quota(user, quota_source_label=quota_source_label) usage["quota"] = self.user_manager.quota(user, total=True, quota_source_label=quota_source_label) usage["quota_bytes"] = self.user_manager.quota_bytes(user, quota_source_label=quota_source_label) - usage["nice_total_disk_usage"] = util.nice_size(usage["total_disk_usage"]) return usage From 478583486e0c41d1e104787a98a8c8f0e82e402d Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 16 Feb 2023 14:48:15 -0500 Subject: [PATCH 10/21] Improved error message in api_asserts. --- lib/galaxy_test/base/api_asserts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy_test/base/api_asserts.py b/lib/galaxy_test/base/api_asserts.py index 37f2aff59c5f..60e14c7b338f 100644 --- a/lib/galaxy_test/base/api_asserts.py +++ b/lib/galaxy_test/base/api_asserts.py @@ -85,7 +85,7 @@ def assert_error_message_contains(response: Union[Response, dict], expected_cont as_dict = _as_dict(response) assert_has_keys(as_dict, "err_msg") err_msg = as_dict["err_msg"] - assert expected_contains in err_msg + assert expected_contains in err_msg, f"Expected error message [{err_msg}] to contain [{expected_contains}]." def _as_dict(response: Union[Response, dict]) -> Dict[str, Any]: From 017c612d2ee65c879b2b48cc744551eacee355bd Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 17 Jun 2022 12:10:07 -0400 Subject: [PATCH 11/21] Small hacks... are these needed? --- lib/galaxy/objectstore/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 4ea654984222..138f3bd5ab91 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -641,7 +641,7 @@ def _construct_path( hash id (e.g., /files/dataset_10.dat (old) vs. /files/000/dataset_10.dat (new)) """ - base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path)) + base = os.path.abspath(self.extra_dirs.get(base_dir) or self.file_path) # extra_dir should never be constructed from provided data but just # make sure there are no shenannigans afoot if extra_dir and extra_dir != os.path.normpath(extra_dir): From 46b6d85c67591fdb4368ca5bf12edf6d1718cfe7 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 9 Feb 2023 15:37:09 -0500 Subject: [PATCH 12/21] implement preferred object store id --- .../History/CurrentHistory/HistoryCounter.vue | 147 +++++++---- .../HistorySelectPreferredObjectStore.test.js | 64 +++++ .../HistorySelectPreferredObjectStore.vue | 79 ++++++ ...storyTargetPreferredObjectStorePopover.vue | 48 ++++ .../ObjectStore/DescribeObjectStore.test.js | 3 + .../ObjectStore/DescribeObjectStore.vue | 18 +- .../ObjectStore/ObjectStoreBadge.test.js | 38 +++ .../ObjectStore/ObjectStoreBadge.vue | 191 ++++++++++++++ .../ObjectStore/ObjectStoreBadges.vue | 35 +++ .../ObjectStore/SelectObjectStore.vue | 153 ++++++++++++ .../ShowSelectedObjectStore.test.js | 45 ++++ .../ObjectStore/ShowSelectedObjectStore.vue | 37 +++ .../ObjectStore/adminConfigMixin.js | 15 ++ .../ObjectStore/showTargetPopoverMixin.js | 18 ++ client/src/components/Tool/ToolCard.vue | 55 +++- client/src/components/Tool/ToolForm.vue | 10 + .../Tool/ToolSelectPreferredObjectStore.vue | 48 ++++ .../ToolTargetPreferredObjectStorePopover.vue | 35 +++ .../Quota/ProvidedQuotaSourceUsageBar.vue | 42 ++++ .../User/DiskUsage/Quota/QuotaUsageBar.vue | 10 +- .../src/components/User/UserPreferences.vue | 19 ++ .../User/UserPreferredObjectStore.test.js | 102 ++++++++ .../User/UserPreferredObjectStore.vue | 100 ++++++++ .../Workflow/Run/WorkflowRunFormSimple.vue | 111 ++++++--- .../WorkflowSelectPreferredObjectStore.vue | 48 ++++ .../Run/WorkflowStorageConfiguration.test.js | 65 +++++ .../Run/WorkflowStorageConfiguration.vue | 122 +++++++++ ...kflowTargetPreferredObjectStorePopover.vue | 36 +++ client/src/components/plugins/icons.js | 2 + .../providers/ObjectStoreProvider.js | 16 ++ client/src/utils/navigation/navigation.yml | 12 + client/tests/jest/helpers.js | 4 + lib/galaxy/jobs/__init__.py | 74 +++++- lib/galaxy/managers/configuration.py | 5 +- lib/galaxy/managers/histories.py | 13 + lib/galaxy/managers/users.py | 19 +- lib/galaxy/model/__init__.py | 41 +++ ...9540a051226e_preferred_object_store_ids.py | 33 +++ lib/galaxy/model/unittest_utils/data_app.py | 1 + lib/galaxy/objectstore/__init__.py | 132 +++++++++- .../objectstore/unittest_utils/__init__.py | 1 + lib/galaxy/security/validate_user_input.py | 12 + lib/galaxy/tools/__init__.py | 13 +- lib/galaxy/tools/actions/__init__.py | 2 + lib/galaxy/tools/execute.py | 2 + lib/galaxy/webapps/galaxy/api/object_store.py | 80 ++++++ .../webapps/galaxy/services/datasets.py | 5 + lib/galaxy/webapps/galaxy/services/tools.py | 9 +- lib/galaxy/workflow/run_request.py | 41 +++ lib/galaxy_test/base/populators.py | 22 +- ...ection_with_user_preferred_object_store.py | 235 ++++++++++++++++++ test/unit/objectstore/test_objectstore.py | 100 ++++++++ 52 files changed, 2454 insertions(+), 114 deletions(-) create mode 100644 client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.test.js create mode 100644 client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.vue create mode 100644 client/src/components/History/CurrentHistory/HistoryTargetPreferredObjectStorePopover.vue create mode 100644 client/src/components/ObjectStore/ObjectStoreBadge.test.js create mode 100644 client/src/components/ObjectStore/ObjectStoreBadge.vue create mode 100644 client/src/components/ObjectStore/ObjectStoreBadges.vue create mode 100644 client/src/components/ObjectStore/SelectObjectStore.vue create mode 100644 client/src/components/ObjectStore/ShowSelectedObjectStore.test.js create mode 100644 client/src/components/ObjectStore/ShowSelectedObjectStore.vue create mode 100644 client/src/components/ObjectStore/adminConfigMixin.js create mode 100644 client/src/components/ObjectStore/showTargetPopoverMixin.js create mode 100644 client/src/components/Tool/ToolSelectPreferredObjectStore.vue create mode 100644 client/src/components/Tool/ToolTargetPreferredObjectStorePopover.vue create mode 100644 client/src/components/User/DiskUsage/Quota/ProvidedQuotaSourceUsageBar.vue create mode 100644 client/src/components/User/UserPreferredObjectStore.test.js create mode 100644 client/src/components/User/UserPreferredObjectStore.vue create mode 100644 client/src/components/Workflow/Run/WorkflowSelectPreferredObjectStore.vue create mode 100644 client/src/components/Workflow/Run/WorkflowStorageConfiguration.test.js create mode 100644 client/src/components/Workflow/Run/WorkflowStorageConfiguration.vue create mode 100644 client/src/components/Workflow/Run/WorkflowTargetPreferredObjectStorePopover.vue create mode 100644 client/src/components/providers/ObjectStoreProvider.js create mode 100644 lib/galaxy/model/migrations/alembic/versions_gxy/9540a051226e_preferred_object_store_ids.py create mode 100644 lib/galaxy/webapps/galaxy/api/object_store.py create mode 100644 test/integration/objectstore/test_selection_with_user_preferred_object_store.py diff --git a/client/src/components/History/CurrentHistory/HistoryCounter.vue b/client/src/components/History/CurrentHistory/HistoryCounter.vue index d73e626733e8..5c3a4d8a22c5 100644 --- a/client/src/components/History/CurrentHistory/HistoryCounter.vue +++ b/client/src/components/History/CurrentHistory/HistoryCounter.vue @@ -1,67 +1,106 @@ diff --git a/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.test.js b/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.test.js new file mode 100644 index 000000000000..a02300ce0d50 --- /dev/null +++ b/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.test.js @@ -0,0 +1,64 @@ +import { mount } from "@vue/test-utils"; +import { getLocalVue } from "tests/jest/helpers"; +import HistorySelectPreferredObjectStore from "./HistorySelectPreferredObjectStore"; +import axios from "axios"; +import MockAdapter from "axios-mock-adapter"; +import flushPromises from "flush-promises"; + +const localVue = getLocalVue(true); + +const TEST_ROOT = "/"; +const TEST_HISTORY_ID = "myTestHistoryId"; + +const TEST_HISTORY = { + id: TEST_HISTORY_ID, + preferred_object_store_id: null, +}; + +function mountComponent() { + const wrapper = mount(HistorySelectPreferredObjectStore, { + propsData: { userPreferredObjectStoreId: null, history: TEST_HISTORY, root: TEST_ROOT }, + localVue, + }); + return wrapper; +} + +import { ROOT_COMPONENT } from "utils/navigation"; + +const OBJECT_STORES = [ + { object_store_id: "object_store_1", badges: [], quota: { enabled: false } }, + { object_store_id: "object_store_2", badges: [], quota: { enabled: false } }, +]; + +describe("HistorySelectPreferredObjectStore.vue", () => { + let axiosMock; + + beforeEach(async () => { + axiosMock = new MockAdapter(axios); + axiosMock.onGet("/api/object_store?selectable=true").reply(200, OBJECT_STORES); + }); + + afterEach(async () => { + axiosMock.restore(); + }); + + it("updates object store to default on selection null", async () => { + const wrapper = mountComponent(); + await flushPromises(); + const els = wrapper.findAll(ROOT_COMPONENT.preferences.object_store_selection.option_buttons.selector); + expect(els.length).toBe(3); + const galaxyDefaultOption = wrapper.find( + ROOT_COMPONENT.preferences.object_store_selection.option_button({ object_store_id: "__null__" }).selector + ); + expect(galaxyDefaultOption.exists()).toBeTruthy(); + axiosMock + .onPut(`/api/histories/${TEST_HISTORY_ID}`, expect.objectContaining({ preferred_object_store_id: null })) + .reply(202); + await galaxyDefaultOption.trigger("click"); + await flushPromises(); + const errorEl = wrapper.find(".object-store-selection-error"); + expect(errorEl.exists()).toBeFalsy(); + const emitted = wrapper.emitted(); + expect(emitted["updated"][0][0]).toEqual(null); + }); +}); diff --git a/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.vue b/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.vue new file mode 100644 index 000000000000..622329363faf --- /dev/null +++ b/client/src/components/History/CurrentHistory/HistorySelectPreferredObjectStore.vue @@ -0,0 +1,79 @@ + + + diff --git a/client/src/components/History/CurrentHistory/HistoryTargetPreferredObjectStorePopover.vue b/client/src/components/History/CurrentHistory/HistoryTargetPreferredObjectStorePopover.vue new file mode 100644 index 000000000000..a3f047f539d7 --- /dev/null +++ b/client/src/components/History/CurrentHistory/HistoryTargetPreferredObjectStorePopover.vue @@ -0,0 +1,48 @@ + + + diff --git a/client/src/components/ObjectStore/DescribeObjectStore.test.js b/client/src/components/ObjectStore/DescribeObjectStore.test.js index c81d8068f083..1c1acff73753 100644 --- a/client/src/components/ObjectStore/DescribeObjectStore.test.js +++ b/client/src/components/ObjectStore/DescribeObjectStore.test.js @@ -8,18 +8,21 @@ const localVue = getLocalVue(); const TEST_STORAGE_API_RESPONSE_WITHOUT_ID = { object_store_id: null, private: false, + badges: [], }; const TEST_RENDERED_MARKDOWN_AS_HTML = "

My cool markdown\n"; const TEST_STORAGE_API_RESPONSE_WITH_ID = { object_store_id: "foobar", private: false, + badges: [], }; const TEST_STORAGE_API_RESPONSE_WITH_NAME = { object_store_id: "foobar", name: "my cool storage", description: "My cool **markdown**", private: true, + badges: [], }; // works fine without mocking but I guess it is more JS unit-y with the mock? diff --git a/client/src/components/ObjectStore/DescribeObjectStore.vue b/client/src/components/ObjectStore/DescribeObjectStore.vue index fa2222ec53e9..475f583957e6 100644 --- a/client/src/components/ObjectStore/DescribeObjectStore.vue +++ b/client/src/components/ObjectStore/DescribeObjectStore.vue @@ -14,6 +14,7 @@ the default configured Galaxy object store . + diff --git a/client/src/components/ObjectStore/ObjectStoreBadge.test.js b/client/src/components/ObjectStore/ObjectStoreBadge.test.js new file mode 100644 index 000000000000..366127991bad --- /dev/null +++ b/client/src/components/ObjectStore/ObjectStoreBadge.test.js @@ -0,0 +1,38 @@ +import { mount } from "@vue/test-utils"; +import { getLocalVue } from "tests/jest/helpers"; +import ObjectStoreBadge from "./ObjectStoreBadge"; +import { ROOT_COMPONENT } from "utils/navigation"; + +const localVue = getLocalVue(true); + +const TEST_MESSAGE = "a test message provided by backend"; + +describe("ObjectStoreBadge", () => { + let wrapper; + + function mountBadge(badge) { + wrapper = mount(ObjectStoreBadge, { + propsData: { badge }, + localVue, + stubs: { "b-popover": true }, + }); + } + + it("should render a valid badge for more_secure type", async () => { + mountBadge({ type: "more_secure", message: TEST_MESSAGE }); + const selector = ROOT_COMPONENT.object_store_details.badge_of_type({ type: "more_secure" }).selector; + const iconEl = wrapper.find(selector); + expect(iconEl.exists()).toBeTruthy(); + expect(wrapper.vm.message).toContain(TEST_MESSAGE); + expect(wrapper.vm.stockMessage).toContain("more secure by the Galaxy adminstrator"); + }); + + it("should render a valid badge for less_secure type", async () => { + mountBadge({ type: "less_secure", message: TEST_MESSAGE }); + const selector = ROOT_COMPONENT.object_store_details.badge_of_type({ type: "less_secure" }).selector; + const iconEl = wrapper.find(selector); + expect(iconEl.exists()).toBeTruthy(); + expect(wrapper.vm.message).toContain(TEST_MESSAGE); + expect(wrapper.vm.stockMessage).toContain("less secure by the Galaxy adminstrator"); + }); +}); diff --git a/client/src/components/ObjectStore/ObjectStoreBadge.vue b/client/src/components/ObjectStore/ObjectStoreBadge.vue new file mode 100644 index 000000000000..d6f7c086cd1b --- /dev/null +++ b/client/src/components/ObjectStore/ObjectStoreBadge.vue @@ -0,0 +1,191 @@ + + + + + diff --git a/client/src/components/ObjectStore/ObjectStoreBadges.vue b/client/src/components/ObjectStore/ObjectStoreBadges.vue new file mode 100644 index 000000000000..1d6d8e21a149 --- /dev/null +++ b/client/src/components/ObjectStore/ObjectStoreBadges.vue @@ -0,0 +1,35 @@ + + + diff --git a/client/src/components/ObjectStore/SelectObjectStore.vue b/client/src/components/ObjectStore/SelectObjectStore.vue new file mode 100644 index 000000000000..7b8aae417bf9 --- /dev/null +++ b/client/src/components/ObjectStore/SelectObjectStore.vue @@ -0,0 +1,153 @@ + + + diff --git a/client/src/components/ObjectStore/ShowSelectedObjectStore.test.js b/client/src/components/ObjectStore/ShowSelectedObjectStore.test.js new file mode 100644 index 000000000000..142dc91f0b76 --- /dev/null +++ b/client/src/components/ObjectStore/ShowSelectedObjectStore.test.js @@ -0,0 +1,45 @@ +import { mount } from "@vue/test-utils"; +import { getLocalVue } from "tests/jest/helpers"; +import ShowSelectedObjectStore from "./ShowSelectedObjectStore"; +import axios from "axios"; +import MockAdapter from "axios-mock-adapter"; +import flushPromises from "flush-promises"; + +const localVue = getLocalVue(true); +const TEST_OBJECT_ID = "os123"; +const OBJECT_STORE_DATA = { + object_store_id: TEST_OBJECT_ID, + badges: [], +}; + +describe("ShowSelectedObjectStore", () => { + let wrapper; + let axiosMock; + + beforeEach(async () => { + axiosMock = new MockAdapter(axios); + }); + + afterEach(async () => { + axiosMock.restore(); + }); + + it("should show a loading message and then a DescribeObjectStore component", async () => { + axiosMock.onGet(`/api/object_store/${TEST_OBJECT_ID}`).reply(200, OBJECT_STORE_DATA); + wrapper = mount(ShowSelectedObjectStore, { + propsData: { preferredObjectStoreId: TEST_OBJECT_ID, forWhat: "Data goes into..." }, + localVue, + stubs: { + LoadingSpan: true, + DescribeObjectStore: true, + }, + }); + let loadingEl = wrapper.find("loadingspan-stub"); + expect(loadingEl.exists()).toBeTruthy(); + expect(loadingEl.attributes("message")).toBeLocalizationOf("Loading object store details"); + await flushPromises(); + loadingEl = wrapper.find("loadingspan-stub"); + expect(loadingEl.exists()).toBeFalsy(); + expect(wrapper.find("describeobjectstore-stub").exists()).toBeTruthy(); + }); +}); diff --git a/client/src/components/ObjectStore/ShowSelectedObjectStore.vue b/client/src/components/ObjectStore/ShowSelectedObjectStore.vue new file mode 100644 index 000000000000..a0a2e9cf938f --- /dev/null +++ b/client/src/components/ObjectStore/ShowSelectedObjectStore.vue @@ -0,0 +1,37 @@ + + + diff --git a/client/src/components/ObjectStore/adminConfigMixin.js b/client/src/components/ObjectStore/adminConfigMixin.js new file mode 100644 index 000000000000..70f265a941ab --- /dev/null +++ b/client/src/components/ObjectStore/adminConfigMixin.js @@ -0,0 +1,15 @@ +import MarkdownIt from "markdown-it"; + +export default { + methods: { + adminMarkup(markup) { + let markupHtml; + if (markup) { + markupHtml = MarkdownIt({ html: true }).render(markup); + } else { + markupHtml = null; + } + return markupHtml; + }, + }, +}; diff --git a/client/src/components/ObjectStore/showTargetPopoverMixin.js b/client/src/components/ObjectStore/showTargetPopoverMixin.js new file mode 100644 index 000000000000..4fc88ce24863 --- /dev/null +++ b/client/src/components/ObjectStore/showTargetPopoverMixin.js @@ -0,0 +1,18 @@ +import ShowSelectedObjectStore from "./ShowSelectedObjectStore"; + +export default { + components: { + ShowSelectedObjectStore, + }, + props: { + titleSuffix: { + type: String, + default: null, + }, + }, + computed: { + title() { + return this.l(`Preferred Target Object Store ${this.titleSuffix || ""}`); + }, + }, +}; diff --git a/client/src/components/Tool/ToolCard.vue b/client/src/components/Tool/ToolCard.vue index dde944c12025..f877b14af41c 100644 --- a/client/src/components/Tool/ToolCard.vue +++ b/client/src/components/Tool/ToolCard.vue @@ -6,6 +6,9 @@ import ToolOptionsButton from "components/Tool/Buttons/ToolOptionsButton.vue"; import ToolFooter from "components/Tool/ToolFooter"; import ToolHelp from "components/Tool/ToolHelp"; import Heading from "components/Common/Heading"; +import ToolSelectPreferredObjectStore from "./ToolSelectPreferredObjectStore"; +import ToolTargetPreferredObjectStorePopover from "./ToolTargetPreferredObjectStorePopover"; +import { getAppRoot } from "onload/loadConfig"; import { computed, ref, watch } from "vue"; import { useCurrentUser } from "composables/user"; @@ -45,9 +48,17 @@ const props = defineProps({ type: Boolean, default: false, }, + allowObjectStoreSelection: { + type: Boolean, + default: false, + }, + preferredObjectStoreId: { + type: String, + default: null, + }, }); -const emit = defineEmits(["onChangeVersion"]); +const emit = defineEmits(["onChangeVersion", "updatePreferredObjectStoreId"]); function onChangeVersion(v) { emit("onChangeVersion", v); @@ -68,9 +79,22 @@ function onSetError(e) { const { currentUser: user } = useCurrentUser(false, true); const hasUser = computed(() => !user.value.isAnonymous); - const versions = computed(() => props.options.versions); const showVersions = computed(() => props.options.versions?.length > 1); + +const root = computed(() => getAppRoot()); +const showPreferredObjectStoreModal = ref(false); +const toolPreferredObjectStoreId = ref(props.preferredObjectStoreId); + +function onShowObjectStoreSelect() { + showPreferredObjectStoreModal.value = true; +} + +function onUpdatePreferredObjectStoreId(selectedToolPreferredObjectStoreId) { + showPreferredObjectStoreModal.value = false; + toolPreferredObjectStoreId.value = selectedToolPreferredObjectStoreId; + emit("updatePreferredObjectStoreId", selectedToolPreferredObjectStoreId); +}