From eb1649e30eccee7ba2b6b8c75448be5afcefaff0 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 9 Jun 2022 10:33:37 -0400 Subject: [PATCH] private objectstores & dataset.sharable Setup abstractions to prevent sharing transient or private per-user objects in an objectstore. --- .../DatasetStorage/DatasetStorage.test.js | 8 +- .../Dataset/DatasetStorage/DatasetStorage.vue | 19 ++- .../ObjectStoreRestrictionSpan.test.js | 27 ++++ .../ObjectStoreRestrictionSpan.vue | 38 ++++++ client/src/mvc/library/library-model.js | 2 +- lib/galaxy/job_execution/output_collect.py | 2 +- lib/galaxy/jobs/__init__.py | 4 +- lib/galaxy/jobs/runners/__init__.py | 2 + lib/galaxy/model/__init__.py | 47 ++++++- lib/galaxy/model/security.py | 54 ++++++-- lib/galaxy/objectstore/__init__.py | 106 ++++++++++++--- lib/galaxy/objectstore/azure_blob.py | 1 + lib/galaxy/objectstore/cloud.py | 1 + lib/galaxy/objectstore/irods.py | 2 + lib/galaxy/objectstore/pithos.py | 2 + lib/galaxy/objectstore/s3.py | 2 + lib/galaxy/tools/actions/upload_common.py | 4 +- .../webapps/galaxy/api/history_contents.py | 8 ++ .../webapps/galaxy/controllers/dataset.py | 7 +- .../webapps/galaxy/services/datasets.py | 4 + .../galaxy/services/history_contents.py | 14 ++ .../api/test_dataset_collections.py | 2 +- lib/galaxy_test/api/test_libraries.py | 1 + lib/galaxy_test/base/populators.py | 26 +++- .../objectstore/test_private_handling.py | 51 +++++++ test/unit/data/test_galaxy_mapping.py | 128 ++++++++++++++++-- test/unit/objectstore/test_objectstore.py | 57 +++++++- 27 files changed, 560 insertions(+), 59 deletions(-) create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js create mode 100644 client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue create mode 100644 test/integration/objectstore/test_private_handling.py diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js index 6b34492d590a..229c7e170c0c 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.test.js @@ -10,14 +10,17 @@ const localVue = getLocalVue(); const TEST_STORAGE_API_RESPONSE_WITHOUT_ID = { object_store_id: null, + private: false, }; const TEST_STORAGE_API_RESPONSE_WITH_ID = { object_store_id: "foobar", + private: false, }; const TEST_STORAGE_API_RESPONSE_WITH_NAME = { object_store_id: "foobar", name: "my cool storage", description: "My cool **markdown**", + private: true, }; const TEST_DATASET_ID = "1"; const TEST_STORAGE_URL = `/api/datasets/${TEST_DATASET_ID}/storage`; @@ -46,9 +49,6 @@ describe("Dataset Storage", () => { wrapper = shallowMount(DatasetStorage, { propsData: { datasetId: TEST_DATASET_ID }, localVue, - stubs: { - "loading-span": true, - }, }); } @@ -102,6 +102,7 @@ describe("Dataset Storage", () => { expect(byIdSpan.length).toBe(1); const byNameSpan = wrapper.findAll(".display-os-by-name"); expect(byNameSpan.length).toBe(0); + expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeFalsy(); }); it("test dataset storage with object store name", async () => { @@ -116,6 +117,7 @@ describe("Dataset Storage", () => { expect(byIdSpan.length).toBe(0); const byNameSpan = wrapper.findAll(".display-os-by-name"); expect(byNameSpan.length).toBe(1); + expect(wrapper.find("object-store-restriction-span-stub").props("isPrivate")).toBeTruthy(); }); afterEach(() => { diff --git a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue index 4d3e47d04760..4b8b6de0523e 100644 --- a/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue +++ b/client/src/components/Dataset/DatasetStorage/DatasetStorage.vue @@ -18,13 +18,17 @@

This dataset is stored in - - a Galaxy object store named {{ storageInfo.name }} + + a Galaxy object store named + {{ storageInfo.name }} - - a Galaxy object store with id {{ storageInfo.object_store_id }} + + a Galaxy object store with id + {{ storageInfo.object_store_id }} - the default configured Galaxy object store . + + the default configured Galaxy object store .

@@ -37,10 +41,12 @@ import { getAppRoot } from "onload/loadConfig"; import LoadingSpan from "components/LoadingSpan"; import MarkdownIt from "markdown-it"; import { errorMessageAsString } from "utils/simple-error"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; export default { components: { LoadingSpan, + ObjectStoreRestrictionSpan, }, props: { datasetId: { @@ -80,6 +86,9 @@ export default { } return rootSources[0].source_uri; }, + isPrivate() { + return this.storageInfo.private; + }, }, created() { const datasetId = this.datasetId; diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js new file mode 100644 index 000000000000..0f199a2a8f51 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.test.js @@ -0,0 +1,27 @@ +import { shallowMount } from "@vue/test-utils"; +import { getLocalVue } from "jest/helpers"; +import ObjectStoreRestrictionSpan from "./ObjectStoreRestrictionSpan"; + +const localVue = getLocalVue(); + +describe("ObjectStoreRestrictionSpan", () => { + let wrapper; + + it("should render info about private storage if isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: true }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("private"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); + + it("should render info about unrestricted storage if not isPrivate", () => { + wrapper = shallowMount(ObjectStoreRestrictionSpan, { + propsData: { isPrivate: false }, + localVue, + }); + expect(wrapper.find(".stored-how").text()).toBe("unrestricted"); + expect(wrapper.find(".stored-how").attributes("title")).toBeTruthy(); + }); +}); diff --git a/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue new file mode 100644 index 000000000000..29d313a72142 --- /dev/null +++ b/client/src/components/Dataset/DatasetStorage/ObjectStoreRestrictionSpan.vue @@ -0,0 +1,38 @@ + + + + + diff --git a/client/src/mvc/library/library-model.js b/client/src/mvc/library/library-model.js index e81e344c49c4..3220e627de1b 100644 --- a/client/src/mvc/library/library-model.js +++ b/client/src/mvc/library/library-model.js @@ -174,7 +174,7 @@ var HistoryContents = Backbone.Collection.extend({ this.id = options.id; }, url: function () { - return `${this.urlRoot + this.id}/contents`; + return `${this.urlRoot + this.id}/contents?sharable=true`; }, model: HistoryItem, }); diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index 7fb773c62d1f..7bbbf851256d 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -341,7 +341,7 @@ def add_library_dataset_to_folder(self, library_folder, ld): trans.app.security_agent.copy_library_permissions(trans, ld, ldda) # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.set_all_dataset_permissions( - ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user) + ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), flush=False, new=True ) library_folder.add_library_dataset(ld, genome_build=ldda.dbkey) trans.sa_session.add(library_folder) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 250e266643aa..abdb5dd8a13f 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1584,6 +1584,8 @@ def _set_object_store_ids(self, job): object_store_populator = ObjectStorePopulator(self.app, job.user) object_store_id = self.get_destination_configuration("object_store_id", None) + require_sharable = job.requires_sharable_storage(self.app.security_agent) + if object_store_id: object_store_populator.object_store_id = object_store_id @@ -1595,7 +1597,7 @@ def _set_object_store_ids(self, job): # afterward. State below needs to happen the same way. for dataset_assoc in job.output_datasets + job.output_library_datasets: dataset = dataset_assoc.dataset - object_store_populator.set_object_store_id(dataset) + object_store_populator.set_object_store_id(dataset, require_sharable=require_sharable) job.object_store_id = object_store_populator.object_store_id self._setup_working_directory(job=job) diff --git a/lib/galaxy/jobs/runners/__init__.py b/lib/galaxy/jobs/runners/__init__.py index de4ff3c0c375..86a7cf8bbf0d 100644 --- a/lib/galaxy/jobs/runners/__init__.py +++ b/lib/galaxy/jobs/runners/__init__.py @@ -167,6 +167,8 @@ def put(self, job_wrapper): """Add a job to the queue (by job identifier), indicate that the job is ready to run.""" put_timer = ExecutionTimer() queue_job = job_wrapper.enqueue() + # TODO: @jmchilton rebasing on a branch that had above line wrapped in an exception + # handler that would call fail on job_wrapper. Needed? if queue_job: self.mark_as_queued(job_wrapper) log.debug(f"Job [{job_wrapper.job_id}] queued {put_timer}") diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index a4fc9fc40d8c..80b841ccfc41 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -172,6 +172,7 @@ # Tags that get automatically propagated from inputs to outputs when running jobs. AUTO_PROPAGATED_TAGS = ["name"] YIELD_PER_ROWS = 100 +CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-sharable dataset." if TYPE_CHECKING: @@ -1470,6 +1471,19 @@ def remap_objects(p, k, obj): job_attrs["params"] = params_dict return job_attrs + def requires_sharable_storage(self, security_agent): + # An easy optimization would be to calculate this in galaxy.tools.actions when the + # job is created and all the output permissions are already known. Having to reload + # these permissions in the job code shouldn't strictly be needed. + + requires_sharing = False + for dataset_assoc in self.output_datasets + self.output_library_datasets: + if not security_agent.dataset_is_private_to_a_user(dataset_assoc.dataset.dataset): + requires_sharing = True + break + + return requires_sharing + def to_dict(self, view="collection", system_details=False): if view == "admin_job_list": rval = super().to_dict(view="collection") @@ -2979,7 +2993,14 @@ def __filter_contents(self, content_class, **kwds): visible = galaxy.util.string_as_bool_or_none(kwds.get("visible", None)) if visible is not None: query = query.filter(content_class.visible == visible) + if "object_store_ids" in kwds: + if content_class == HistoryDatasetAssociation: + query = query.join(content_class.dataset).filter( + Dataset.table.c.object_store_id.in_(kwds.get("object_store_ids")) + ) + # else ignoring object_store_ids on HDCAs... if "ids" in kwds: + assert "object_store_ids" not in kwds ids = kwds["ids"] max_in_filter_length = kwds.get("max_in_filter_length", MAX_IN_FILTER_LENGTH) if len(ids) < max_in_filter_length: @@ -3485,14 +3506,27 @@ def is_new(self): def in_ready_state(self): return self.state in self.ready_states + @property + def sharable(self): + """Return True if placed into an objectstore not labeled as ``private``.""" + if self.external_filename: + return True + else: + object_store = self._assert_object_store_set() + return not object_store.is_private(self) + + def ensure_sharable(self): + if not self.sharable: + raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) + def get_file_name(self): if self.purged: log.warning(f"Attempt to get file name of purged dataset {self.id}") return "" if not self.external_filename: - assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" - if self.object_store.exists(self): - file_name = self.object_store.get_filename(self) + object_store = self._assert_object_store_set() + if object_store.exists(self): + file_name = object_store.get_filename(self) else: file_name = "" if not file_name and self.state not in (self.states.NEW, self.states.QUEUED): @@ -3513,6 +3547,10 @@ def set_file_name(self, filename): file_name = property(get_file_name, set_file_name) + def _assert_object_store_set(self): + assert self.object_store is not None, f"Object Store has not been initialized for dataset {self.id}" + return self.object_store + def get_extra_files_path(self): # Unlike get_file_name - external_extra_files_path is not backed by an # actual database column so if SA instantiates this object - the @@ -4553,6 +4591,9 @@ def to_library_dataset_dataset_association( """ Copy this HDA to a library optionally replacing an existing LDDA. """ + if not self.dataset.sharable: + raise Exception("Attempting to share a non-sharable dataset.") + if replace_dataset: # The replace_dataset param ( when not None ) refers to a LibraryDataset that # is being replaced with a new version. diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py index 738db0b548c3..11509d13addb 100644 --- a/lib/galaxy/model/security.py +++ b/lib/galaxy/model/security.py @@ -899,16 +899,23 @@ def set_all_dataset_permissions(self, dataset, permissions=None, new=False, flus # Make sure that DATASET_MANAGE_PERMISSIONS is associated with at least 1 role has_dataset_manage_permissions = False permissions = permissions or {} - for action, roles in permissions.items(): - if isinstance(action, Action): - if action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS and roles: - has_dataset_manage_permissions = True - break - elif action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS.action and roles: - has_dataset_manage_permissions = True - break + for _ in _walk_action_roles(permissions, self.permitted_actions.DATASET_MANAGE_PERMISSIONS): + has_dataset_manage_permissions = True + break if not has_dataset_manage_permissions: return "At least 1 role must be associated with manage permissions on this dataset." + + # If this is new, the objectstore likely hasn't been set yet - defer check until + # the job handler assigns it. + if not new and not dataset.sharable: + # ensure dataset not shared. + dataset_access_roles = [] + for _, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_ACCESS): + dataset_access_roles.extend(roles) + + if len(dataset_access_roles) != 1 or dataset_access_roles[0].type != self.model.Role.types.PRIVATE: + return galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE + flush_needed = False # Delete all of the current permissions on the dataset if not new: @@ -937,6 +944,12 @@ def set_dataset_permission(self, dataset, permission=None): Permission looks like: { Action.action : [ Role, Role ] } """ permission = permission or {} + + # if modifying access - ensure it is sharable. + for _ in _walk_action_roles(permission, self.permitted_actions.DATASET_ACCESS): + dataset.ensure_sharable() + break + flush_needed = False for action, roles in permission.items(): if isinstance(action, Action): @@ -976,6 +989,7 @@ def copy_dataset_permissions(self, src, dst): self.set_all_dataset_permissions(dst, self.get_permissions(src)) def privately_share_dataset(self, dataset, users=None): + dataset.ensure_sharable() intersect = None users = users or [] for user in users: @@ -1154,6 +1168,19 @@ def dataset_is_private_to_user(self, trans, dataset): else: return False + def dataset_is_private_to_a_user(self, dataset): + """ + If the Dataset object has exactly one access role and that is + the current user's private role then we consider the dataset private. + """ + access_roles = dataset.get_access_roles(self) + + if len(access_roles) != 1: + return False + else: + access_role = access_roles[0] + return access_role.type == self.model.Role.types.PRIVATE + def datasets_are_public(self, trans, datasets): """ Given a transaction object and a list of Datasets, return @@ -1188,6 +1215,8 @@ def datasets_are_public(self, trans, datasets): def make_dataset_public(self, dataset): # A dataset is considered public if there are no "access" actions associated with it. Any # other actions ( 'manage permissions', 'edit metadata' ) are irrelevant. + dataset.ensure_sharable() + flush_needed = False for dp in dataset.actions: if dp.action == self.permitted_actions.DATASET_ACCESS.action: @@ -1632,3 +1661,12 @@ def set_dataset_permissions(self, hda, user, site): hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization(hda=hda, user=user, site=site) self.sa_session.add(hdadaa) self.sa_session.flush() + + +def _walk_action_roles(permissions, query_action): + for action, roles in permissions.items(): + if isinstance(action, Action): + if action == query_action and roles: + yield action, roles + elif action == query_action.action and roles: + yield action, roles diff --git a/lib/galaxy/objectstore/__init__.py b/lib/galaxy/objectstore/__init__.py index 62c531ffaaea..3e0ae1adb466 100644 --- a/lib/galaxy/objectstore/__init__.py +++ b/lib/galaxy/objectstore/__init__.py @@ -42,6 +42,7 @@ NO_SESSION_ERROR_MESSAGE = ( "Attempted to 'create' object store entity in configuration with no database session present." ) +DEFAULT_PRIVATE = False log = logging.getLogger(__name__) @@ -104,6 +105,9 @@ def create( This method will create a proper directory structure for the file if the directory does not already exist. + + The method returns the concrete objectstore the supplied object is stored + in. """ raise NotImplementedError() @@ -243,6 +247,19 @@ def get_concrete_store_description_markdown(self, obj): yet been set, this may return ``None``. """ + @abc.abstractmethod + def is_private(self, obj): + """Return True iff supplied object is stored in private ConcreteObjectStore.""" + + def object_store_ids(self, private=None): + """Return IDs of all concrete object stores - either private ones or non-private ones. + + This should just return an empty list for non-DistributedObjectStore object stores, + i.e. concrete objectstores and the HierarchicalObjectStore since these do not + use the object_store_id column for objects (Galaxy Datasets). + """ + return [] + @abc.abstractmethod def get_store_usage_percent(self): """Return the percentage indicating how full the store is.""" @@ -317,10 +334,11 @@ def to_dict(self): extra_dirs = [] for extra_dir_type, extra_dir_path in self.extra_dirs.items(): extra_dirs.append({"type": extra_dir_type, "path": extra_dir_path}) + store_type = self.store_type return { "config": config_to_dict(self.config), "extra_dirs": extra_dirs, - "type": self.store_type, + "type": store_type, } def _get_object_id(self, obj): @@ -377,6 +395,16 @@ def get_store_usage_percent(self): def get_store_by(self, obj, **kwargs): return self._invoke("get_store_by", obj, **kwargs) + def is_private(self, obj): + return self._invoke("is_private", obj) + + @classmethod + def parse_private_from_config_xml(clazz, config_xml): + private = DEFAULT_PRIVATE + if config_xml is not None: + private = asbool(config_xml.attrib.get("private", DEFAULT_PRIVATE)) + return private + class ConcreteObjectStore(BaseObjectStore): """Subclass of ObjectStore for stores that don't delegate (non-nested). @@ -404,9 +432,12 @@ def __init__(self, config, config_dict=None, **kwargs): self.store_by = config_dict.get("store_by", None) or getattr(config, "object_store_store_by", "id") self.name = config_dict.get("name", None) self.description = config_dict.get("description", None) + # Annotate this as true to prevent sharing of data. + self.private = config_dict.get("private", DEFAULT_PRIVATE) def to_dict(self): rval = super().to_dict() + rval["private"] = self.private rval["store_by"] = self.store_by rval["name"] = self.name rval["description"] = self.description @@ -421,6 +452,9 @@ def _get_concrete_store_description_markdown(self, obj): def _get_store_by(self, obj): return self.store_by + def _is_private(self, obj): + return self.private + class DiskObjectStore(ConcreteObjectStore): """ @@ -433,7 +467,7 @@ class DiskObjectStore(ConcreteObjectStore): >>> file_path=tempfile.mkdtemp() >>> obj = Bunch(id=1) >>> s = DiskObjectStore(Bunch(umask=0o077, jobs_directory=file_path, new_file_path=file_path, object_store_check_old_style=False), dict(files_dir=file_path)) - >>> s.create(obj) + >>> o = s.create(obj) >>> s.exists(obj) True >>> assert s.get_filename(obj) == file_path + '/000/dataset_1.dat' @@ -480,6 +514,7 @@ def parse_xml(clazz, config_xml): extra_dirs.append({"type": e.get("type"), "path": e.get("path")}) config_dict["extra_dirs"] = extra_dirs + config_dict["private"] = BaseObjectStore.parse_private_from_config_xml(config_xml) return config_dict def to_dict(self): @@ -619,6 +654,7 @@ def _create(self, obj, **kwargs): if not dir_only: open(path, "w").close() # Should be rb? umask_fix_perms(path, self.config.umask, 0o666) + return self def _empty(self, obj, **kwargs): """Override `ObjectStore`'s stub by checking file size on disk.""" @@ -753,7 +789,8 @@ def file_ready(self, obj, **kwargs): def _create(self, obj, **kwargs): """Create a backing file in a random backend.""" - random.choice(list(self.backends.values())).create(obj, **kwargs) + objectstore = random.choice(list(self.backends.values())) + return objectstore.create(obj, **kwargs) def _empty(self, obj, **kwargs): """For the first backend that has this `obj`, determine if it is empty.""" @@ -792,6 +829,9 @@ def _get_concrete_store_name(self, obj): def _get_concrete_store_description_markdown(self, obj): return self._call_method("_get_concrete_store_description_markdown", obj, None, False) + def _is_private(self, obj): + return self._call_method("_is_private", obj, ObjectNotFound, True) + def _get_store_by(self, obj): return self._call_method("_get_store_by", obj, None, False) @@ -966,25 +1006,28 @@ def __filesystem_monitor(self, sleeper: Sleeper): def _create(self, obj, **kwargs): """The only method in which obj.object_store_id may be None.""" - if obj.object_store_id is None or not self._exists(obj, **kwargs): - if obj.object_store_id is None or obj.object_store_id not in self.backends: + object_store_id = obj.object_store_id + if object_store_id is None or not self._exists(obj, **kwargs): + if object_store_id is None or object_store_id not in self.backends: try: - obj.object_store_id = random.choice(self.weighted_backend_ids) + object_store_id = random.choice(self.weighted_backend_ids) + obj.object_store_id = object_store_id except IndexError: raise ObjectInvalid( "objectstore.create, could not generate " - "obj.object_store_id: %s, kwargs: %s" % (str(obj), str(kwargs)) + "object_store_id: %s, kwargs: %s" % (str(obj), str(kwargs)) ) log.debug( - "Selected backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id) + "Selected backend '%s' for creation of %s %s" % (object_store_id, obj.__class__.__name__, obj.id) ) else: log.debug( "Using preferred backend '%s' for creation of %s %s" - % (obj.object_store_id, obj.__class__.__name__, obj.id) + % (object_store_id, obj.__class__.__name__, obj.id) ) - self.backends[obj.object_store_id].create(obj, **kwargs) + return self.backends[object_store_id].create(obj, **kwargs) + else: + return self.backends[object_store_id] def _call_method(self, method, obj, default, default_is_exception, **kwargs): object_store_id = self.__get_store_id_for(obj, **kwargs) @@ -1020,6 +1063,14 @@ def __get_store_id_for(self, obj, **kwargs): return id return None + def object_store_ids(self, private=None): + object_store_ids = [] + for backend_id, backend in self.backends.items(): + object_store_ids.extend(backend.object_store_ids(private=private)) + if backend.private is private or private is None: + object_store_ids.append(backend_id) + return object_store_ids + class HierarchicalObjectStore(NestedObjectStore): @@ -1037,22 +1088,33 @@ def __init__(self, config, config_dict, fsmon=False): super().__init__(config, config_dict) backends: Dict[int, ObjectStore] = {} + is_private = config_dict.get("private", DEFAULT_PRIVATE) for order, backend_def in enumerate(config_dict["backends"]): + backend_is_private = backend_def.get("private") + if backend_is_private is not None: + assert ( + is_private == backend_is_private + ), "The private attribute must be defined on the HierarchicalObjectStore and not contained concrete objectstores." backends[order] = build_object_store_from_config(config, config_dict=backend_def, fsmon=fsmon) self.backends = backends + self.private = is_private @classmethod def parse_xml(clazz, config_xml): backends_list = [] + is_private = BaseObjectStore.parse_private_from_config_xml(config_xml) for backend in sorted(config_xml.find("backends"), key=lambda b: int(b.get("order"))): store_type = backend.get("type") objectstore_class, _ = type_to_object_store_class(store_type) backend_config_dict = objectstore_class.parse_xml(backend) + backend_config_dict["private"] = is_private backend_config_dict["type"] = store_type backends_list.append(backend_config_dict) - return {"backends": backends_list} + config_dict = {"backends": backends_list} + config_dict["private"] = is_private + return config_dict def to_dict(self): as_dict = super().to_dict() @@ -1061,6 +1123,7 @@ def to_dict(self): backend_as_dict = backend.to_dict() backends.append(backend_as_dict) as_dict["backends"] = backends + as_dict["private"] = self.private return as_dict def _exists(self, obj, **kwargs): @@ -1072,7 +1135,13 @@ def _exists(self, obj, **kwargs): def _create(self, obj, **kwargs): """Call the primary object store.""" - self.backends[0].create(obj, **kwargs) + return self.backends[0].create(obj, **kwargs) + + def _is_private(self, obj): + # Unlink the DistributedObjectStore - the HierarchicalObjectStore does not use + # object_store_id - so all the contained object stores need to define is_private + # the same way. + return self.private def type_to_object_store_class(store, fsmon=False): @@ -1238,16 +1307,19 @@ def __init__(self, has_object_store, user): self.object_store_id = None self.user = user - def set_object_store_id(self, data): - self.set_dataset_object_store_id(data.dataset) + def set_object_store_id(self, data, require_sharable=False): + self.set_dataset_object_store_id(data.dataset, require_sharable=require_sharable) - def set_dataset_object_store_id(self, dataset): + def set_dataset_object_store_id(self, dataset, require_sharable=True): # Create an empty file immediately. The first dataset will be # created in the "default" store, all others will be created in # the same store as the first. dataset.object_store_id = self.object_store_id try: - self.object_store.create(dataset) + ensure_non_private = require_sharable + concrete_store = self.object_store.create(dataset, ensure_non_private=ensure_non_private) + if concrete_store.private and require_sharable: + raise Exception("Attempted to create shared output datasets in objectstore with sharing disabled") except ObjectInvalid: raise Exception("Unable to create output dataset: object store is full") self.object_store_id = dataset.object_store_id # these will be the same thing after the first output diff --git a/lib/galaxy/objectstore/azure_blob.py b/lib/galaxy/objectstore/azure_blob.py index 7326d5067812..29d5bdcaf7fd 100644 --- a/lib/galaxy/objectstore/azure_blob.py +++ b/lib/galaxy/objectstore/azure_blob.py @@ -77,6 +77,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. diff --git a/lib/galaxy/objectstore/cloud.py b/lib/galaxy/objectstore/cloud.py index 26986eb21148..dd19e98c187f 100644 --- a/lib/galaxy/objectstore/cloud.py +++ b/lib/galaxy/objectstore/cloud.py @@ -609,6 +609,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_os(rel_path, from_string="") + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index 82dcacc84d67..e2c7b1344a6b 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -108,6 +108,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": DiskObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -538,6 +539,7 @@ def _create(self, obj, **kwargs): open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_irods(rel_path, from_string="") log.debug("irods_pt _create: %s", ipt_timer) + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/objectstore/pithos.py b/lib/galaxy/objectstore/pithos.py index b7ec46b5d678..f8f9fedc02a6 100644 --- a/lib/galaxy/objectstore/pithos.py +++ b/lib/galaxy/objectstore/pithos.py @@ -77,6 +77,7 @@ def parse_config_xml(config_xml): log.error(msg) raise Exception(msg) r["extra_dirs"] = [{k: e.get(k) for k in attrs} for e in extra_dirs] + r["private"] = ConcreteObjectStore.parse_private_from_config_xml(config_xml) if "job_work" not in (d["type"] for d in r["extra_dirs"]): msg = f'No value for {tag}:type="job_work" in XML tree' log.error(msg) @@ -297,6 +298,7 @@ def _create(self, obj, **kwargs): new_file = os.path.join(self.staging_path, rel_path) open(new_file, "w").close() self.pithos.upload_from_string(rel_path, "") + return self def _empty(self, obj, **kwargs): """ diff --git a/lib/galaxy/objectstore/s3.py b/lib/galaxy/objectstore/s3.py index 6eb00e658dfe..bcbdea87dea1 100644 --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -105,6 +105,7 @@ def parse_config_xml(config_xml): "path": staging_path, }, "extra_dirs": extra_dirs, + "private": ConcreteObjectStore.parse_private_from_config_xml(config_xml), } except Exception: # Toss it back up after logging, we can't continue loading at this point. @@ -621,6 +622,7 @@ def _create(self, obj, **kwargs): rel_path = os.path.join(rel_path, alt_name if alt_name else f"dataset_{self._get_object_id(obj)}.dat") open(os.path.join(self.staging_path, rel_path), "w").close() self._push_to_os(rel_path, from_string="") + return self def _empty(self, obj, **kwargs): if self._exists(obj, **kwargs): diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py index 4029a3256915..911e8735f667 100644 --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -140,7 +140,7 @@ def __new_history_upload(trans, uploaded_dataset, history=None, state=None): trans.sa_session.flush() history.add_dataset(hda, genome_build=uploaded_dataset.dbkey, quota=False) permissions = trans.app.security_agent.history_get_default_permissions(history) - trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions) + trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False) trans.sa_session.flush() return hda @@ -211,7 +211,7 @@ def __new_library_upload(trans, cntrller, uploaded_dataset, library_bunch, tag_h else: # Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset trans.app.security_agent.set_all_dataset_permissions( - ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user) + ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), new=True ) folder.add_library_dataset(ld, genome_build=uploaded_dataset.dbkey) trans.sa_session.add(folder) diff --git a/lib/galaxy/webapps/galaxy/api/history_contents.py b/lib/galaxy/webapps/galaxy/api/history_contents.py index c5f20b4dddaf..a9ad2bb5a2b3 100644 --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -199,6 +199,11 @@ def get_legacy_index_query_params( description="Whether to return visible or hidden datasets only. Leave unset for both.", deprecated=True, ), + sharable: Optional[bool] = Query( + default=None, + title="Sharable", + description="Whether to return only sharable or not sharable datasets. Leave unset for both.", + ), ) -> LegacyHistoryContentsIndexParams: """This function is meant to be used as a dependency to render the OpenAPI documentation correctly""" @@ -208,6 +213,7 @@ def get_legacy_index_query_params( details=details, deleted=deleted, visible=visible, + sharable=sharable, ) @@ -217,6 +223,7 @@ def parse_legacy_index_query_params( details: Optional[str] = None, deleted: Optional[bool] = None, visible: Optional[bool] = None, + sharable: Optional[bool] = None, **_, # Additional params are ignored ) -> LegacyHistoryContentsIndexParams: """Parses (legacy) query parameters for the history contents `index` operation @@ -242,6 +249,7 @@ def parse_legacy_index_query_params( ids=id_list, deleted=deleted, visible=visible, + sharable=sharable, dataset_details=dataset_details, ) diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py index 187c20446ac2..ef00c2ff8aea 100644 --- a/lib/galaxy/webapps/galaxy/controllers/dataset.py +++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py @@ -325,7 +325,12 @@ def get_edit(self, trans, dataset_id=None, **kwd): permission_disable = True permission_inputs = list() if trans.user: - if data.dataset.actions: + if not data.dataset.sharable: + permission_message = "The dataset is stored on private storage to you and cannot be shared." + permission_inputs.append( + {"name": "not_sharable", "type": "hidden", "label": permission_message, "readonly": True} + ) + elif data.dataset.actions: in_roles = {} for action, roles in trans.app.security_agent.get_permissions(data.dataset).items(): in_roles[action.action] = [trans.security.encode_id(role.id) for role in roles] diff --git a/lib/galaxy/webapps/galaxy/services/datasets.py b/lib/galaxy/webapps/galaxy/services/datasets.py index 3da90a7185c3..17bb9524d1c1 100644 --- a/lib/galaxy/webapps/galaxy/services/datasets.py +++ b/lib/galaxy/webapps/galaxy/services/datasets.py @@ -100,6 +100,9 @@ class DatasetStorageDetails(Model): ) hashes: List[dict] = Field(description="The file contents hashes associated with the supplied dataset instance.") sources: List[dict] = Field(description="The file sources associated with the supplied dataset instance.") + sharable: bool = Field( + description="Is this dataset sharable.", + ) class DatasetInheritanceChainEntry(Model): @@ -354,6 +357,7 @@ def show_storage( sources = [s.to_dict() for s in dataset.sources] return DatasetStorageDetails( object_store_id=object_store_id, + sharable=dataset.sharable, name=name, description=description, percent_used=percent_used, diff --git a/lib/galaxy/webapps/galaxy/services/history_contents.py b/lib/galaxy/webapps/galaxy/services/history_contents.py index 8f9a981087a7..1feccdcccf05 100644 --- a/lib/galaxy/webapps/galaxy/services/history_contents.py +++ b/lib/galaxy/webapps/galaxy/services/history_contents.py @@ -61,6 +61,7 @@ User, ) from galaxy.model.security import GalaxyRBACAgent +from galaxy.objectstore import BaseObjectStore from galaxy.schema import ( FilterQueryParams, SerializationParams, @@ -153,6 +154,11 @@ class LegacyHistoryContentsIndexParams(Model): dataset_details: Optional[DatasetDetailsType] deleted: Optional[bool] visible: Optional[bool] + sharable: Optional[bool] = Field( + default=None, + title="Sharable", + description="Whether to return only sharable or not sharable datasets. Leave unset for both.", + ) class HistoryContentsIndexJobsSummaryParams(Model): @@ -263,6 +269,7 @@ class HistoriesContentsService(ServiceBase, ServesExportStores, ConsumesModelSto def __init__( self, security: IdEncodingHelper, + object_store: BaseObjectStore, history_manager: histories.HistoryManager, history_contents_manager: HistoryContentsManager, hda_manager: hdas.HDAManager, @@ -292,6 +299,7 @@ def __init__( self.item_operator = HistoryItemOperator(self.hda_manager, self.hdca_manager, self.dataset_collection_manager) self.short_term_storage_allocator = short_term_storage_allocator self.genomes_manager = genomes_manager + self.object_store = object_store def index( self, @@ -1113,6 +1121,12 @@ def __index_legacy( ids = legacy_params_dict.get("ids") if ids: legacy_params_dict["ids"] = self.decode_ids(ids) + + object_store_ids = None + sharable = legacy_params.sharable + if sharable is not None: + object_store_ids = self.object_store.object_store_ids(private=not sharable) + legacy_params_dict["object_store_ids"] = object_store_ids contents = history.contents_iter(**legacy_params_dict) items = [ self._serialize_legacy_content_item(trans, content, legacy_params_dict.get("dataset_details")) diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py index cb2da8feff27..5fe1b80ec131 100644 --- a/lib/galaxy_test/api/test_dataset_collections.py +++ b/lib/galaxy_test/api/test_dataset_collections.py @@ -182,7 +182,7 @@ def test_list_list_list_download(self): assert len(namelist) == 3, f"Expected 3 elements in [{namelist}]" def test_hda_security(self): - element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id) + element_identifiers = self.dataset_collection_populator.pair_identifiers(self.history_id, wait=True) self.dataset_populator.make_private(self.history_id, element_identifiers[0]["id"]) with self._different_user(): history_id = self.dataset_populator.new_history() diff --git a/lib/galaxy_test/api/test_libraries.py b/lib/galaxy_test/api/test_libraries.py index 52f30d1c3e57..ec470c27e08d 100644 --- a/lib/galaxy_test/api/test_libraries.py +++ b/lib/galaxy_test/api/test_libraries.py @@ -591,4 +591,5 @@ def _create_dataset_in_folder_in_library(self, library_name, content="1 2 3", wa hda_id = self.dataset_populator.new_dataset(history_id, content=content, wait=wait)["id"] payload = {"from_hda_id": hda_id, "create_type": "file", "folder_id": folder_id} ld = self._post(f"libraries/{folder_id}/contents", payload) + ld.raise_for_status() return ld diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 0365b6b89e51..53d6d99c4445 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -1003,9 +1003,23 @@ def make_private(self, history_id: str, dataset_id: str) -> dict: "access": [role_id], "manage": [role_id], } + response = self.update_permissions_raw(history_id, dataset_id, payload) + response.raise_for_status() + return response.json() + + def make_public_raw(self, history_id, dataset_id): + role_id = self.user_private_role_id() + payload = { + "access": json.dumps([]), + "manage": json.dumps([role_id]), + } + response = self.update_permissions_raw(history_id, dataset_id, payload) + return response + + def update_permissions_raw(self, history_id, dataset_id, payload): url = f"histories/{history_id}/contents/{dataset_id}/permissions" update_response = self._put(url, payload, admin=True, json=True) - assert update_response.status_code == 200, update_response.content + update_response.raise_for_status() return update_response.json() def make_public(self, history_id: str) -> dict: @@ -2526,8 +2540,8 @@ def __create_payload_collection(self, history_id, identifiers_func, collection_t payload = dict(history_id=history_id, collection_type=collection_type, **kwds) return payload - def pair_identifiers(self, history_id, contents=None): - hda1, hda2 = self.__datasets(history_id, count=2, contents=contents) + def pair_identifiers(self, history_id, contents=None, wait=False): + hda1, hda2 = self.__datasets(history_id, count=2, contents=contents, wait=wait) element_identifiers = [ dict(name="forward", src="hda", id=hda1["id"]), @@ -2563,10 +2577,12 @@ def __create(self, payload, wait=False): else: return self.dataset_populator.fetch(payload, wait=wait) - def __datasets(self, history_id, count, contents=None): + def __datasets(self, history_id, count, contents=None, wait=False): datasets = [] for i in range(count): - new_kwds = {} + new_kwds = { + "wait": wait, + } if contents: new_kwds["content"] = contents[i] datasets.append(self.dataset_populator.new_dataset(history_id, **new_kwds)) diff --git a/test/integration/objectstore/test_private_handling.py b/test/integration/objectstore/test_private_handling.py new file mode 100644 index 000000000000..b7914fd6cf7f --- /dev/null +++ b/test/integration/objectstore/test_private_handling.py @@ -0,0 +1,51 @@ +"""Integration tests for mixing store_by.""" + +import string + +from galaxy_test.base import api_asserts +from ._base import BaseObjectStoreIntegrationTestCase + +PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE = string.Template( + """ + + + + + +""" +) + +TEST_INPUT_FILES_CONTENT = "1 2 3" + + +class PrivatePreventsSharingObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = True + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + """Test each object store configures files correctly.""" + with self.dataset_populator.test_history() as history_id: + hda = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_FILES_CONTENT, wait=True) + content = self.dataset_populator.get_history_dataset_content(history_id, hda["id"]) + assert content.startswith(TEST_INPUT_FILES_CONTENT) + response = self.dataset_populator.make_public_raw(history_id, hda["id"]) + assert response.status_code != 200 + api_asserts.assert_error_message_contains(response, "Attempting to share a non-sharable dataset.") + + +class PrivateCannotWritePublicDataObjectStoreIntegrationTestCase(BaseObjectStoreIntegrationTestCase): + @classmethod + def handle_galaxy_config_kwds(cls, config): + config["new_user_dataset_access_role_default_private"] = False + cls._configure_object_store(PRIVATE_OBJECT_STORE_CONFIG_TEMPLATE, config) + + def test_both_types(self): + with self.dataset_populator.test_history() as history_id: + response = self.dataset_populator.new_dataset_request( + history_id, content=TEST_INPUT_FILES_CONTENT, wait=True, assert_ok=False + ) + job = response.json()["jobs"][0] + final_state = self.dataset_populator.wait_for_job(job["id"]) + assert final_state == "error" diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 908e5b7a01b6..1eba8fcabc86 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -35,6 +35,7 @@ not os.environ.get("GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE"), reason="GALAXY_TEST_UNIT_MAPPING_URI_POSTGRES_BASE not set", ) +PRIVATE_OBJECT_STORE_ID = "my_private_data" class BaseModelTestCase(unittest.TestCase): @@ -153,8 +154,12 @@ def assert_display_name_converts_to_unicode(item, name): assert history.get_display_name() == "Hello₩◎ґʟⅾ" def test_hda_to_library_dataset_dataset_association(self): - u = model.User(email="mary@example.com", password="password") - hda = model.HistoryDatasetAssociation(name="hda_name") + model = self.model + u = self.model.User(email="mary@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation( + name="hda_name", create_dataset=True, history=h1, sa_session=model.session + ) self.persist(hda) trans = collections.namedtuple("trans", "user") target_folder = model.LibraryFolder(name="library_folder") @@ -180,6 +185,23 @@ def test_hda_to_library_dataset_dataset_association(self): assert new_ldda.library_dataset.expired_datasets[0] == ldda assert target_folder.item_count == 1 + def test_hda_to_library_dataset_dataset_association_fails_if_private(self): + model = self.model + u = model.User(email="mary2@example.com", password="password") + h1 = model.History(name="History 1", user=u) + hda = model.HistoryDatasetAssociation( + name="hda_name", create_dataset=True, history=h1, sa_session=model.session + ) + hda.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self.persist(hda) + trans = collections.namedtuple("trans", "user") + target_folder = model.LibraryFolder(name="library_folder") + with pytest.raises(Exception): + hda.to_library_dataset_dataset_association( + trans=trans(user=u), + target_folder=target_folder, + ) + def test_tags(self): TAG_NAME = "Test Tag" my_tag = model.Tag(name=TAG_NAME) @@ -594,8 +616,8 @@ def test_history_contents(self): self.persist(u, h1, expunge=False) d1 = self.new_hda(h1, name="1") - d2 = self.new_hda(h1, name="2", visible=False) - d3 = self.new_hda(h1, name="3", deleted=True) + d2 = self.new_hda(h1, name="2", visible=False, object_store_id="foobar") + d3 = self.new_hda(h1, name="3", deleted=True, object_store_id="three_store") d4 = self.new_hda(h1, name="4", visible=False, deleted=True) self.session().flush() @@ -609,8 +631,11 @@ def contents_iter_names(**kwds): self.assertEqual(contents_iter_names(), ["1", "2", "3", "4"]) assert contents_iter_names(deleted=False) == ["1", "2"] assert contents_iter_names(visible=True) == ["1", "3"] + assert contents_iter_names(visible=True, object_store_ids=["three_store"]) == ["3"] assert contents_iter_names(visible=False) == ["2", "4"] assert contents_iter_names(deleted=True, visible=False) == ["4"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar"]) == ["2"] + assert contents_iter_names(deleted=False, object_store_ids=["foobar2"]) == [] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id]) == ["1", "2", "3", "4"] assert contents_iter_names(ids=[d1.id, d2.id, d3.id, d4.id], max_in_filter_length=1) == ["1", "2", "3", "4"] @@ -966,6 +991,77 @@ def test_next_hid(self): h._next_hid(n=3) assert h.hid_counter == 5 + def test_cannot_make_private_objectstore_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_public") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + self._make_owned(security_agent, u_from, d1) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_objectstore_dataset_shared(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_make_private_shared") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.privately_share_dataset(d1.dataset, [u_to]) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_set_dataset_permisson_on_private(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, _ = self._three_users("cannot_set_permissions_on_private") + + h = self.model.History(name="History for Prevent Sharing", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + role = security_agent.get_private_user_role(u_to, auto_create=True) + access_action = security_agent.permitted_actions.DATASET_ACCESS.action + + with pytest.raises(Exception) as exec_info: + security_agent.set_dataset_permission(d1.dataset, {access_action: [role]}) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + + def test_cannot_make_private_dataset_public(self): + security_agent = GalaxyRBACAgent(self.model) + u_from, u_to, u_other = self._three_users("cannot_make_private_dataset_public") + + h = self.model.History(name="History for Annotation", user=u_from) + d1 = self.model.HistoryDatasetAssociation( + extension="txt", history=h, create_dataset=True, sa_session=self.model.session + ) + self.persist(h, d1) + + d1.dataset.object_store_id = PRIVATE_OBJECT_STORE_ID + self._make_private(security_agent, u_from, d1) + + with pytest.raises(Exception) as exec_info: + security_agent.make_dataset_public(d1.dataset) + assert galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE in str(exec_info.value) + def _three_users(self, suffix): email_from = f"user_{suffix}e1@example.com" email_to = f"user_{suffix}e2@example.com" @@ -982,18 +1078,26 @@ def _make_private(self, security_agent, user, hda): access_action = security_agent.permitted_actions.DATASET_ACCESS.action manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {access_action: [role], manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) def _make_owned(self, security_agent, user, hda): role = security_agent.get_private_user_role(user, auto_create=True) manage_action = security_agent.permitted_actions.DATASET_MANAGE_PERMISSIONS.action permissions = {manage_action: [role]} - security_agent.set_all_dataset_permissions(hda.dataset, permissions) + self._set_permissions(security_agent, hda.dataset, permissions) + + def _set_permissions(self, security_agent, dataset, permissions): + # TODO: refactor set_all_dataset_permissions to actually throw an exception :| + error = security_agent.set_all_dataset_permissions(dataset, permissions) + if error: + raise Exception(error) def new_hda(self, history, **kwds): - return history.add_dataset( - model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) - ) + object_store_id = kwds.pop("object_store_id", None) + hda = self.model.HistoryDatasetAssociation(create_dataset=True, sa_session=self.model.session, **kwds) + if object_store_id is not None: + hda.dataset.object_store_id = object_store_id + return history.add_dataset(hda) @skip_if_not_postgres_base @@ -1051,6 +1155,12 @@ def get_store_by(self, *args, **kwds): def update_from_file(self, *arg, **kwds): pass + def is_private(self, object): + if object.object_store_id == PRIVATE_OBJECT_STORE_ID: + return True + else: + return False + def get_suite(): suite = unittest.TestSuite() diff --git a/test/unit/objectstore/test_objectstore.py b/test/unit/objectstore/test_objectstore.py index 19ab416902d8..ba918bc9f64c 100644 --- a/test/unit/objectstore/test_objectstore.py +++ b/test/unit/objectstore/test_objectstore.py @@ -308,8 +308,26 @@ def test_concrete_name_without_objectstore_id(): assert files1_name is None +MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG = """ + + + + + + + + + + + + + + +""" + + MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG = """ - + @@ -327,12 +345,45 @@ def test_concrete_name_without_objectstore_id(): def test_mixed_store_by(): + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["store_by"] == "id" + assert as_dict["backends"][1]["store_by"] == "uuid" + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): as_dict = object_store.to_dict() assert as_dict["backends"][0]["store_by"] == "id" assert as_dict["backends"][1]["store_by"] == "uuid" +def test_mixed_private(): + # Distributed object store can combine private and non-private concrete objectstores + with TestConfig(MIXED_STORE_BY_DISTRIBUTED_TEST_CONFIG) as (directory, object_store): + ids = object_store.object_store_ids() + print(ids) + assert len(ids) == 2 + + ids = object_store.object_store_ids(private=True) + assert len(ids) == 1 + assert ids[0] == "files2" + + ids = object_store.object_store_ids(private=False) + assert len(ids) == 1 + assert ids[0] == "files1" + + as_dict = object_store.to_dict() + assert not as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + with TestConfig(MIXED_STORE_BY_HIERARCHICAL_TEST_CONFIG) as (directory, object_store): + as_dict = object_store.to_dict() + assert as_dict["backends"][0]["private"] + assert as_dict["backends"][1]["private"] + + assert object_store.private + assert as_dict["private"] is True + + DISTRIBUTED_TEST_CONFIG = """ @@ -487,7 +538,7 @@ def test_config_parse_pithos(): assert len(extra_dirs) == 2 -S3_TEST_CONFIG = """ +S3_TEST_CONFIG = """ @@ -499,6 +550,7 @@ def test_config_parse_pithos(): S3_TEST_CONFIG_YAML = """ type: s3 +private: true auth: access_key: access_moo secret_key: secret_cow @@ -522,6 +574,7 @@ def test_config_parse_pithos(): def test_config_parse_s3(): for config_str in [S3_TEST_CONFIG, S3_TEST_CONFIG_YAML]: with TestConfig(config_str, clazz=UnitializeS3ObjectStore) as (directory, object_store): + assert object_store.private assert object_store.access_key == "access_moo" assert object_store.secret_key == "secret_cow"