Skip to content

Commit

Permalink
private objectstores & dataset.sharable
Browse files Browse the repository at this point in the history
Setup abstractions to prevent sharing transient or private per-user objects in an objectstore.
  • Loading branch information
jmchilton committed Dec 21, 2020
1 parent 24057b7 commit b382c0c
Show file tree
Hide file tree
Showing 22 changed files with 452 additions and 55 deletions.
2 changes: 1 addition & 1 deletion client/src/mvc/library/library-model.js
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ var HistoryContents = Backbone.Collection.extend({
this.id = options.id;
},
url: function () {
return `${this.urlRoot + this.id}/contents`;
return `${this.urlRoot + this.id}/contents?sharable=true`;
},
model: HistoryItem,
});
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/job_execution/output_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def add_library_dataset_to_folder(self, library_folder, ld):
# Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
trans.app.security_agent.copy_library_permissions(trans, ld, ldda)
# Copy the current user's DefaultUserPermissions to the new LibraryDatasetDatasetAssociation.dataset
trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user))
trans.app.security_agent.set_all_dataset_permissions(ldda.dataset, trans.app.security_agent.user_get_default_permissions(trans.user), flush=False, new=True)
library_folder.add_library_dataset(ld, genome_build=ldda.dbkey)
trans.sa_session.add(library_folder)
trans.sa_session.flush()
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,8 @@ def _set_object_store_ids(self, job):

object_store_populator = ObjectStorePopulator(self.app, job.user)
object_store_id = self.get_destination_configuration("object_store_id", None)
require_sharable = job.requires_sharable_storage(self.app.security_agent)

if object_store_id:
object_store_populator.object_store_id = object_store_id

Expand All @@ -1483,7 +1485,7 @@ def _set_object_store_ids(self, job):
# afterward. State below needs to happen the same way.
for dataset_assoc in job.output_datasets + job.output_library_datasets:
dataset = dataset_assoc.dataset
object_store_populator.set_object_store_id(dataset)
object_store_populator.set_object_store_id(dataset, require_sharable=require_sharable)

job.object_store_id = object_store_populator.object_store_id
self._setup_working_directory(job=job)
Expand Down
7 changes: 6 additions & 1 deletion lib/galaxy/jobs/runners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,12 @@ def put(self, job_wrapper):
"""Add a job to the queue (by job identifier), indicate that the job is ready to run.
"""
put_timer = ExecutionTimer()
job_wrapper.enqueue()
try:
job_wrapper.enqueue()
except Exception as e:
job_wrapper.fail(str(e), exception=e)
log.debug(f"Job [{job_wrapper.job_id}] failed to queue {put_timer}")
return
self.mark_as_queued(job_wrapper)
log.debug(f"Job [{job_wrapper.job_id}] queued {put_timer}")

Expand Down
45 changes: 42 additions & 3 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
JOB_METRIC_SCALE = 7
# Tags that get automatically propagated from inputs to outputs when running jobs.
AUTO_PROPAGATED_TAGS = ["name"]
CANNOT_SHARE_PRIVATE_DATASET_MESSAGE = "Attempting to share a non-sharable dataset."


class RepresentById:
Expand Down Expand Up @@ -1100,6 +1101,19 @@ def remap_objects(p, k, obj):
job_attrs['params'] = params_dict
return job_attrs

def requires_sharable_storage(self, security_agent):
# An easy optimization would be to calculate this in galaxy.tools.actions when the
# job is created and all the output permissions are already known. Having to reload
# these permissions in the job code shouldn't strictly be needed.

requires_sharing = False
for dataset_assoc in self.output_datasets + self.output_library_datasets:
if not security_agent.dataset_is_private_to_a_user(dataset_assoc.dataset.dataset):
requires_sharing = True
break

return requires_sharing

def to_dict(self, view='collection', system_details=False):
rval = super().to_dict(view=view)
rval['tool_id'] = self.tool_id
Expand Down Expand Up @@ -2099,7 +2113,12 @@ def __filter_contents(self, content_class, **kwds):
visible = galaxy.util.string_as_bool_or_none(kwds.get('visible', None))
if visible is not None:
query = query.filter(content_class.visible == visible)
if 'object_store_ids' in kwds:
if content_class == HistoryDatasetAssociation:
query = query.join(content_class.dataset).filter(Dataset.table.c.object_store_id.in_(kwds.get("object_store_ids")))
# else ignoring object_store_ids on HDCAs...
if 'ids' in kwds:
assert 'object_store_ids' not in kwds
ids = kwds['ids']
max_in_filter_length = kwds.get('max_in_filter_length', MAX_IN_FILTER_LENGTH)
if len(ids) < max_in_filter_length:
Expand Down Expand Up @@ -2358,11 +2377,24 @@ def is_new(self):
def in_ready_state(self):
return self.state in self.ready_states

@property
def sharable(self):
"""Return True if placed into an objectstore not labeled as ``private``."""
if self.external_filename:
return True
else:
object_store = self._assert_object_store_set()
return not object_store.is_private(self)

def ensure_sharable(self):
if not self.sharable:
raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE)

def get_file_name(self):
if not self.external_filename:
assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
if self.object_store.exists(self):
return self.object_store.get_filename(self)
object_store = self._assert_object_store_set()
if object_store.exists(self):
return object_store.get_filename(self)
else:
return ''
else:
Expand All @@ -2377,6 +2409,10 @@ def set_file_name(self, filename):
self.external_filename = filename
file_name = property(get_file_name, set_file_name)

def _assert_object_store_set(self):
assert self.object_store is not None, "Object Store has not been initialized for dataset %s" % self.id
return self.object_store

def get_extra_files_path(self):
# Unlike get_file_name - external_extra_files_path is not backed by an
# actual database column so if SA instantiates this object - the
Expand Down Expand Up @@ -3229,6 +3265,9 @@ def to_library_dataset_dataset_association(self, trans, target_folder, replace_d
"""
Copy this HDA to a library optionally replacing an existing LDDA.
"""
if not self.dataset.sharable:
raise Exception("Attempting to share a non-sharable dataset.")

if replace_dataset:
# The replace_dataset param ( when not None ) refers to a LibraryDataset that
# is being replaced with a new version.
Expand Down
54 changes: 46 additions & 8 deletions lib/galaxy/model/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,16 +810,23 @@ def set_all_dataset_permissions(self, dataset, permissions={}, new=False, flush=
"""
# Make sure that DATASET_MANAGE_PERMISSIONS is associated with at least 1 role
has_dataset_manage_permissions = False
for action, roles in permissions.items():
if isinstance(action, Action):
if action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS and roles:
has_dataset_manage_permissions = True
break
elif action == self.permitted_actions.DATASET_MANAGE_PERMISSIONS.action and roles:
has_dataset_manage_permissions = True
break
for action, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_MANAGE_PERMISSIONS):
has_dataset_manage_permissions = True
break
if not has_dataset_manage_permissions:
return "At least 1 role must be associated with manage permissions on this dataset."

# If this is new, the objectstore likely hasn't been set yet - defer check until
# the job handler assigns it.
if not new and not dataset.sharable:
# ensure dataset not shared.
dataset_access_roles = []
for action, roles in _walk_action_roles(permissions, self.permitted_actions.DATASET_ACCESS):
dataset_access_roles.extend(roles)

if len(dataset_access_roles) != 1 or dataset_access_roles[0].type != self.model.Role.types.PRIVATE:
return galaxy.model.CANNOT_SHARE_PRIVATE_DATASET_MESSAGE

flush_needed = False
# Delete all of the current permissions on the dataset
if not new:
Expand Down Expand Up @@ -847,6 +854,12 @@ def set_dataset_permission(self, dataset, permission={}):
Set a specific permission on a dataset, leaving all other current permissions on the dataset alone.
Permission looks like: { Action.action : [ Role, Role ] }
"""

# if modifying access - ensure it is sharable.
for action, roles in _walk_action_roles(permission, self.permitted_actions.DATASET_ACCESS):
dataset.ensure_sharable()
break

flush_needed = False
for action, roles in permission.items():
if isinstance(action, Action):
Expand Down Expand Up @@ -886,6 +899,7 @@ def copy_dataset_permissions(self, src, dst):
self.set_all_dataset_permissions(dst, self.get_permissions(src))

def privately_share_dataset(self, dataset, users=[]):
dataset.ensure_sharable()
intersect = None
for user in users:
roles = [ura.role for ura in user.roles if ura.role.type == self.model.Role.types.SHARING]
Expand Down Expand Up @@ -1056,6 +1070,19 @@ def dataset_is_private_to_user(self, trans, dataset):
else:
return False

def dataset_is_private_to_a_user(self, dataset):
"""
If the Dataset object has exactly one access role and that is
the current user's private role then we consider the dataset private.
"""
access_roles = dataset.get_access_roles(self)

if len(access_roles) != 1:
return False
else:
access_role = access_roles[0]
return access_role.type == self.model.Role.types.PRIVATE

def datasets_are_public(self, trans, datasets):
'''
Given a transaction object and a list of Datasets, return
Expand Down Expand Up @@ -1084,6 +1111,8 @@ def datasets_are_public(self, trans, datasets):
def make_dataset_public(self, dataset):
# A dataset is considered public if there are no "access" actions associated with it. Any
# other actions ( 'manage permissions', 'edit metadata' ) are irrelevant.
dataset.ensure_sharable()

flush_needed = False
for dp in dataset.actions:
if dp.action == self.permitted_actions.DATASET_ACCESS.action:
Expand Down Expand Up @@ -1463,3 +1492,12 @@ def set_dataset_permissions(self, hda, user, site):
hdadaa = self.model.HistoryDatasetAssociationDisplayAtAuthorization(hda=hda, user=user, site=site)
self.sa_session.add(hdadaa)
self.sa_session.flush()


def _walk_action_roles(permissions, query_action):
for action, roles in permissions.items():
if isinstance(action, Action):
if action == query_action and roles:
yield action, roles
elif action == query_action.action and roles:
yield action, roles
Loading

0 comments on commit b382c0c

Please sign in to comment.