Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: ♻️ Is560/clone projects #2974

Closed
wants to merge 19 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
WIP [skip CI]
pcrespov committed May 16, 2022
commit cae10a2a0b241388fcd5a523f73c280fb6deb261
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
""" Helper functions to determine access rights on projects
# DRAFT Rationale:
osparc-simcore defines TWO authorization methods: i.e. a set of rules on what,
how and when any resource can be accessed or operated by a user
## ROLE-BASED METHOD:
In this method, a user is assigned a role (user/tester/admin) upon registration. Each role is
system-wide and defines a set of operations that the user *can* perform
- Every operation is named as a resource and an action (e.g. )
- Resource is named hierarchically
- Roles can inherit permitted operations from other roles
This method is static because is system-wide and it is defined directly in the
code at services/web/server/src/simcore_service_webserver/security_roles.py
It is defined on top of every API entrypoint and applied just after authentication of the user.
## GROUP-BASED METHOD:
The second method is designed to dynamically share a resource among groups of users. A group
defines a set of rules that apply to a resource and users can be added to the group dynamically.
So far, there are two resources that define access rights (AR):
- one applies to projects (read/write/delete) and
- the other to services (execute/write)
The project access rights are set in the column "access_rights" of the "projects" table.
The service access rights have their own table: service_access_rights
Access rights apply hierarchically, meaning that the access granted to a project applies
to all nodes inside and stored data in nodes.
How do these two AR coexist?: Access to read, write or delete a project is defined in the project AR but execution
will depend on the service AR attached to nodes inside.
What about stored data?
- data generated in nodes inherits the AR from the associated project
- data generated in API uses full AR provided by ownership (i.e. user_id in files_meta_data table)
"""


import logging
from dataclasses import dataclass
from typing import Dict, List, Optional

import sqlalchemy as sa
from aiopg.sa.connection import SAConnection
from aiopg.sa.result import ResultProxy, RowProxy
from models_library.projects import ProjectID
from simcore_postgres_database.storage_models import user_to_groups
from sqlalchemy.sql import text

logger = logging.getLogger(__name__)


@dataclass
class AccessRights:
    """Operations a user is allowed to perform on a project resource"""

    read: bool
    write: bool
    delete: bool

    @classmethod
    def all(cls) -> "AccessRights":
        """Factory: every operation granted"""
        return cls(read=True, write=True, delete=True)

    @classmethod
    def none(cls) -> "AccessRights":
        """Factory: every operation denied"""
        return cls(read=False, write=False, delete=False)


class AccessLayerError(Exception):
    """Base class for access-layer related errors

    All exceptions raised by this module derive from this class so callers
    can catch access-layer failures with a single except clause.
    """


class InvalidFileIdentifier(AccessLayerError):
    """Identifier does not follow the criteria to
    be a file identifier (see naming criteria below)
    """

    def __init__(self, identifier, reason=None, details=None):
        # keep the offending identifier and context for error reporting
        self.identifier = identifier
        self.reason = reason if reason else "Invalid file identifier"
        self.details = details

        super().__init__(self.reason, self.details)

    def __str__(self):
        return f"Error in {self.identifier}: {self.reason} [{self.details}]"


async def _get_user_groups_ids(conn: SAConnection, user_id: int) -> List[int]:
    """Returns the ids of all the groups the given user belongs to"""
    query = sa.select([user_to_groups.c.gid]).where(user_to_groups.c.uid == user_id)
    result = await conn.execute(query)
    return [row.gid for row in await result.fetchall()]


def _aggregate_access_rights(
    access_rights: Dict[str, Dict], group_ids: List[int]
) -> AccessRights:
    """OR-combines the group-level access rights granted to any of the user's groups

    :param access_rights: content of projects.access_rights json column, i.e.
        a map of group-id (serialized as str) -> {"read": bool, "write": bool, "delete": bool}
    :param group_ids: ids of the groups the user belongs to
    :returns: aggregated AccessRights; AccessRights.none() if the entry is malformed
    """
    try:
        prj_access = {"read": False, "write": False, "delete": False}
        for gid, grp_access in access_rights.items():
            # json keys are group-ids serialized as strings
            if int(gid) in group_ids:
                for operation in grp_access:
                    prj_access[operation] |= grp_access[operation]

        return AccessRights(**prj_access)
    except (KeyError, TypeError, ValueError):
        # NOTE: database does NOT include schema for json access_rights column!
        # KeyError: unknown operation name; TypeError/ValueError: gid key is not
        # parseable as an integer. Any malformed entry revokes all rights.
        logger.warning(
            "Invalid entry in projects.access_rights. Revoking all rights [%s]",
            access_rights,
        )
        return AccessRights.none()


async def list_projects_access_rights(
    conn: SAConnection, user_id: int
) -> Dict[ProjectID, AccessRights]:
    """
    Returns access-rights of user (user_id) over all OWNED or SHARED projects

    :returns: map of project uuid -> aggregated AccessRights of the user
    """

    user_group_ids: List[int] = await _get_user_groups_ids(conn, user_id)

    # NOTE: uses bound parameters instead of f-string interpolation to avoid
    # SQL-injection and quoting issues
    smt = text(
        """\
SELECT uuid, access_rights
FROM projects
WHERE (
    prj_owner = :uid
    OR jsonb_exists_any( access_rights, (
        SELECT ARRAY( SELECT gid::TEXT FROM user_to_groups WHERE uid = :uid )
        )
    )
)
"""
    ).bindparams(uid=user_id)

    projects_access_rights = {}

    async for row in conn.execute(smt):
        assert isinstance(row.access_rights, dict)
        # NOTE(review): uuid column is stored as text; confirm the driver really
        # returns a ProjectID instance here — TODO confirm
        assert isinstance(row.uuid, ProjectID)

        if row.access_rights:
            # TODO: access_rights should be directly filtered from result in stm instead calling again user_group_ids
            projects_access_rights[row.uuid] = _aggregate_access_rights(
                row.access_rights, user_group_ids
            )

        else:
            # backwards compatibility
            # - no access_rights defined BUT project is owned
            projects_access_rights[row.uuid] = AccessRights.all()

    return projects_access_rights


async def get_project_access_rights(
    conn: SAConnection, user_id: int, project_id: ProjectID
) -> AccessRights:
    """
    Returns access-rights of user (user_id) over a project resource (project_id)

    :returns: AccessRights.none() if the project does not exist or the user
        has no access to it; AccessRights.all() if the user owns it
    """
    user_group_ids: List[int] = await _get_user_groups_ids(conn, user_id)

    # NOTE: uses bound parameters instead of f-string interpolation to avoid
    # SQL-injection and quoting issues
    stmt = text(
        """\
SELECT prj_owner, access_rights
FROM projects
WHERE (
    ( uuid = :project_uuid ) AND (
        prj_owner = :uid
        OR jsonb_exists_any( access_rights, (
            SELECT ARRAY( SELECT gid::TEXT FROM user_to_groups WHERE uid = :uid )
            )
        )
    )
)
"""
    ).bindparams(project_uuid=f"{project_id}", uid=user_id)

    result: ResultProxy = await conn.execute(stmt)
    row: Optional[RowProxy] = await result.first()

    if not row:
        # Either project does not exists OR user_id has NO access
        return AccessRights.none()

    assert row.prj_owner is None or isinstance(row.prj_owner, int)
    assert isinstance(row.access_rights, dict)

    if row.prj_owner == user_id:
        # ownership grants full rights regardless of access_rights column
        return AccessRights.all()

    # determine user's access rights by aggregating AR of all groups
    prj_access = _aggregate_access_rights(row.access_rights, user_group_ids)
    return prj_access


# HELPERS -----------------------------------------------


async def get_readable_project_ids(conn: SAConnection, user_id: int) -> List[ProjectID]:
    """Returns a list of projects where user has granted read-access"""
    access_map = await list_projects_access_rights(conn, int(user_id))
    readable = [
        project_id
        for project_id, access_rights in access_map.items()
        if access_rights.read
    ]
    return readable
Original file line number Diff line number Diff line change
@@ -5,10 +5,11 @@
import logging
from copy import deepcopy
from typing import Any, Dict, Optional, Tuple
from uuid import UUID, uuid1, uuid5
from uuid import UUID, uuid4, uuid5

from aiohttp import web
from models_library.basic_types import UUIDStr
from models_library.projects import ProjectID
from models_library.users import UserID

from ..storage_api import copy_data_folders_from_project
@@ -19,6 +20,8 @@

NodesMap = Dict[UUIDStr, UUIDStr]

AUTO_CREATE_UUID = None


def _replace_uuids(entity: Any, project_map, nodes_map) -> Any:

@@ -47,9 +50,9 @@ def _replace_uuids(entity: Any, project_map, nodes_map) -> Any:


def _clone_project_from(
project: ProjectDict,
source_project: ProjectDict,
*,
new_project_id: Optional[UUID],
new_project_id: ProjectID,
clean_output_data: bool = False,
) -> Tuple[ProjectDict, NodesMap]:
"""
@@ -59,62 +62,60 @@ def _clone_project_from(
- w/ or w/o outputs
"""
#
# TODO: not robust to changes in project schema
# TODO: Not robust to changes in project schema
# e.g. how to guarantee these are the outputs?
# should we mark these fields?
#

project_copy = deepcopy(project)

# Update project id
# NOTE: this can be re-assigned by dbapi if not unique
if new_project_id:
assert isinstance(new_project_id, UUID) # nosec
project_copy_uuid = new_project_id
else:
project_copy_uuid = uuid1() # random project id

project_copy["uuid"] = str(project_copy_uuid)
cloned_project = deepcopy(source_project)
cloned_project["uuid"] = f"{new_project_id}"

# Workbench nodes shall be unique within the project context
def _create_new_node_uuid(old_uuid):
return str(uuid5(project_copy_uuid, str(old_uuid)))
def _create_new_node_uuid(previous_uuid):
return f"{uuid5(new_project_id, f'{previous_uuid}')}"

nodes_map = {}
for node_uuid in project.get("workbench", {}).keys():
nodes_map: NodesMap = {}
for node_uuid in source_project.get("workbench", {}).keys():
nodes_map[node_uuid] = _create_new_node_uuid(node_uuid)

project_map = {project["uuid"]: project_copy["uuid"]}
project_map = {source_project["uuid"]: cloned_project["uuid"]}

project_copy["workbench"] = _replace_uuids(
project_copy.get("workbench", {}), project_map, nodes_map
cloned_project["workbench"] = _replace_uuids(
cloned_project.get("workbench", {}), project_map, nodes_map
)
if "ui" in project_copy:
project_copy["ui"]["workbench"] = _replace_uuids(
project_copy["ui"].get("workbench", {}), project_map, nodes_map

if "ui" in cloned_project:
cloned_project["ui"]["workbench"] = _replace_uuids(
cloned_project["ui"].get("workbench", {}), project_map, nodes_map
)
project_copy["ui"]["slideshow"] = _replace_uuids(
project_copy["ui"].get("slideshow", {}), project_map, nodes_map
cloned_project["ui"]["slideshow"] = _replace_uuids(
cloned_project["ui"].get("slideshow", {}), project_map, nodes_map
)
if "mode" in project_copy["ui"]:
project_copy["ui"]["mode"] = project_copy["ui"]["mode"]
if "mode" in cloned_project["ui"]:
cloned_project["ui"]["mode"] = cloned_project["ui"]["mode"]

if clean_output_data:
FIELDS_TO_DELETE = ("outputs", "progress", "runHash")
for node_data in project_copy.get("workbench", {}).values():
for node_data in cloned_project.get("workbench", {}).values():
for field in FIELDS_TO_DELETE:
node_data.pop(field, None)

return project_copy, nodes_map
return cloned_project, nodes_map


async def copy_project(
async def clone_project(
app: web.Application,
project: ProjectDict,
user_id: UserID,
new_project_id: Optional[UUID] = None,
project: ProjectDict,
*,
new_project_id: Optional[UUID] = AUTO_CREATE_UUID,
) -> ProjectDict:
"""Clones both document and data folders of a project
"""
Let's define clone as a copy with potentially some field
updates (e.g. new identifiers, etc).
- document
Clones both project in db and its associated data-folders
- project
- get new identifiers for project and nodes
- data folders
- folder name composes as project_uuid/node_uuid
@@ -124,8 +125,15 @@ async def copy_project(
# TODO: set as invisible and set visible when copied so it can be used?
# TODO: atomic operation?

# TODO: can perform action? check whether user_id can clone project
# TODO: perform action?

db: ProjectDBAPI = app[APP_PROJECT_DBAPI]

# Update project id
new_project_id = new_project_id or uuid4()
assert isinstance(new_project_id, UUID) # nosec

# creates clone
project_copy, nodes_map = _clone_project_from(
project,
@@ -146,4 +154,6 @@ async def copy_project(
return updated_project


# def create_project( source_project: ProjectID, )
# TODO: schedule task
# TODO: long running https://google.aip.dev/151
# TODO: create_project(...)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Optional

from aiohttp.test_utils import TestClient
from models_library.projects import ProjectID
from models_library.users import UserID
from simcore_service_webserver._constants import APP_PROJECT_DBAPI
from simcore_service_webserver.projects._access_rights import (
AccessRights,
get_project_access_rights,
)
from simcore_service_webserver.projects.projects_db import ProjectDBAPI


async def test_access_rights(
    client: TestClient, user_id: UserID, project_id: ProjectID
):
    """Checks that a user gets NO rights on a project that is neither owned nor shared"""
    assert client.app

    db: ProjectDBAPI = client.app[APP_PROJECT_DBAPI]

    async with db.engine.acquire() as conn, conn.begin():
        # access layer
        can: Optional[AccessRights] = await get_project_access_rights(
            conn, int(user_id), project_id
        )
        assert can is not None
        assert not can.read
        # write must be denied as well (was previously not asserted)
        assert not can.write
        assert not can.delete
Original file line number Diff line number Diff line change
@@ -4,12 +4,19 @@
# pylint: disable=unused-variable


import json
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict
from uuid import UUID

import jsonschema
import pytest
from jsonschema import ValidationError
from models_library.projects import Workbench
from pydantic import BaseModel
from simcore_service_webserver.projects import _create
from simcore_service_webserver.projects.project_models import ProjectDict

# HELPERS -----------------------------------------------------------------------------------------

@@ -26,8 +33,59 @@ def get(self, uuid):
# TESTS -----------------------------------------------------------------------------------------


@pytest.fixture
def project_schema(project_schema_file: Path) -> Dict[str, Any]:
    """Project json-schema loaded from the repository's schema file"""
    return json.loads(project_schema_file.read_text())


@pytest.mark.parametrize(
    "test_data_file_name",
    [
        "fake-project.json",
        "fake-template-projects.hack08.notebooks.json",
        "fake-template-projects.isan.2dplot.json",
        "fake-template-projects.isan.matward.json",
        "fake-template-projects.isan.paraview.json",
        "fake-template-projects.isan.ucdavis.json",
        "fake-template-projects.sleepers.json",
    ],
)
def test_clone_project_row(
    test_data_file_name: str,
    project_schema: Dict[str, Any],
    tests_data_dir: Path,
    app,
    user_id,
):
    # Loads a real project document from the test-data fixtures
    original_project: ProjectDict = json.loads(
        (tests_data_dir / test_data_file_name).read_text()
    )

    source_project: ProjectDict = deepcopy(original_project)
    # NOTE(review): `_create.clone_project` is declared `async def`, but this is a
    # sync test calling it without await and unpacking two values from the result.
    # As written this would unpack a coroutine object — confirm whether this WIP
    # test should be async and whether clone_project returns (project, nodes_map).
    clone, _ = _create.clone_project(
        app, user_id, source_project, new_project_id=_create.AUTO_CREATE_UUID
    )

    # was not modified by clone_project_document
    assert source_project == original_project

    # valid clone: must get a brand-new project identifier
    assert clone["uuid"] != original_project["uuid"]

    # every workbench node must also get a fresh identifier
    node_ids = original_project["workbench"].keys()
    for clone_node_id in clone["workbench"]:
        assert clone_node_id not in node_ids

    # the clone must still be a schema-valid project document
    try:
        jsonschema.validate(instance=clone, schema=project_schema)
    except ValidationError as err:
        pytest.fail(f"Invalid clone of '{test_data_file_name}': {err.message}")


@pytest.mark.skip(reason="UNDER DEV")
def test_clone_project(project_uuid: UUID):
def test_clone_project_dev(project_uuid: UUID):
# a project in db
repo = ProjectRepo()