diff --git a/components/renku_data_services/app_config/config.py b/components/renku_data_services/app_config/config.py index 4733024eb..7e988ab2c 100644 --- a/components/renku_data_services/app_config/config.py +++ b/components/renku_data_services/app_config/config.py @@ -63,7 +63,7 @@ from renku_data_services.users.db import UserRepo as KcUserRepo from renku_data_services.users.dummy_kc_api import DummyKeycloakAPI from renku_data_services.users.kc_api import IKeycloakAPI, KeycloakAPI -from renku_data_services.users.models import UserInfo +from renku_data_services.users.models import UnsavedUserInfo from renku_data_services.utils.core import merge_api_specs, oidc_discovery default_resource_pool = models.ResourcePool( @@ -436,8 +436,8 @@ def from_env(cls, prefix: str = "") -> "Config": user_store = DummyUserStore(user_always_exists=user_always_exists) gitlab_client = DummyGitlabAPI() dummy_users = [ - UserInfo("user1", "user1", "doe", "user1@doe.com"), - UserInfo("user2", "user2", "doe", "user2@doe.com"), + UnsavedUserInfo(id="user1", first_name="user1", last_name="doe", email="user1@doe.com"), + UnsavedUserInfo(id="user2", first_name="user2", last_name="doe", email="user2@doe.com"), ] kc_api = DummyKeycloakAPI(users=[i._to_keycloak_dict() for i in dummy_users]) redis = RedisConfig.fake() diff --git a/components/renku_data_services/authz/authz.py b/components/renku_data_services/authz/authz.py index 540dc4c62..38386204c 100644 --- a/components/renku_data_services/authz/authz.py +++ b/components/renku_data_services/authz/authz.py @@ -35,7 +35,7 @@ from renku_data_services.errors import errors from renku_data_services.namespace.models import Group, GroupUpdate, Namespace, NamespaceKind, NamespaceUpdate from renku_data_services.project.models import Project, ProjectUpdate -from renku_data_services.users.models import UserWithNamespace, UserWithNamespaceUpdate +from renku_data_services.users.models import UserInfo, UserInfoUpdate _P = ParamSpec("_P") @@ -51,7 +51,7 @@ def authz(self) -> "Authz": _AuthzChangeFuncResult = TypeVar( "_AuthzChangeFuncResult", - bound=Project | ProjectUpdate | Group | UserWithNamespaceUpdate | list[UserWithNamespace] | None, + bound=Project | ProjectUpdate | Group | UserInfoUpdate | list[UserInfo] | None, ) _T = TypeVar("_T") _WithAuthz = TypeVar("_WithAuthz", bound=WithAuthz) @@ -493,14 +493,14 @@ async def _get_authz_change( case AuthzOperation.delete, ResourceType.group if result is None: # NOTE: This means that the group does not exist in the first place so nothing was deleted pass - case AuthzOperation.update_or_insert, ResourceType.user if isinstance(result, UserWithNamespaceUpdate): + case AuthzOperation.update_or_insert, ResourceType.user if isinstance(result, UserInfoUpdate): if result.old is None: authz_change = db_repo.authz._add_user_namespace(result.new.namespace) case AuthzOperation.insert_many, ResourceType.user_namespace if isinstance(result, list): for res in result: - if not isinstance(res, UserWithNamespace): + if not isinstance(res, UserInfo): raise errors.ProgrammingError( - message="Expected list of UserWithNamespace when generating authorization " + message="Expected list of UserInfo when generating authorization " f"database updates for inserting namespaces but found {type(res)}" ) authz_change.extend(db_repo.authz._add_user_namespace(res.namespace)) diff --git a/components/renku_data_services/base_models/core.py b/components/renku_data_services/base_models/core.py index 43f7bbe3a..b0143d238 100644 --- a/components/renku_data_services/base_models/core.py +++ b/components/renku_data_services/base_models/core.py @@ -4,7 +4,7 @@ import unicodedata from dataclasses import dataclass, field from enum import Enum, StrEnum -from typing import ClassVar, Optional, Protocol +from typing import ClassVar, Optional, Protocol, Self from sanic import Request @@ -122,7 +122,7 @@ def __init__(self, value: str) -> None: object.__setattr__(self, "value", value.lower()) @classmethod - def from_name(cls, name: str) -> "Slug": + def from_name(cls, name: str) -> Self: """Takes a name with any amount of invalid characters and transforms it in a valid slug.""" lower_case = name.lower() no_space = re.sub(r"\s+", "-", lower_case) @@ -139,6 +139,24 @@ def from_name(cls, name: str) -> "Slug": ) return cls(no_dot_git_or_dot_atom_at_end) + @classmethod + def from_user(cls, email: str | None, first_name: str | None, last_name: str | None, keycloak_id: str) -> Self: + """Create a slug from a user.""" + if email: + slug = email.split("@")[0] + elif first_name and last_name: + slug = first_name + "-" + last_name + elif last_name: + slug = last_name + elif first_name: + slug = first_name + else: + slug = "user_" + keycloak_id + # The length limit is 99 but leave some space for modifications that may be added down the line + # to filter out invalid characters or to generate a unique name + slug = slug[:80] + return cls.from_name(slug) + def __true_div__(self, other: "Slug") -> str: """Joins two slugs into a path fraction without dashes at the beginning or end.""" if type(self) is not type(other): diff --git a/components/renku_data_services/crc/blueprints.py b/components/renku_data_services/crc/blueprints.py index 434eea5bd..c71da7490 100644 --- a/components/renku_data_services/crc/blueprints.py +++ b/components/renku_data_services/crc/blueprints.py @@ -16,7 +16,7 @@ from renku_data_services.crc.db import ResourcePoolRepository, UserRepository from renku_data_services.k8s.quota import QuotaRepository from renku_data_services.users.db import UserRepo as KcUserRepo -from renku_data_services.users.models import UserWithNamespace +from renku_data_services.users.models import UserInfo @dataclass(kw_only=True) @@ -183,10 +183,10 @@ async def _put_post( self, api_user: base_models.APIUser, resource_pool_id: int, body: apispec.PoolUsersWithId, post: bool = True ) -> HTTPResponse: user_ids_to_add = set([user.id for user in body.root]) - users_checks: list[UserWithNamespace | None] = await asyncio.gather( + users_checks: list[UserInfo | None] = await asyncio.gather( *[self.kc_user_repo.get_user(id=id) for id in user_ids_to_add] ) - existing_user_ids = set([user.user.id for user in users_checks if user is not None]) + existing_user_ids = set([user.id for user in users_checks if user is not None]) if existing_user_ids != user_ids_to_add: missing_ids = user_ids_to_add.difference(existing_user_ids) raise errors.MissingResourceError(message=f"The users with IDs {missing_ids} cannot be found") diff --git a/components/renku_data_services/message_queue/converters.py b/components/renku_data_services/message_queue/converters.py index 4dda27170..3f3dc7dbb 100644 --- a/components/renku_data_services/message_queue/converters.py +++ b/components/renku_data_services/message_queue/converters.py @@ -94,38 +94,38 @@ def to_events( class _UserEventConverter: @staticmethod def to_events( - user: user_models.UserInfo | user_models.UserWithNamespace | user_models.UserWithNamespaceUpdate, + user: user_models.UserInfo | user_models.UserInfoUpdate | str, event_type: type[AvroModel] | type[events.AmbiguousEvent], ) -> list[Event]: match event_type: case v2.UserAdded | events.InsertUserNamespace: - user = cast(user_models.UserWithNamespace, user) + user = cast(user_models.UserInfo, user) return [ _make_event( "user.added", v2.UserAdded( - id=user.user.id, - firstName=user.user.first_name, - lastName=user.user.last_name, - email=user.user.email, + id=user.id, + firstName=user.first_name, + lastName=user.last_name, + email=user.email, namespace=user.namespace.slug, ), ) ] case v2.UserRemoved: - user = cast(user_models.UserInfo, user) - return [_make_event("user.removed", v2.UserRemoved(id=user.id))] + user_id = cast(str, user) + return [_make_event("user.removed", v2.UserRemoved(id=user_id))] case events.UpdateOrInsertUser: - user = cast(user_models.UserWithNamespaceUpdate, user) + user = cast(user_models.UserInfoUpdate, user) if user.old is None: return [ _make_event( "user.added", v2.UserAdded( - id=user.new.user.id, - firstName=user.new.user.first_name, - lastName=user.new.user.last_name, - email=user.new.user.email, + id=user.new.id, + firstName=user.new.first_name, + lastName=user.new.last_name, + email=user.new.email, namespace=user.new.namespace.slug, ), ) @@ -135,10 +135,10 @@ def to_events( _make_event( "user.updated", v2.UserUpdated( - id=user.new.user.id, - firstName=user.new.user.first_name, - lastName=user.new.user.last_name, - email=user.new.user.email, + id=user.new.id, + firstName=user.new.first_name, + lastName=user.new.last_name, + email=user.new.email, namespace=user.new.namespace.slug, ), ) @@ -328,16 +328,16 @@ def to_events(input: _T, event_type: type[AvroModel] | type[events.AmbiguousEven group_authz = cast(list[authz_models.MembershipChange], input) return _GroupAuthzEventConverter.to_events(group_authz) case v2.UserAdded: - user_with_namespace = cast(user_models.UserWithNamespace, input) + user_with_namespace = cast(user_models.UserInfo, input) return _UserEventConverter.to_events(user_with_namespace, event_type) case v2.UserRemoved: - user_info = cast(user_models.UserInfo, input) + user_info = cast(str, input) return _UserEventConverter.to_events(user_info, event_type) case events.UpdateOrInsertUser: - user_with_namespace_update = cast(user_models.UserWithNamespaceUpdate, input) + user_with_namespace_update = cast(user_models.UserInfoUpdate, input) return _UserEventConverter.to_events(user_with_namespace_update, event_type) case events.InsertUserNamespace: - user_namespaces = cast(list[user_models.UserWithNamespace], input) + user_namespaces = cast(list[user_models.UserInfo], input) output: list[Event] = [] for namespace in user_namespaces: output.extend(_UserEventConverter.to_events(namespace, event_type)) diff --git a/components/renku_data_services/namespace/db.py b/components/renku_data_services/namespace/db.py index 8f1d36565..07c725a3f 100644 --- a/components/renku_data_services/namespace/db.py +++ b/components/renku_data_services/namespace/db.py @@ -49,25 +49,27 @@ def __init__( @with_db_transaction @Authz.authz_change(AuthzOperation.insert_many, ResourceType.user_namespace) @dispatch_message(events.InsertUserNamespace) - async def generate_user_namespaces( - self, *, session: AsyncSession | None = None - ) -> list[user_models.UserWithNamespace]: + async def generate_user_namespaces(self, *, session: AsyncSession | None = None) -> list[user_models.UserInfo]: """Generate user namespaces if the user table has data and the namespaces table is empty.""" if not session: raise errors.ProgrammingError(message="A database session is required") # NOTE: lock to make sure another instance of the data service cannot insert/update but can read - output: list[user_models.UserWithNamespace] = [] + output: list[user_models.UserInfo] = [] await session.execute(text("LOCK TABLE common.namespaces IN EXCLUSIVE MODE")) at_least_one_namespace = (await session.execute(select(schemas.NamespaceORM).limit(1))).one_or_none() if at_least_one_namespace: logger.info("Found at least one user namespace, skipping creation") - return output + return [] logger.info("Found zero user namespaces, will try to create them from users table") res = await session.scalars(select(user_schemas.UserORM)) for user in res: - ns = await self._insert_user_namespace(session, user, retry_enumerate=10, retry_random=True) + slug = base_models.Slug.from_user(user.email, user.first_name, user.last_name, user.keycloak_id) + ns = await self._insert_user_namespace( + session, user.keycloak_id, slug.value, retry_enumerate=10, retry_random=True + ) logger.info(f"Creating user namespace {ns}") - output.append(user_models.UserWithNamespace(user.dump(), ns)) + user.namespace = ns + output.append(user.dump()) logger.info(f"Created {len(output)} user namespaces") return output @@ -329,7 +331,7 @@ async def get_namespaces( output.append(ns_orm.dump()) return output, group_count - async def _get_user_namespaces(self) -> AsyncGenerator[user_models.UserWithNamespace, None]: + async def _get_user_namespaces(self) -> AsyncGenerator[user_models.UserInfo, None]: """Lists all user namespaces without regard for authorization or permissions, used for migrations.""" async with self.session_maker() as session, session.begin(): namespaces = await session.stream_scalars( @@ -379,39 +381,37 @@ async def get_user_namespace(self, user_id: str) -> models.Namespace | None: raise errors.ProgrammingError(message="Found a namespace that has no user associated with it.") return ns.dump() + async def _create_user_namespace_slug( + self, session: AsyncSession, user_slug: str, retry_enumerate: int = 0, retry_random: bool = False + ) -> str: + """Create a valid namespace slug for a user.""" + nss = await session.scalars( + select(schemas.NamespaceORM.slug).where(schemas.NamespaceORM.slug.startswith(user_slug)) + ) + nslist = nss.all() + if user_slug not in nslist: + return user_slug + if retry_enumerate: + for inc in range(1, retry_enumerate + 1): + slug = f"{user_slug}-{inc}" + if slug not in nslist: + return slug + if retry_random: + suffix = "".join([random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]) # nosec B311 + slug = f"{user_slug}-{suffix}" + if slug not in nslist: + return slug + + raise errors.ValidationError(message=f"Cannot create generate a unique namespace slug for the user {user_slug}") + async def _insert_user_namespace( - self, session: AsyncSession, user: schemas.UserORM, retry_enumerate: int = 0, retry_random: bool = False - ) -> models.Namespace: + self, session: AsyncSession, user_id: str, user_slug: str, retry_enumerate: int = 0, retry_random: bool = False + ) -> schemas.NamespaceORM: """Insert a new namespace for the user and optionally retry different variations to avoid collisions.""" - original_slug = user.to_slug() - for inc in range(0, retry_enumerate + 1): - # NOTE: on iteration 0 we try with the optimal slug value derived from the user data without any suffix. - suffix = "" - if inc > 0: - suffix = f"-{inc}" - slug = base_models.Slug.from_name(original_slug.value.lower() + suffix) - ns = schemas.NamespaceORM(slug.value, user_id=user.keycloak_id) - try: - async with session.begin_nested(): - session.add(ns) - await session.flush() - except IntegrityError: - if retry_enumerate == 0: - raise errors.ValidationError(message=f"The user namespace slug {slug.value} already exists") - continue - else: - await session.refresh(ns) - return ns.dump() - if not retry_random: - raise errors.ValidationError( - message=f"Cannot create generate a unique namespace slug for the user with ID {user.keycloak_id}" - ) - # NOTE: At this point the attempts to generate unique ID have ended and the only option is - # to add a small random suffix to avoid uniqueness constraints problems - suffix = "-" + "".join([random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]) # nosec: B311 - slug = base_models.Slug.from_name(original_slug.value.lower() + suffix) - ns = schemas.NamespaceORM(slug.value, user_id=user.keycloak_id) + namespace = await self._create_user_namespace_slug(session, user_slug, retry_enumerate, retry_random) + slug = base_models.Slug.from_name(namespace) + ns = schemas.NamespaceORM(slug.value, user_id=user_id) session.add(ns) await session.flush() await session.refresh(ns) - return ns.dump() + return ns diff --git a/components/renku_data_services/namespace/orm.py b/components/renku_data_services/namespace/orm.py index 741ba5a66..abc85a74c 100644 --- a/components/renku_data_services/namespace/orm.py +++ b/components/renku_data_services/namespace/orm.py @@ -1,7 +1,7 @@ """SQLAlchemy's schemas for the group database.""" from datetime import datetime -from typing import Optional +from typing import Optional, Self, cast from sqlalchemy import CheckConstraint, DateTime, MetaData, String, func from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship @@ -11,7 +11,7 @@ from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY from renku_data_services.errors import errors from renku_data_services.namespace import models -from renku_data_services.users.models import UserInfo, UserWithNamespace +from renku_data_services.users.models import UserInfo from renku_data_services.users.orm import UserORM from renku_data_services.utils.sqlalchemy import ULIDType @@ -103,7 +103,7 @@ def dump(self) -> models.Namespace: name=name, ) - def dump_user(self) -> UserWithNamespace: + def dump_user(self) -> UserInfo: """Create a user with namespace from the ORM.""" if self.user is None: raise errors.ProgrammingError( @@ -111,8 +111,25 @@ def dump_user(self) -> UserWithNamespace: "has no associated user with it." ) ns = self.dump() - user_info = UserInfo(self.user.keycloak_id, self.user.first_name, self.user.last_name, self.user.email) - return UserWithNamespace(user_info, ns) + user_info = UserInfo( + id=self.user.keycloak_id, + first_name=self.user.first_name, + last_name=self.user.last_name, + email=self.user.email, + namespace=ns, + ) + return user_info + + @classmethod + def load(cls, ns: models.Namespace) -> Self: + """Create an ORM object from the user object.""" + match ns.kind: + case models.NamespaceKind.group: + return cls(slug=ns.slug, group_id=cast(ULID, ns.underlying_resource_id)) + case models.NamespaceKind.user: + return cls(slug=ns.slug, user_id=cast(str, ns.underlying_resource_id)) + + raise errors.ValidationError(message=f"Unknown namespace kind {ns.kind}") class NamespaceOldORM(BaseORM): diff --git a/components/renku_data_services/project/blueprints.py b/components/renku_data_services/project/blueprints.py index 606f1ba9e..e9ccb540f 100644 --- a/components/renku_data_services/project/blueprints.py +++ b/components/renku_data_services/project/blueprints.py @@ -236,11 +236,10 @@ async def _get_all_members(_: Request, user: base_models.APIUser, project_id: st for member in members: user_id = member.user_id - user_with_namespace = await self.user_repo.get_user(id=user_id) - if not user_with_namespace: + user_info = await self.user_repo.get_user(id=user_id) + if not user_info: raise errors.MissingResourceError(message=f"The user with ID {user_id} cannot be found.") - user_info = user_with_namespace.user - namespace_info = user_with_namespace.namespace + namespace_info = user_info.namespace user_with_id = apispec.ProjectMemberResponse( id=user_id, diff --git a/components/renku_data_services/users/blueprints.py b/components/renku_data_services/users/blueprints.py index 7ad08895f..a768f2586 100644 --- a/components/renku_data_services/users/blueprints.py +++ b/components/renku_data_services/users/blueprints.py @@ -39,11 +39,11 @@ async def _get_all(request: Request, user: base_models.APIUser, query: apispec.U apispec.UsersWithId, [ dict( - id=user.user.id, + id=user.id, username=user.namespace.slug, - email=user.user.email, - first_name=user.user.first_name, - last_name=user.user.last_name, + email=user.email, + first_name=user.first_name, + last_name=user.last_name, ) for user in users ], @@ -65,11 +65,11 @@ async def _get_self(_: Request, user: base_models.APIUser) -> JSONResponse: return validated_json( apispec.UserWithId, dict( - id=user_info.user.id, + id=user_info.id, username=user_info.namespace.slug, - email=user_info.user.email, - first_name=user_info.user.first_name, - last_name=user_info.user.last_name, + email=user_info.email, + first_name=user_info.first_name, + last_name=user_info.last_name, ), ) @@ -99,11 +99,11 @@ async def _get_one(_: Request, user: base_models.APIUser, user_id: str) -> JSONR return validated_json( apispec.UserWithId, dict( - id=user_info.user.id, + id=user_info.id, username=user_info.namespace.slug, - email=user_info.user.email, - first_name=user_info.user.first_name, - last_name=user_info.user.last_name, + email=user_info.email, + first_name=user_info.first_name, + last_name=user_info.last_name, ), ) diff --git a/components/renku_data_services/users/db.py b/components/renku_data_services/users/db.py index 5817c9bb7..3f0586be5 100644 --- a/components/renku_data_services/users/db.py +++ b/components/renku_data_services/users/db.py @@ -9,8 +9,8 @@ from sanic.log import logger from sqlalchemy import delete, func, select from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import selectinload +from renku_data_services import base_models from renku_data_services.authz.authz import Authz, AuthzOperation, ResourceType from renku_data_services.base_api.auth import APIUser, only_authenticated from renku_data_services.errors import errors @@ -20,16 +20,16 @@ from renku_data_services.message_queue.interface import IMessageQueue from renku_data_services.message_queue.redis_queue import dispatch_message from renku_data_services.namespace.db import GroupRepository +from renku_data_services.namespace.orm import NamespaceORM from renku_data_services.users.config import UserPreferencesConfig from renku_data_services.users.kc_api import IKeycloakAPI from renku_data_services.users.models import ( KeycloakAdminEvent, PinnedProjects, UserInfo, + UserInfoFieldUpdate, UserInfoUpdate, UserPreferences, - UserWithNamespace, - UserWithNamespaceUpdate, ) from renku_data_services.users.orm import LastKeycloakEventTimestamp, UserORM, UserPreferencesORM from renku_data_services.utils.core import with_db_transaction @@ -59,7 +59,7 @@ async def initialize(self, kc_api: IKeycloakAPI) -> None: return await self._users_sync.users_sync(kc_api) - async def _add_api_user(self, user: APIUser) -> UserWithNamespace: + async def _add_api_user(self, user: APIUser) -> UserInfo: if not user.id: raise errors.UnauthorizedError(message="The user has to be authenticated to be inserted in the DB.") result = await self._users_sync.update_or_insert_user( @@ -72,12 +72,10 @@ async def _add_api_user(self, user: APIUser) -> UserWithNamespace: ) return result.new - async def get_user(self, id: str) -> UserWithNamespace | None: + async def get_user(self, id: str) -> UserInfo | None: """Get a specific user from the database.""" async with self.session_maker() as session: - result = await session.scalars( - select(UserORM).where(UserORM.keycloak_id == id).options(selectinload(UserORM.namespace)) - ) + result = await session.scalars(select(UserORM).where(UserORM.keycloak_id == id)) user = result.one_or_none() if user is None: return None @@ -85,7 +83,7 @@ async def get_user(self, id: str) -> UserWithNamespace | None: raise errors.ProgrammingError(message=f"Cannot find a user namespace for user {id}.") return user.namespace.dump_user() - async def get_or_create_user(self, requested_by: APIUser, id: str) -> UserWithNamespace | None: + async def get_or_create_user(self, requested_by: APIUser, id: str) -> UserInfo | None: """Get a specific user from the database and create it potentially if it does not exist. If the caller is the same user that is being retrieved and they are authenticated and @@ -99,25 +97,24 @@ async def get_or_create_user(self, requested_by: APIUser, id: str) -> UserWithNa return user @only_authenticated - async def get_users(self, requested_by: APIUser, email: str | None = None) -> list[UserWithNamespace]: + async def get_users(self, requested_by: APIUser, email: str | None = None) -> list[UserInfo]: """Get users from the database.""" if not email and not requested_by.is_admin: raise errors.ForbiddenError(message="Non-admin users cannot list all users.") users = await self._get_users(email) - is_api_user_missing = not any([requested_by.id == user.user.id for user in users]) + is_api_user_missing = not any([requested_by.id == user.id for user in users]) if not email and is_api_user_missing: api_user_info = await self._add_api_user(requested_by) users.append(api_user_info) return users - async def _get_users(self, email: str | None = None) -> list[UserWithNamespace]: + async def _get_users(self, email: str | None = None) -> list[UserInfo]: async with self.session_maker() as session: stmt = select(UserORM) if email: stmt = stmt.where(UserORM.email == email) - stmt = stmt.options(selectinload(UserORM.namespace)) result = await session.scalars(stmt) users = result.all() @@ -125,7 +122,7 @@ async def _get_users(self, email: str | None = None) -> list[UserWithNamespace]: if user.namespace is None: raise errors.ProgrammingError(message=f"Cannot find a user namespace for user {id}.") - return [user.namespace.dump_user() for user in users if user.namespace is not None] + return [user.dump() for user in users if user.namespace is not None] @only_authenticated async def get_or_create_user_secret_key(self, requested_by: APIUser) -> str: @@ -176,7 +173,7 @@ async def _get_user(self, id: str) -> UserInfo | None: @dispatch_message(events.UpdateOrInsertUser) async def update_or_insert_user( self, user_id: str, payload: dict[str, Any], *, session: AsyncSession | None = None - ) -> UserWithNamespaceUpdate: + ) -> UserInfoUpdate: """Update a user or insert it if it does not exist.""" if not session: raise errors.ProgrammingError(message="A database session is required") @@ -187,21 +184,26 @@ async def update_or_insert_user( else: return await self._insert_user(session=session, user_id=user_id, **payload) - async def _insert_user(self, session: AsyncSession, user_id: str, **kwargs: Any) -> UserWithNamespaceUpdate: + async def _insert_user(self, session: AsyncSession, user_id: str, **kwargs: Any) -> UserInfoUpdate: """Insert a user.""" kwargs.pop("keycloak_id", None) kwargs.pop("id", None) - new_user = UserORM(keycloak_id=user_id, **kwargs) + slug = base_models.Slug.from_user( + kwargs.get("email"), kwargs.get("first_name"), kwargs.get("last_name"), user_id + ).value + namespace = await self.group_repo._create_user_namespace_slug( + session, user_slug=slug, retry_enumerate=5, retry_random=True + ) + slug = base_models.Slug.from_name(namespace) + new_user = UserORM(keycloak_id=user_id, namespace=NamespaceORM(slug=slug.value, user_id=user_id), **kwargs) + new_user.namespace.user = new_user session.add(new_user) await session.flush() - namespace = await self.group_repo._insert_user_namespace( - session, new_user, retry_enumerate=5, retry_random=True - ) - return UserWithNamespaceUpdate(None, UserWithNamespace(new_user.dump(), namespace)) + return UserInfoUpdate(None, new_user.dump()) async def _update_user( self, session: AsyncSession, user_id: str, existing_user: UserORM | None, **kwargs: Any - ) -> UserWithNamespaceUpdate: + ) -> UserInfoUpdate: """Update a user.""" if not existing_user: async with self.session_maker() as session, session.begin(): @@ -222,13 +224,11 @@ async def _update_user( raise errors.ProgrammingError( message=f"Cannot find a user namespace for user {user_id} when updating the user." ) - return UserWithNamespaceUpdate( - UserWithNamespace(old_user, namespace), UserWithNamespace(existing_user.dump(), namespace) - ) + return UserInfoUpdate(old_user, existing_user.dump()) @with_db_transaction @dispatch_message(avro_schema_v2.UserRemoved) - async def _remove_user(self, user_id: str, *, session: AsyncSession | None = None) -> UserInfo | None: + async def _remove_user(self, user_id: str, *, session: AsyncSession | None = None) -> str | None: """Remove a user from the database.""" if not session: raise errors.ProgrammingError(message="A database session is required") @@ -240,9 +240,8 @@ async def _remove_user(self, user_id: str, *, session: AsyncSession | None = Non logger.info(f"User with ID {user_id} was not found.") return None logger.info(f"User with ID {user_id} was removed from the database.") - removed_user = user.dump() logger.info(f"User namespace with ID {user_id} was removed from the authorization database.") - return removed_user + return user_id async def users_sync(self, kc_api: IKeycloakAPI) -> None: """Sync all users from Keycloak into the users database.""" @@ -286,9 +285,9 @@ async def events_sync(self, kc_api: IKeycloakAPI) -> None: delete_admin_events = kc_api.get_admin_events( start_date=start_date, event_types=[KeycloakAdminEvent.DELETE] ) - parsed_updates = UserInfoUpdate.from_json_admin_events(update_admin_events) - parsed_updates.extend(UserInfoUpdate.from_json_user_events(user_events)) - parsed_deletions = UserInfoUpdate.from_json_admin_events(delete_admin_events) + parsed_updates = UserInfoFieldUpdate.from_json_admin_events(update_admin_events) + parsed_updates.extend(UserInfoFieldUpdate.from_json_user_events(user_events)) + parsed_deletions = UserInfoFieldUpdate.from_json_admin_events(delete_admin_events) parsed_updates = sorted(parsed_updates, key=lambda x: x.timestamp_utc) parsed_deletions = sorted(parsed_deletions, key=lambda x: x.timestamp_utc) if previous_sync_latest_utc_timestamp is not None: diff --git a/components/renku_data_services/users/models.py b/components/renku_data_services/users/models.py index 7752bd481..849a378ef 100644 --- a/components/renku_data_services/users/models.py +++ b/components/renku_data_services/users/models.py @@ -30,7 +30,7 @@ class KeycloakAdminEvent(Enum): @dataclass -class UserInfoUpdate: +class UserInfoFieldUpdate: """An update of a specific field of user information.""" user_id: str @@ -40,9 +40,9 @@ class UserInfoUpdate: old_value: str | None = None @classmethod - def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfoUpdate"]: + def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfoFieldUpdate"]: """Generate a list of updates from a json response from Keycloak.""" - output: list[UserInfoUpdate] = [] + output: list[UserInfoFieldUpdate] = [] for event in val: details = event.get("details") user_id = event.get("userId") @@ -64,7 +64,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo email = details.get("email") if email: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="email", new_value=email, timestamp_utc=timestamp_utc, @@ -73,7 +73,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo ) if first_name: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="first_name", new_value=first_name, timestamp_utc=timestamp_utc, @@ -82,7 +82,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo ) if last_name: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="last_name", new_value=last_name, timestamp_utc=timestamp_utc, @@ -96,7 +96,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo if first_name: old_value = details.get("previous_first_name") output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="first_name", new_value=first_name, old_value=old_value, @@ -107,7 +107,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo if last_name: old_value = details.get("previous_last_name") output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="last_name", new_value=last_name, old_value=old_value, @@ -118,7 +118,7 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo if email: old_value = details.get("previous_email") output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="email", new_value=email, old_value=old_value, @@ -131,9 +131,9 @@ def from_json_user_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfo return output @classmethod - def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfoUpdate"]: + def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInfoFieldUpdate"]: """Generate a list of updates from a json response from Keycloak.""" - output: list[UserInfoUpdate] = [] + output: list[UserInfoFieldUpdate] = [] for event in val: timestamp_epoch = event.get("time") if not timestamp_epoch: @@ -165,7 +165,7 @@ def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInf first_name = payload.get("firstName") if first_name: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="first_name", new_value=first_name, timestamp_utc=timestamp_utc, @@ -175,7 +175,7 @@ def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInf last_name = payload.get("lastName") if last_name: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="last_name", new_value=last_name, timestamp_utc=timestamp_utc, @@ -185,7 +185,7 @@ def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInf email = payload.get("email") if email: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="email", new_value=email, timestamp_utc=timestamp_utc, @@ -194,7 +194,7 @@ def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInf ) case KeycloakAdminEvent.DELETE.value: output.append( - UserInfoUpdate( + UserInfoFieldUpdate( field_name="email", new_value="", timestamp_utc=timestamp_utc, @@ -206,8 +206,8 @@ def from_json_admin_events(self, val: Iterable[dict[str, Any]]) -> list["UserInf return output -@dataclass(eq=True, frozen=True) -class UserInfo: +@dataclass(eq=True, frozen=True, kw_only=True) +class UnsavedUserInfo: """Keycloak user.""" id: str @@ -216,9 +216,9 @@ class UserInfo: email: str | None = None @classmethod - def from_kc_user_payload(self, payload: dict[str, Any]) -> "UserInfo": + def from_kc_user_payload(cls, payload: dict[str, Any]) -> "UnsavedUserInfo": """Create a user object from the user payload from the Keycloak admin API.""" - return UserInfo( + return UnsavedUserInfo( id=payload["id"], first_name=payload.get("firstName"), last_name=payload.get("lastName"), @@ -253,18 +253,18 @@ def _to_keycloak_dict(self) -> dict[str, Any]: } -class UserWithNamespace(NamedTuple): +@dataclass(eq=True, frozen=True, kw_only=True) +class UserInfo(UnsavedUserInfo): """A tuple used to convey information about a user and their namespace.""" - user: UserInfo namespace: Namespace -class UserWithNamespaceUpdate(NamedTuple): +class UserInfoUpdate(NamedTuple): """Used to convey information about an update of a user or their namespace.""" - old: UserWithNamespace | None - new: UserWithNamespace + old: UserInfo | None + new: UserInfo class PinnedProjects(BaseModel): diff --git a/components/renku_data_services/users/orm.py b/components/renku_data_services/users/orm.py index 73f00a4d5..366943a8c 100644 --- a/components/renku_data_services/users/orm.py +++ b/components/renku_data_services/users/orm.py @@ -7,7 +7,6 @@ from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship -from renku_data_services.base_models import Slug from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY from renku_data_services.users.models import PinnedProjects, UserInfo, UserPreferences @@ -29,14 +28,12 @@ class UserORM(BaseORM): __tablename__ = "users" keycloak_id: Mapped[str] = mapped_column(String(36), unique=True, index=True) + namespace: Mapped["NamespaceORM"] = relationship(repr=False, back_populates="user", lazy="selectin") first_name: Mapped[Optional[str]] = mapped_column(String(256), default=None) last_name: Mapped[Optional[str]] = mapped_column(String(256), default=None) email: Mapped[Optional[str]] = mapped_column(String(320), default=None, index=True) secret_key: Mapped[Optional[bytes]] = mapped_column(LargeBinary(), default=None, repr=False) id: Mapped[int] = mapped_column(primary_key=True, init=False) - namespace: Mapped["NamespaceORM | None"] = relationship( - init=False, repr=False, viewonly=True, back_populates="user" - ) def dump(self) -> UserInfo: """Create a user object from the ORM object.""" @@ -45,6 +42,7 @@ def dump(self) -> UserInfo: first_name=self.first_name, last_name=self.last_name, email=self.email, + namespace=self.namespace.dump(), ) @classmethod @@ -55,25 +53,9 @@ def load(cls, user: UserInfo) -> "UserORM": first_name=user.first_name, last_name=user.last_name, email=user.email, + namespace=NamespaceORM.load(user.namespace), ) - def to_slug(self) -> Slug: - """Convert the User ORM object to a namespace slug.""" - if self.email: - slug = self.email.split("@")[0] - elif self.first_name and self.last_name: - slug = self.first_name + "-" + self.last_name - elif self.last_name: - slug = self.last_name - elif self.first_name: - slug = self.first_name - else: - slug = "user_" + self.keycloak_id - # The length limit is 99 but leave some space for modifications that may be added down the line - # to filter out invalid characters or to generate a unique name - slug = slug[:80] - return Slug.from_name(slug) - class LastKeycloakEventTimestamp(BaseORM): """The latest event timestamp processed from Keycloak.""" diff --git a/test/bases/renku_data_services/background_jobs/test_sync.py b/test/bases/renku_data_services/background_jobs/test_sync.py index e49d54c57..35fb48f39 100644 --- a/test/bases/renku_data_services/background_jobs/test_sync.py +++ b/test/bases/renku_data_services/background_jobs/test_sync.py @@ -15,6 +15,7 @@ RelationshipFilter, WriteRelationshipsRequest, ) +from ulid import ULID from bases.renku_data_services.background_jobs.config import SyncConfig from renku_data_services.authz.admin_sync import sync_admins_from_keycloak @@ -39,12 +40,13 @@ GroupPostRequest, ) from renku_data_services.namespace.db import GroupRepository +from renku_data_services.namespace.models import Namespace, NamespaceKind from renku_data_services.namespace.orm import NamespaceORM from renku_data_services.project.db import ProjectRepository from renku_data_services.project.models import UnsavedProject from renku_data_services.users.db import UserRepo, UsersSync from renku_data_services.users.dummy_kc_api import DummyKeycloakAPI -from renku_data_services.users.models import KeycloakAdminEvent, UserInfo, UserInfoUpdate +from renku_data_services.users.models import KeycloakAdminEvent, UnsavedUserInfo, UserInfo, UserInfoFieldUpdate from renku_data_services.users.orm import UserORM @@ -109,7 +111,7 @@ def get_kc_users(updates: list[UserInfo]) -> list[dict[str, Any]]: return output -def get_kc_user_update_events(updates: list[UserInfoUpdate]) -> list[dict[str, Any]]: +def get_kc_user_update_events(updates: list[UserInfoFieldUpdate]) -> list[dict[str, Any]]: output: list[dict[str, Any]] = [] for update in updates: output.append( @@ -206,14 +208,45 @@ def get_kc_roles(role_names: list[str]) -> dict[str, list[dict[str, Union[bool, async def test_total_users_sync( get_app_configs: Callable[..., tuple[SyncConfig, UserRepo]], admin_user: APIUser ) -> None: - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id="user-1-id", + slug="user-1", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + created_by="user-1-id", + ), + ) + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id="user-2-id", + slug="user-2", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + created_by="user-2-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=admin_user.id, + slug="admin", + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + created_by=admin_user.id, + ), ) user_roles = {admin_user.id: get_kc_roles(["renku-admin"])} kc_api = DummyKeycloakAPI(users=get_kc_users([user1, user2, admin_user_info]), user_roles=user_roles) @@ -223,24 +256,22 @@ async def test_total_users_sync( db_users = await user_repo.get_users(admin_user) kc_users = [UserInfo.from_kc_user_payload(user) for user in sync_config.kc_api.get_users()] kc_users.append( - UserInfo( + UnsavedUserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, ) ) - assert set(kc_users) == {user1, user2, admin_user_info} + assert set(u.id for u in kc_users) == set([user1.id, user2.id, admin_user_info.id]) assert len(db_users) == 1 # listing users add the requesting user if not present await sync_config.syncer.users_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Make sure doing users sync again does not change anything and works await sync_config.syncer.users_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Make sure that the addition of the users resulted in the creation of namespaces nss, _ = await sync_config.syncer.group_repo.get_namespaces( user=APIUser(id=user1.id), pagination=PaginationRequest(1, 100) @@ -259,13 +290,32 @@ async def test_total_users_sync( @pytest.mark.asyncio async def test_user_events_update(get_app_configs, admin_user: APIUser) -> None: kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users([user1]) sync_config: SyncConfig @@ -273,29 +323,38 @@ async def test_user_events_update(get_app_configs, admin_user: APIUser) -> None: sync_config, user_repo = get_app_configs(kc_api) db_users = await user_repo.get_users(admin_user) kc_users = [UserInfo.from_kc_user_payload(user) for user in sync_config.kc_api.get_users()] - assert set(kc_users) == {user1} + assert set(u.id for u in kc_users) == {user1.id} assert len(db_users) == 1 # listing users add the requesting user if not present await sync_config.syncer.users_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] kc_users.append(admin_user_info) - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Add update and create events - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") - user1_update = UserInfoUpdate("user-1-id", datetime.utcnow(), "first_name", "Johnathan") + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="jane.doe", + created_by="user-2-id", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + ), + ) + user1_update = UserInfoFieldUpdate("user-1-id", datetime.utcnow(), "first_name", "Johnathan") user1_updated = UserInfo(**{**asdict(user1), "first_name": "Johnathan"}) kc_api.user_events = get_kc_user_create_events([user2]) + get_kc_user_update_events([user1_update]) # Process events and check if updates show up await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(db_users) == {user1_updated, user2, admin_user_info} + assert set(u.id for u in db_users) == set(u.id for u in [user1_updated, user2, admin_user_info]) # Ensure re-processing events does not break anything kc_api.user_events = get_kc_user_create_events([user2]) + get_kc_user_update_events([user1_update]) await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(db_users) == {user1_updated, user2, admin_user_info} + assert set(u.id for u in db_users) == set(u.id for u in [user1_updated, user2, admin_user_info]) # Make sure that the addition of the user resulted in the creation of namespaces nss, _ = await sync_config.syncer.group_repo.get_namespaces( user=APIUser(id=user2.id), pagination=PaginationRequest(1, 100) @@ -308,14 +367,45 @@ async def test_user_events_update(get_app_configs, admin_user: APIUser) -> None: @pytest.mark.asyncio async def test_admin_events(get_app_configs, admin_user: APIUser) -> None: kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="jane.doe", + created_by="user-2-id", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users([user1, user2, admin_user_info]) sync_config: SyncConfig @@ -323,7 +413,7 @@ async def test_admin_events(get_app_configs, admin_user: APIUser) -> None: sync_config, user_repo = get_app_configs(kc_api) db_users = await user_repo.get_users(admin_user) kc_users = [UserInfo.from_kc_user_payload(user) for user in sync_config.kc_api.get_users()] - assert set(kc_users) == {user1, user2, admin_user_info} + assert set(u.id for u in kc_users) == set(u.id for u in [user1, user2, admin_user_info]) assert len(db_users) == 1 # listing users add the requesting user if not present await sync_config.syncer.users_sync(kc_api) # Make sure that the addition of the users resulted in the creation of namespaces @@ -334,8 +424,7 @@ async def test_admin_events(get_app_configs, admin_user: APIUser) -> None: assert user2.email assert nss[0].slug == user2.email.split("@")[0] db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Add admin events user1_updated = UserInfo(**{**asdict(user1), "last_name": "Renku"}) kc_api.admin_events = get_kc_admin_events( @@ -344,8 +433,7 @@ async def test_admin_events(get_app_configs, admin_user: APIUser) -> None: # Process admin events await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert {user1_updated, admin_user_info} == set(db_users) + assert set(u.id for u in [user1_updated, admin_user_info]) == set(u.id for u in db_users) # Make sure that the removal of a user removes the namespace nss, _ = await sync_config.syncer.group_repo.get_namespaces( user=APIUser(id=user2.id), pagination=PaginationRequest(1, 100) @@ -356,14 +444,45 @@ async def test_admin_events(get_app_configs, admin_user: APIUser) -> None: @pytest.mark.asyncio async def test_events_update_error(get_app_configs, admin_user: APIUser) -> None: kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="jane.doe", + created_by="user-2-id", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users([user1, user2]) sync_config: SyncConfig @@ -372,13 +491,15 @@ async def test_events_update_error(get_app_configs, admin_user: APIUser) -> None db_users = await user_repo.get_users(admin_user) kc_users = [UserInfo.from_kc_user_payload(user) for user in sync_config.kc_api.get_users()] kc_users.append(admin_user_info) - assert set(kc_users) == {user1, user2, admin_user_info} + assert set(u.id for u in kc_users) == set(u.id for u in [user1, user2, admin_user_info]) assert len(db_users) == 1 # listing users add the requesting user if not present - assert db_users[0].user == admin_user_info + assert db_users[0].id == admin_user_info.id + assert db_users[0].first_name == admin_user_info.first_name + assert db_users[0].last_name == admin_user_info.last_name + assert db_users[0].email == admin_user_info.email await sync_config.syncer.users_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Add admin events user1_updated = UserInfo(**{**asdict(user1), "last_name": "Renku"}) user2_updated = UserInfo(**{**asdict(user2), "last_name": "Smith"}) @@ -391,51 +512,77 @@ async def test_events_update_error(get_app_configs, admin_user: APIUser) -> None with pytest.raises(ValueError): await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] # An error occurs in processing an event or between events and none of the events are processed - assert {user1, user2, admin_user_info} == set(db_users) + assert set(u.id for u in [user1, user2, admin_user_info]) == set(u.id for u in db_users) # Add admin events without error kc_api.admin_events = get_kc_admin_events([(user1_updated, KeycloakAdminEvent.UPDATE)]) + get_kc_admin_events( [(user2_updated, KeycloakAdminEvent.UPDATE)] ) await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert {user1_updated, user2_updated, admin_user_info} == set(db_users) + assert set(u.id for u in [user1_updated, user2_updated, admin_user_info]) == set(u.id for u in db_users) @pytest.mark.asyncio async def test_removing_non_existent_user(get_app_configs, admin_user: APIUser) -> None: kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - non_existent_user = UserInfo("non-existent-id", "Not", "Exist", "not.exist@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) + non_existent_user = UserInfo( + id="non-existent-id", + first_name="Not", + last_name="Exist", + email="not.exist@gmail.com", + namespace=Namespace( + id=ULID(), + slug="not.exist", + created_by="noone", + kind=NamespaceKind.user, + underlying_resource_id="non-existent-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users([user1, admin_user_info]) sync_config: SyncConfig user_repo: UserRepo sync_config, user_repo = get_app_configs(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] kc_users = [UserInfo.from_kc_user_payload(user) for user in sync_config.kc_api.get_users()] - assert set(kc_users) == {user1, admin_user_info} + assert set(u.id for u in kc_users) == set(u.id for u in [user1, admin_user_info]) assert len(db_users) == 1 await sync_config.syncer.users_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) # Add admin events kc_api.admin_events = get_kc_admin_events([(non_existent_user, KeycloakAdminEvent.DELETE)]) # Process events await sync_config.syncer.events_sync(kc_api) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(db_users) == {user1, admin_user_info} + assert set(u.id for u in db_users) == set(u.id for u in [user1, admin_user_info]) @pytest.mark.asyncio @@ -444,13 +591,35 @@ async def test_avoiding_namespace_slug_duplicates( ) -> None: kc_api = DummyKeycloakAPI() num_users = 10 - users = [UserInfo(f"user-{i}-id", "John", "Doe", "john.doe@gmail.com") for i in range(1, num_users + 1)] + users = [ + UserInfo( + id=f"user-{i}-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by=f"user-{i}-id", + kind=NamespaceKind.user, + underlying_resource_id=f"user-{i}-id", + ), + ) + for i in range(1, num_users + 1) + ] assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users(users + [admin_user_info]) sync_config, _ = get_app_configs(kc_api) @@ -479,13 +648,32 @@ async def test_avoiding_namespace_slug_duplicates( @pytest.mark.asyncio async def test_authz_admin_sync(get_app_configs, admin_user: APIUser) -> None: kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) assert admin_user.id admin_user_info = UserInfo( id=admin_user.id, first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), ) kc_api.users = get_kc_users([user1, admin_user_info]) kc_api.user_roles = {admin_user_info.id: ["renku-admin"]} @@ -498,8 +686,7 @@ async def test_authz_admin_sync(get_app_configs, admin_user: APIUser) -> None: await sync_config.syncer.users_sync(kc_api) await sync_admins_from_keycloak(kc_api, authz) db_users = await user_repo.get_users(admin_user) - db_users = [user.user for user in db_users] - assert set(kc_users) == set(db_users) + assert set(u.id for u in kc_users) == set(u.id for u in db_users) authz_admin_ids = await authz._get_admin_user_ids() assert set(authz_admin_ids) == {admin_user_info.id} # Make user1 admin @@ -530,22 +717,50 @@ async def get_user_namespace_ids_in_authz(authz: Authz) -> set[str]: @pytest.mark.asyncio async def test_bootstraping_user_namespaces(get_app_configs, admin_user: APIUser): kc_api = DummyKeycloakAPI() - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="jane.doe", + created_by="user-2-id", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + ), + ) assert admin_user.id kc_api.users = get_kc_users([user1, user2]) sync_config: SyncConfig sync_config, _ = get_app_configs(kc_api) authz = Authz(sync_config.authz_config) - db_user_namespace_ids: set[str] = set() + db_user_namespace_ids: set[ULID] = set() async with sync_config.session_maker() as session, session.begin(): for user in [user1, user2]: - user_orm = UserORM(user.id, first_name=user.first_name, last_name=user.last_name, email=user.email) + user_orm = UserORM( + user.id, + first_name=user.first_name, + last_name=user.last_name, + email=user.email, + namespace=NamespaceORM(user.id, user_id=user.id), + ) session.add(user_orm) await session.flush() - ns = NamespaceORM(user.id, user_id=user.id) - session.add(ns) - db_user_namespace_ids.add(ns.id) + db_user_namespace_ids.add(user_orm.namespace.id) authz_user_namespace_ids = await get_user_namespace_ids_in_authz(authz) assert len(authz_user_namespace_ids) == 0 await bootstrap_user_namespaces(sync_config) @@ -562,9 +777,40 @@ async def test_fixing_project_group_namespace_relations( first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), + ) + user1 = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), + ) + user2 = UserInfo( + id="user-2-id", + first_name="Jane", + last_name="Doe", + email="jane.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="jane.doe", + created_by="user-2-id", + kind=NamespaceKind.user, + underlying_resource_id="user-2-id", + ), ) - user1 = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") - user2 = UserInfo("user-2-id", "Jane", "Doe", "jane.doe@gmail.com") user1_api = APIUser(is_admin=False, id=user1.id, access_token="access_token") user2_api = APIUser(is_admin=False, id=user2.id, access_token="access_token") user_roles = {admin_user.id: get_kc_roles(["renku-admin"])} @@ -622,8 +868,27 @@ async def test_migrate_groups_make_all_public( first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), + ) + user = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), ) - user = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") user_api = APIUser(is_admin=False, id=user.id, access_token="access_token") anon_user_api = APIUser(is_admin=False) user_roles = {admin_user.id: get_kc_roles(["renku-admin"])} @@ -665,8 +930,27 @@ async def test_migrate_user_namespaces_make_all_public( first_name=admin_user.first_name, last_name=admin_user.last_name, email=admin_user.email, + namespace=Namespace( + id=ULID(), + slug="admin-user", + created_by=admin_user.id, + kind=NamespaceKind.user, + underlying_resource_id=admin_user.id, + ), + ) + user = UserInfo( + id="user-1-id", + first_name="John", + last_name="Doe", + email="john.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="john.doe", + created_by="user-1-id", + kind=NamespaceKind.user, + underlying_resource_id="user-1-id", + ), ) - user = UserInfo("user-1-id", "John", "Doe", "john.doe@gmail.com") anon_user_api = APIUser(is_admin=False) user_roles = {admin_user.id: get_kc_roles(["renku-admin"])} kc_api = DummyKeycloakAPI(users=get_kc_users([admin_user_info, user]), user_roles=user_roles) diff --git a/test/bases/renku_data_services/data_api/conftest.py b/test/bases/renku_data_services/data_api/conftest.py index 81900428d..5a79e35f4 100644 --- a/test/bases/renku_data_services/data_api/conftest.py +++ b/test/bases/renku_data_services/data_api/conftest.py @@ -7,6 +7,7 @@ from authzed.api.v1 import Relationship, RelationshipUpdate, SubjectReference, WriteRelationshipsRequest from sanic import Sanic from sanic_testing.testing import SanicASGITestClient +from ulid import ULID from components.renku_data_services.utils.middleware import validate_null_byte from renku_data_services.app_config.config import Config @@ -14,6 +15,7 @@ from renku_data_services.authz.authz import _AuthzConverter from renku_data_services.data_api.app import register_all_handlers from renku_data_services.migrations.core import run_migrations_for_app +from renku_data_services.namespace.models import Namespace, NamespaceKind from renku_data_services.secrets.config import Config as SecretsConfig from renku_data_services.secrets_storage_api.app import register_all_handlers as register_secrets_handlers from renku_data_services.storage.rclone import RCloneValidator @@ -24,22 +26,62 @@ @pytest.fixture def admin_user() -> UserInfo: - return UserInfo(id="admin", first_name="Admin", last_name="Doe", email="admin.doe@gmail.com") + return UserInfo( + id="admin", + first_name="Admin", + last_name="Doe", + email="admin.doe@gmail.com", + namespace=Namespace( + id=ULID(), slug="admin.doe", kind=NamespaceKind.user, underlying_resource_id="admin", created_by="admin" + ), + ) @pytest.fixture def regular_user() -> UserInfo: - return UserInfo(id="user", first_name="User", last_name="Doe", email="user.doe@gmail.com") + return UserInfo( + id="user", + first_name="User", + last_name="Doe", + email="user.doe@gmail.com", + namespace=Namespace( + id=ULID(), slug="user", kind=NamespaceKind.user, underlying_resource_id="user", created_by="user" + ), + ) @pytest.fixture def member_1_user() -> UserInfo: - return UserInfo(id="member-1", first_name="Member-1", last_name="Doe", email="member-1.doe@gmail.com") + return UserInfo( + id="member-1", + first_name="Member-1", + last_name="Doe", + email="member-1.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="member-1.doe", + kind=NamespaceKind.user, + underlying_resource_id="member-1", + created_by="member-1", + ), + ) @pytest.fixture def member_2_user() -> UserInfo: - return UserInfo(id="member-2", first_name="Member-2", last_name="Doe", email="member-2.doe@gmail.com") + return UserInfo( + id="member-2", + first_name="Member-2", + last_name="Doe", + email="member-2.doe@gmail.com", + namespace=Namespace( + id=ULID(), + slug="member-2.doe", + kind=NamespaceKind.user, + underlying_resource_id="member-2", + created_by="member-2", + ), + ) @pytest.fixture diff --git a/test/bases/renku_data_services/data_api/test_users.py b/test/bases/renku_data_services/data_api/test_users.py index 9f2f564ef..c2e200983 100644 --- a/test/bases/renku_data_services/data_api/test_users.py +++ b/test/bases/renku_data_services/data_api/test_users.py @@ -2,7 +2,9 @@ from uuid import uuid4 import pytest +from ulid import ULID +from renku_data_services.namespace.models import Namespace, NamespaceKind from renku_data_services.users.models import UserInfo @@ -13,6 +15,13 @@ async def test_get_all_users_as_admin(sanic_client, users) -> None: first_name="Admin", last_name="Adminson", email="admin@gmail.com", + namespace=Namespace( + id=ULID(), + slug="admin.adminson", + kind=NamespaceKind.user, + underlying_resource_id="admin-id", + created_by="admin-id", + ), ) admin_token = { "id": admin.id, @@ -29,28 +38,38 @@ async def test_get_all_users_as_admin(sanic_client, users) -> None: assert res.status_code == 200, res.text assert len(res.json) == len(users) retrieved_users = [ - UserInfo( - id=user["id"], - first_name=user.get("first_name"), - last_name=user.get("last_name"), - email=user.get("email"), + ( + user["id"], + user.get("first_name"), + user.get("last_name"), + user.get("email"), ) for user in res.json ] - assert set(retrieved_users) == set(users) + existing_users = [ + ( + user.id, + user.first_name, + user.last_name, + user.email, + ) + for user in users + ] + assert set(retrieved_users) == set(existing_users) for user in users: _, res = await sanic_client.get( f"/api/data/users/{user.id}", headers={"Authorization": f"bearer {json.dumps(admin_token)}"}, ) assert res.status_code == 200 - retrieved_user = UserInfo( + retrieved_user = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), email=res.json.get("email"), ) - assert user == retrieved_user + existing_user = dict(id=user.id, first_name=user.first_name, last_name=user.last_name, email=user.email) + assert existing_user == retrieved_user @pytest.mark.asyncio @@ -73,31 +92,32 @@ async def test_get_all_users_as_non_admin(sanic_client, users) -> None: @pytest.mark.asyncio async def test_get_logged_in_user(sanic_client, users) -> None: user = users[0] + user_dict = dict(id=user.id, first_name=user.first_name, last_name=user.last_name, email=user.email) access_token = {"id": user.id, "is_admin": False} _, res = await sanic_client.get( "/api/data/user", headers={"Authorization": f"bearer {json.dumps(access_token)}"}, ) assert res.status_code == 200 - retrieved_user = UserInfo( + retrieved_user = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), email=res.json.get("email"), ) - assert retrieved_user == user + assert retrieved_user == user_dict _, res = await sanic_client.get( f"/api/data/users/{user.id}", headers={"Authorization": f"bearer {json.dumps(access_token)}"}, ) assert res.status_code == 200 - retrieved_user = UserInfo( + retrieved_user = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), email=res.json.get("email"), ) - assert retrieved_user == user + assert retrieved_user == user_dict @pytest.mark.asyncio @@ -110,7 +130,10 @@ async def test_logged_in_users_can_get_other_users(sanic_client, users) -> None: headers={"Authorization": f"bearer {json.dumps(access_token)}"}, ) assert res.status_code == 200 - retrieved_other_user = UserInfo( + other_user = dict( + id=other_user.id, first_name=other_user.first_name, last_name=other_user.last_name, email=other_user.email + ) + retrieved_other_user = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), @@ -124,7 +147,10 @@ async def test_anonymous_users_can_get_other_users(sanic_client, users) -> None: other_user = users[1] _, res = await sanic_client.get(f"/api/data/users/{other_user.id}") assert res.status_code == 200 - retrieved_other_user = UserInfo( + other_user = dict( + id=other_user.id, first_name=other_user.first_name, last_name=other_user.last_name, email=other_user.email + ) + retrieved_other_user = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), @@ -135,29 +161,28 @@ async def test_anonymous_users_can_get_other_users(sanic_client, users) -> None: @pytest.mark.asyncio async def test_logged_in_user_check_adds_user_if_missing(sanic_client, users, admin_headers) -> None: - user = UserInfo( - id=str(uuid4()), + user_id = str(uuid4()) + user = dict( + id=user_id, first_name="Peter", last_name="Parker", email="peter@spiderman.com", ) - # The user is not really in the database - assert user not in users access_token = { - "id": user.id, + "id": user_id, "is_admin": False, - "first_name": user.first_name, - "last_name": user.last_name, - "email": user.email, - "name": f"{user.first_name} {user.last_name}", + "first_name": user["first_name"], + "last_name": user["last_name"], + "email": user["email"], + "name": f"{user["first_name"]} {user["last_name"]}", } # Just by hitting the users endpoint with valid credentials the user will be aded to the database _, res = await sanic_client.get( - f"/api/data/users/{user.id}", + f"/api/data/users/{user_id}", headers={"Authorization": f"bearer {json.dumps(access_token)}"}, ) assert res.status_code == 200 - user_response = UserInfo( + user_response = dict( id=res.json["id"], first_name=res.json.get("first_name"), last_name=res.json.get("last_name"), @@ -171,7 +196,7 @@ async def test_logged_in_user_check_adds_user_if_missing(sanic_client, users, ad ) assert res.status_code == 200 users_response = [ - UserInfo( + dict( id=iuser["id"], first_name=iuser.get("first_name"), last_name=iuser.get("last_name"),