Skip to content

Commit

Permalink
refactor: merge UserInfo and UserWithNamespace (#362)
Browse files Browse the repository at this point in the history
  • Loading branch information
Panaetius authored Sep 16, 2024
1 parent d69af01 commit 4c89c4b
Show file tree
Hide file tree
Showing 15 changed files with 626 additions and 260 deletions.
6 changes: 3 additions & 3 deletions components/renku_data_services/app_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
from renku_data_services.users.db import UserRepo as KcUserRepo
from renku_data_services.users.dummy_kc_api import DummyKeycloakAPI
from renku_data_services.users.kc_api import IKeycloakAPI, KeycloakAPI
from renku_data_services.users.models import UserInfo
from renku_data_services.users.models import UnsavedUserInfo
from renku_data_services.utils.core import merge_api_specs, oidc_discovery

default_resource_pool = models.ResourcePool(
Expand Down Expand Up @@ -436,8 +436,8 @@ def from_env(cls, prefix: str = "") -> "Config":
user_store = DummyUserStore(user_always_exists=user_always_exists)
gitlab_client = DummyGitlabAPI()
dummy_users = [
UserInfo("user1", "user1", "doe", "[email protected]"),
UserInfo("user2", "user2", "doe", "[email protected]"),
UnsavedUserInfo(id="user1", first_name="user1", last_name="doe", email="[email protected]"),
UnsavedUserInfo(id="user2", first_name="user2", last_name="doe", email="[email protected]"),
]
kc_api = DummyKeycloakAPI(users=[i._to_keycloak_dict() for i in dummy_users])
redis = RedisConfig.fake()
Expand Down
10 changes: 5 additions & 5 deletions components/renku_data_services/authz/authz.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from renku_data_services.errors import errors
from renku_data_services.namespace.models import Group, GroupUpdate, Namespace, NamespaceKind, NamespaceUpdate
from renku_data_services.project.models import Project, ProjectUpdate
from renku_data_services.users.models import UserWithNamespace, UserWithNamespaceUpdate
from renku_data_services.users.models import UserInfo, UserInfoUpdate

_P = ParamSpec("_P")

Expand All @@ -51,7 +51,7 @@ def authz(self) -> "Authz":

_AuthzChangeFuncResult = TypeVar(
"_AuthzChangeFuncResult",
bound=Project | ProjectUpdate | Group | UserWithNamespaceUpdate | list[UserWithNamespace] | None,
bound=Project | ProjectUpdate | Group | UserInfoUpdate | list[UserInfo] | None,
)
_T = TypeVar("_T")
_WithAuthz = TypeVar("_WithAuthz", bound=WithAuthz)
Expand Down Expand Up @@ -493,14 +493,14 @@ async def _get_authz_change(
case AuthzOperation.delete, ResourceType.group if result is None:
# NOTE: This means that the group does not exist in the first place so nothing was deleted
pass
case AuthzOperation.update_or_insert, ResourceType.user if isinstance(result, UserWithNamespaceUpdate):
case AuthzOperation.update_or_insert, ResourceType.user if isinstance(result, UserInfoUpdate):
if result.old is None:
authz_change = db_repo.authz._add_user_namespace(result.new.namespace)
case AuthzOperation.insert_many, ResourceType.user_namespace if isinstance(result, list):
for res in result:
if not isinstance(res, UserWithNamespace):
if not isinstance(res, UserInfo):
raise errors.ProgrammingError(
message="Expected list of UserWithNamespace when generating authorization "
message="Expected list of UserInfo when generating authorization "
f"database updates for inserting namespaces but found {type(res)}"
)
authz_change.extend(db_repo.authz._add_user_namespace(res.namespace))
Expand Down
22 changes: 20 additions & 2 deletions components/renku_data_services/base_models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unicodedata
from dataclasses import dataclass, field
from enum import Enum, StrEnum
from typing import ClassVar, Optional, Protocol
from typing import ClassVar, Optional, Protocol, Self

from sanic import Request

Expand Down Expand Up @@ -122,7 +122,7 @@ def __init__(self, value: str) -> None:
object.__setattr__(self, "value", value.lower())

@classmethod
def from_name(cls, name: str) -> "Slug":
def from_name(cls, name: str) -> Self:
"""Takes a name with any amount of invalid characters and transforms it in a valid slug."""
lower_case = name.lower()
no_space = re.sub(r"\s+", "-", lower_case)
Expand All @@ -139,6 +139,24 @@ def from_name(cls, name: str) -> "Slug":
)
return cls(no_dot_git_or_dot_atom_at_end)

@classmethod
def from_user(cls, email: str | None, first_name: str | None, last_name: str | None, keycloak_id: str) -> Self:
"""Create a slug from a user."""
if email:
slug = email.split("@")[0]
elif first_name and last_name:
slug = first_name + "-" + last_name
elif last_name:
slug = last_name
elif first_name:
slug = first_name
else:
slug = "user_" + keycloak_id
# The length limit is 99 but leave some space for modifications that may be added down the line
# to filter out invalid characters or to generate a unique name
slug = slug[:80]
return cls.from_name(slug)

def __true_div__(self, other: "Slug") -> str:
"""Joins two slugs into a path fraction without dashes at the beginning or end."""
if type(self) is not type(other):
Expand Down
6 changes: 3 additions & 3 deletions components/renku_data_services/crc/blueprints.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from renku_data_services.crc.db import ResourcePoolRepository, UserRepository
from renku_data_services.k8s.quota import QuotaRepository
from renku_data_services.users.db import UserRepo as KcUserRepo
from renku_data_services.users.models import UserWithNamespace
from renku_data_services.users.models import UserInfo


@dataclass(kw_only=True)
Expand Down Expand Up @@ -183,10 +183,10 @@ async def _put_post(
self, api_user: base_models.APIUser, resource_pool_id: int, body: apispec.PoolUsersWithId, post: bool = True
) -> HTTPResponse:
user_ids_to_add = set([user.id for user in body.root])
users_checks: list[UserWithNamespace | None] = await asyncio.gather(
users_checks: list[UserInfo | None] = await asyncio.gather(
*[self.kc_user_repo.get_user(id=id) for id in user_ids_to_add]
)
existing_user_ids = set([user.user.id for user in users_checks if user is not None])
existing_user_ids = set([user.id for user in users_checks if user is not None])
if existing_user_ids != user_ids_to_add:
missing_ids = user_ids_to_add.difference(existing_user_ids)
raise errors.MissingResourceError(message=f"The users with IDs {missing_ids} cannot be found")
Expand Down
42 changes: 21 additions & 21 deletions components/renku_data_services/message_queue/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,38 +94,38 @@ def to_events(
class _UserEventConverter:
@staticmethod
def to_events(
user: user_models.UserInfo | user_models.UserWithNamespace | user_models.UserWithNamespaceUpdate,
user: user_models.UserInfo | user_models.UserInfoUpdate | str,
event_type: type[AvroModel] | type[events.AmbiguousEvent],
) -> list[Event]:
match event_type:
case v2.UserAdded | events.InsertUserNamespace:
user = cast(user_models.UserWithNamespace, user)
user = cast(user_models.UserInfo, user)
return [
_make_event(
"user.added",
v2.UserAdded(
id=user.user.id,
firstName=user.user.first_name,
lastName=user.user.last_name,
email=user.user.email,
id=user.id,
firstName=user.first_name,
lastName=user.last_name,
email=user.email,
namespace=user.namespace.slug,
),
)
]
case v2.UserRemoved:
user = cast(user_models.UserInfo, user)
return [_make_event("user.removed", v2.UserRemoved(id=user.id))]
user_id = cast(str, user)
return [_make_event("user.removed", v2.UserRemoved(id=user_id))]
case events.UpdateOrInsertUser:
user = cast(user_models.UserWithNamespaceUpdate, user)
user = cast(user_models.UserInfoUpdate, user)
if user.old is None:
return [
_make_event(
"user.added",
v2.UserAdded(
id=user.new.user.id,
firstName=user.new.user.first_name,
lastName=user.new.user.last_name,
email=user.new.user.email,
id=user.new.id,
firstName=user.new.first_name,
lastName=user.new.last_name,
email=user.new.email,
namespace=user.new.namespace.slug,
),
)
Expand All @@ -135,10 +135,10 @@ def to_events(
_make_event(
"user.updated",
v2.UserUpdated(
id=user.new.user.id,
firstName=user.new.user.first_name,
lastName=user.new.user.last_name,
email=user.new.user.email,
id=user.new.id,
firstName=user.new.first_name,
lastName=user.new.last_name,
email=user.new.email,
namespace=user.new.namespace.slug,
),
)
Expand Down Expand Up @@ -328,16 +328,16 @@ def to_events(input: _T, event_type: type[AvroModel] | type[events.AmbiguousEven
group_authz = cast(list[authz_models.MembershipChange], input)
return _GroupAuthzEventConverter.to_events(group_authz)
case v2.UserAdded:
user_with_namespace = cast(user_models.UserWithNamespace, input)
user_with_namespace = cast(user_models.UserInfo, input)
return _UserEventConverter.to_events(user_with_namespace, event_type)
case v2.UserRemoved:
user_info = cast(user_models.UserInfo, input)
user_info = cast(str, input)
return _UserEventConverter.to_events(user_info, event_type)
case events.UpdateOrInsertUser:
user_with_namespace_update = cast(user_models.UserWithNamespaceUpdate, input)
user_with_namespace_update = cast(user_models.UserInfoUpdate, input)
return _UserEventConverter.to_events(user_with_namespace_update, event_type)
case events.InsertUserNamespace:
user_namespaces = cast(list[user_models.UserWithNamespace], input)
user_namespaces = cast(list[user_models.UserInfo], input)
output: list[Event] = []
for namespace in user_namespaces:
output.extend(_UserEventConverter.to_events(namespace, event_type))
Expand Down
78 changes: 39 additions & 39 deletions components/renku_data_services/namespace/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,27 @@ def __init__(
@with_db_transaction
@Authz.authz_change(AuthzOperation.insert_many, ResourceType.user_namespace)
@dispatch_message(events.InsertUserNamespace)
async def generate_user_namespaces(
self, *, session: AsyncSession | None = None
) -> list[user_models.UserWithNamespace]:
async def generate_user_namespaces(self, *, session: AsyncSession | None = None) -> list[user_models.UserInfo]:
"""Generate user namespaces if the user table has data and the namespaces table is empty."""
if not session:
raise errors.ProgrammingError(message="A database session is required")
# NOTE: lock to make sure another instance of the data service cannot insert/update but can read
output: list[user_models.UserWithNamespace] = []
output: list[user_models.UserInfo] = []
await session.execute(text("LOCK TABLE common.namespaces IN EXCLUSIVE MODE"))
at_least_one_namespace = (await session.execute(select(schemas.NamespaceORM).limit(1))).one_or_none()
if at_least_one_namespace:
logger.info("Found at least one user namespace, skipping creation")
return output
return []
logger.info("Found zero user namespaces, will try to create them from users table")
res = await session.scalars(select(user_schemas.UserORM))
for user in res:
ns = await self._insert_user_namespace(session, user, retry_enumerate=10, retry_random=True)
slug = base_models.Slug.from_user(user.email, user.first_name, user.last_name, user.keycloak_id)
ns = await self._insert_user_namespace(
session, user.keycloak_id, slug.value, retry_enumerate=10, retry_random=True
)
logger.info(f"Creating user namespace {ns}")
output.append(user_models.UserWithNamespace(user.dump(), ns))
user.namespace = ns
output.append(user.dump())
logger.info(f"Created {len(output)} user namespaces")
return output

Expand Down Expand Up @@ -329,7 +331,7 @@ async def get_namespaces(
output.append(ns_orm.dump())
return output, group_count

async def _get_user_namespaces(self) -> AsyncGenerator[user_models.UserWithNamespace, None]:
async def _get_user_namespaces(self) -> AsyncGenerator[user_models.UserInfo, None]:
"""Lists all user namespaces without regard for authorization or permissions, used for migrations."""
async with self.session_maker() as session, session.begin():
namespaces = await session.stream_scalars(
Expand Down Expand Up @@ -379,39 +381,37 @@ async def get_user_namespace(self, user_id: str) -> models.Namespace | None:
raise errors.ProgrammingError(message="Found a namespace that has no user associated with it.")
return ns.dump()

async def _create_user_namespace_slug(
self, session: AsyncSession, user_slug: str, retry_enumerate: int = 0, retry_random: bool = False
) -> str:
"""Create a valid namespace slug for a user."""
nss = await session.scalars(
select(schemas.NamespaceORM.slug).where(schemas.NamespaceORM.slug.startswith(user_slug))
)
nslist = nss.all()
if user_slug not in nslist:
return user_slug
if retry_enumerate:
for inc in range(1, retry_enumerate + 1):
slug = f"{user_slug}-{inc}"
if slug not in nslist:
return slug
if retry_random:
suffix = "".join([random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]) # nosec B311
slug = f"{user_slug}-{suffix}"
if slug not in nslist:
return slug

raise errors.ValidationError(message=f"Cannot create generate a unique namespace slug for the user {user_slug}")

async def _insert_user_namespace(
self, session: AsyncSession, user: schemas.UserORM, retry_enumerate: int = 0, retry_random: bool = False
) -> models.Namespace:
self, session: AsyncSession, user_id: str, user_slug: str, retry_enumerate: int = 0, retry_random: bool = False
) -> schemas.NamespaceORM:
"""Insert a new namespace for the user and optionally retry different variations to avoid collisions."""
original_slug = user.to_slug()
for inc in range(0, retry_enumerate + 1):
# NOTE: on iteration 0 we try with the optimal slug value derived from the user data without any suffix.
suffix = ""
if inc > 0:
suffix = f"-{inc}"
slug = base_models.Slug.from_name(original_slug.value.lower() + suffix)
ns = schemas.NamespaceORM(slug.value, user_id=user.keycloak_id)
try:
async with session.begin_nested():
session.add(ns)
await session.flush()
except IntegrityError:
if retry_enumerate == 0:
raise errors.ValidationError(message=f"The user namespace slug {slug.value} already exists")
continue
else:
await session.refresh(ns)
return ns.dump()
if not retry_random:
raise errors.ValidationError(
message=f"Cannot create generate a unique namespace slug for the user with ID {user.keycloak_id}"
)
# NOTE: At this point the attempts to generate unique ID have ended and the only option is
# to add a small random suffix to avoid uniqueness constraints problems
suffix = "-" + "".join([random.choice(string.ascii_lowercase + string.digits) for _ in range(8)]) # nosec: B311
slug = base_models.Slug.from_name(original_slug.value.lower() + suffix)
ns = schemas.NamespaceORM(slug.value, user_id=user.keycloak_id)
namespace = await self._create_user_namespace_slug(session, user_slug, retry_enumerate, retry_random)
slug = base_models.Slug.from_name(namespace)
ns = schemas.NamespaceORM(slug.value, user_id=user_id)
session.add(ns)
await session.flush()
await session.refresh(ns)
return ns.dump()
return ns
27 changes: 22 additions & 5 deletions components/renku_data_services/namespace/orm.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""SQLAlchemy's schemas for the group database."""

from datetime import datetime
from typing import Optional
from typing import Optional, Self, cast

from sqlalchemy import CheckConstraint, DateTime, MetaData, String, func
from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, relationship
Expand All @@ -11,7 +11,7 @@
from renku_data_services.base_orm.registry import COMMON_ORM_REGISTRY
from renku_data_services.errors import errors
from renku_data_services.namespace import models
from renku_data_services.users.models import UserInfo, UserWithNamespace
from renku_data_services.users.models import UserInfo
from renku_data_services.users.orm import UserORM
from renku_data_services.utils.sqlalchemy import ULIDType

Expand Down Expand Up @@ -103,16 +103,33 @@ def dump(self) -> models.Namespace:
name=name,
)

def dump_user(self) -> UserWithNamespace:
def dump_user(self) -> UserInfo:
"""Create a user with namespace from the ORM."""
if self.user is None:
raise errors.ProgrammingError(
message="Cannot dump ORM namespace as namespace with user if the namespace "
"has no associated user with it."
)
ns = self.dump()
user_info = UserInfo(self.user.keycloak_id, self.user.first_name, self.user.last_name, self.user.email)
return UserWithNamespace(user_info, ns)
user_info = UserInfo(
id=self.user.keycloak_id,
first_name=self.user.first_name,
last_name=self.user.last_name,
email=self.user.email,
namespace=ns,
)
return user_info

@classmethod
def load(cls, ns: models.Namespace) -> Self:
"""Create an ORM object from the user object."""
match ns.kind:
case models.NamespaceKind.group:
return cls(slug=ns.slug, group_id=cast(ULID, ns.underlying_resource_id))
case models.NamespaceKind.user:
return cls(slug=ns.slug, user_id=cast(str, ns.underlying_resource_id))

raise errors.ValidationError(message=f"Unknown namespace kind {ns.kind}")


class NamespaceOldORM(BaseORM):
Expand Down
Loading

0 comments on commit 4c89c4b

Please sign in to comment.