Skip to content

Commit

Permalink
feat(service): accept commit SHA in read endpoints (#3608)
Browse files Browse the repository at this point in the history
  • Loading branch information
olevski authored Oct 18, 2023
1 parent 5df0ce1 commit 01a0798
Show file tree
Hide file tree
Showing 14 changed files with 152 additions and 37 deletions.
22 changes: 12 additions & 10 deletions renku/core/util/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast
from uuid import uuid4

import git

from renku.core import errors
from renku.infrastructure.repository import DiffChangeType

Expand Down Expand Up @@ -712,7 +714,7 @@ def clone_repository(
skip_smudge=True,
recursive=False,
depth=None,
progress=None,
progress: Optional[git.RemoteProgress] = None,
config: Optional[dict] = None,
raise_git_except=False,
checkout_revision=None,
Expand Down Expand Up @@ -746,10 +748,8 @@ def clone_repository(

path = Path(path) if path else Path(get_repository_name(url))

def handle_git_exception():
"""Handle git exceptions."""
if raise_git_except:
return
def error_from_progress(progress: Optional[git.RemoteProgress], url: str) -> errors.GitError:
"""Format a Git command error into a more user-friendly format."""

message = f"Cannot clone repo from {url}"

Expand All @@ -758,7 +758,7 @@ def handle_git_exception():
error = "".join([f"\n\t{line}" for line in lines if line.strip()])
message += f" - error message:\n {error}"

raise errors.GitError(message)
return errors.GitError(message)

def clean_directory(clean: bool):
if not clean or not path:
Expand Down Expand Up @@ -825,8 +825,9 @@ def clone(branch, depth):
repository = clone(branch=checkout_revision, depth=depth)
except errors.GitCommandError:
if not checkout_revision:
handle_git_exception()
raise
if raise_git_except:
raise
raise error_from_progress(progress, url)

# NOTE: Delete the partially-cloned repository
clean_directory(clean=True)
Expand All @@ -835,8 +836,9 @@ def clone(branch, depth):
try:
repository = clone(branch=None, depth=None)
except errors.GitCommandError:
handle_git_exception()
raise
if raise_git_except:
raise
raise error_from_progress(progress, url)

if checkout_revision is not None and not no_checkout:
try:
Expand Down
13 changes: 8 additions & 5 deletions renku/infrastructure/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from pathlib import Path
from typing import (
Any,
Callable,
Dict,
Generator,
List,
Expand Down Expand Up @@ -1001,20 +1000,24 @@ def clone_from(
branch: Optional[str] = None,
recursive: bool = False,
depth: Optional[int] = None,
progress: Optional[Callable] = None,
progress: Optional[git.RemoteProgress] = None,
no_checkout: bool = False,
env: Optional[dict] = None,
clone_options: Optional[List[str]] = None,
) -> "Repository":
"""Clone a remote repository and create an instance."""
"""Clone a remote repository and create an instance.
Since this is just a thin wrapper around GitPython note that the branch parameter
can work and accept either a branch name or a tag. But it will not work with a commit SHA.
"""
try:
repository = git.Repo.clone_from(
url=url,
to_path=path,
branch=branch,
branch=branch, # NOTE: Git python will accept tag or branch here but not SHA
recursive=recursive,
depth=depth,
progress=progress,
progress=progress, # type: ignore[arg-type]
no_checkout=no_checkout,
env=env,
multi_options=clone_options,
Expand Down
15 changes: 11 additions & 4 deletions renku/ui/service/cache/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@

from renku.ui.service.cache.base import BaseCache
from renku.ui.service.config import CACHE_PROJECTS_PATH
from renku.ui.service.utils import normalize_git_url

MAX_CONCURRENT_PROJECT_REQUESTS = 10
LOCK_TIMEOUT = 15
NO_BRANCH_FOLDER = "__default_branch__"
DETACHED_HEAD_FOLDER_PREFIX = "__detached_head_"


class Project(Model):
Expand All @@ -55,14 +55,21 @@ class Project(Model):
description = TextField()
owner = TextField()
initialized = BooleanField()
commit_sha = TextField()
branch = TextField()

@property
def abs_path(self) -> Path:
"""Full path of cached project."""
branch = self.branch
folder_name = self.branch
if not self.branch:
branch = NO_BRANCH_FOLDER
return CACHE_PROJECTS_PATH / self.user_id / self.owner / normalize_git_url(self.slug) / branch
if self.commit_sha:
# NOTE: Detached head state
folder_name = f"{DETACHED_HEAD_FOLDER_PREFIX}{self.commit_sha}"
else:
# NOTE: We are on the default branch (i.e. main)
folder_name = NO_BRANCH_FOLDER
return CACHE_PROJECTS_PATH / self.user_id / self.owner / self.slug / folder_name

def read_lock(self, timeout: Optional[float] = None):
"""Shared read lock on the project."""
Expand Down
13 changes: 11 additions & 2 deletions renku/ui/service/cache/serializers/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import uuid
from datetime import datetime

from marshmallow import fields, post_load
from marshmallow import ValidationError, fields, post_load, validates_schema

from renku.ui.service.cache.models.project import Project
from renku.ui.service.serializers.common import AccessSchema, CreationSchema, MandatoryUserSchema
Expand All @@ -39,11 +39,20 @@ class ProjectSchema(CreationSchema, AccessSchema, MandatoryUserSchema):
description = fields.String(load_default=None)
owner = fields.String(required=True)
initialized = fields.Boolean(dump_default=False)
commit_sha = fields.String(required=False, load_default=None, dump_default=None)
branch = fields.String(required=False, load_default=None, dump_default=None)

@post_load
def make_project(self, data, **options):
"""Construct project object."""
data["git_url"] = normalize_git_url(data["git_url"])
if data.get("git_url"):
data["git_url"] = normalize_git_url(data["git_url"])
data["name"] = normalize_git_url(data["name"])
data["slug"] = normalize_git_url(data["slug"])
return Project(**data)

@validates_schema
def ensure_only_commit_sha_or_branch(self, data, **kwargs):
"""Checks that only a commit SHA or branch is set and not both."""
if data.get("commit_sha") and data.get("branch"):
raise ValidationError("You cannot specify a commit SHA and a branch, only one or the other")
1 change: 1 addition & 0 deletions renku/ui/service/controllers/api/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def local(self):
self.request_data.get("branch"),
self.user,
self.clone_depth is not None,
self.request_data.get("commit_sha"),
)

self.context["project_id"] = project.project_id
Expand Down
31 changes: 26 additions & 5 deletions renku/ui/service/gateways/repository_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,13 @@ class LocalRepositoryCache(IRepositoryCache):
"""Cache for project repos stored on local disk."""

def get(
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
self,
cache: ServiceCache,
git_url: str,
branch: Optional[str],
user: User,
shallow: bool = True,
commit_sha: Optional[str] = None,
) -> Project:
"""Get a project from cache (clone if necessary)."""
if git_url is None:
Expand All @@ -58,12 +64,12 @@ def get(
)
except ValueError:
# project not found in DB
return self._clone_project(cache, git_url, branch, user, shallow)
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)

if not project.abs_path.exists():
# cache folder doesn't exist anymore
project.delete()
return self._clone_project(cache, git_url, branch, user, shallow)
return self._clone_project(cache, git_url, branch, user, shallow, commit_sha)

if not shallow and project.is_shallow:
self._unshallow_project(project, user)
Expand Down Expand Up @@ -100,7 +106,13 @@ def _update_project_access_date(self, project: Project):
project.save()

def _clone_project(
self, cache: ServiceCache, git_url: str, branch: Optional[str], user: User, shallow: bool = True
self,
cache: ServiceCache,
git_url: str,
branch: Optional[str],
user: User,
shallow: bool = True,
commit_sha: Optional[str] = None,
) -> Project:
"""Clone a project to cache."""
git_url = normalize_git_url(git_url)
Expand All @@ -124,6 +136,7 @@ def _clone_project(
"branch": branch,
"git_url": git_url,
"user_id": user.user_id,
"commit_sha": commit_sha,
}
project = cache.make_project(user, project_data, persist=False)

Expand All @@ -139,6 +152,7 @@ def _clone_project(
(Project.user_id == user.user_id)
& (Project.git_url == git_url)
& (Project.branch == branch)
& (Project.commit_sha == commit_sha)
& (Project.project_id != project.project_id)
)
except ValueError:
Expand Down Expand Up @@ -170,7 +184,7 @@ def _clone_project(
"user.email": user.email,
"pull.rebase": False,
},
checkout_revision=project.branch,
checkout_revision=commit_sha or project.branch,
)
).output
project.save()
Expand All @@ -186,6 +200,9 @@ def _clone_project(

def _unshallow_project(self, project: Project, user: User):
"""Turn a shallow clone into a full clone."""
if project.commit_sha is not None:
# NOTE: A project in a detached head state at a specific commit SHA does not make sense to be unshallowed
return
try:
with project.write_lock(), Repository(project.abs_path) as repository:
try:
Expand All @@ -208,6 +225,10 @@ def _maybe_update_cache(self, project: Project, user: User):
if project.fetch_age < PROJECT_FETCH_TIME:
return

if project.commit_sha is not None:
# NOTE: A project in a detached head state at a specific commit SHA cannot be updated
return

try:
with project.write_lock(), Repository(project.abs_path) as repository:
try:
Expand Down
6 changes: 6 additions & 0 deletions renku/ui/service/serializers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ def set_branch_from_ref(self, data, **_):
return data


class GitCommitSHA:
"""Schema for a commit SHA."""

commit_sha = fields.String(load_default=None, metadata={"description": "Git commit SHA."})


class AsyncSchema(Schema):
"""Schema for adding a commit at the end of the operation."""

Expand Down
10 changes: 8 additions & 2 deletions renku/ui/service/serializers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@

from marshmallow import Schema, fields

from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema, RenkuSyncSchema
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
MigrateSchema,
RemoteRepositorySchema,
RenkuSyncSchema,
)
from renku.ui.service.serializers.rpc import JsonRPCResponse


class ConfigShowRequest(RemoteRepositorySchema):
class ConfigShowRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for config show."""


Expand Down
5 changes: 3 additions & 2 deletions renku/ui/service/serializers/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from renku.domain_model.dataset import ImageObjectRequestJson
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
JobDetailsResponse,
MigrateSchema,
RemoteRepositorySchema,
Expand Down Expand Up @@ -120,7 +121,7 @@ class DatasetAddResponseRPC(JsonRPCResponse):
result = fields.Nested(DatasetAddResponse)


class DatasetListRequest(RemoteRepositorySchema):
class DatasetListRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for dataset list view."""


Expand All @@ -142,7 +143,7 @@ class DatasetListResponseRPC(JsonRPCResponse):
result = fields.Nested(DatasetListResponse)


class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema):
class DatasetFilesListRequest(DatasetSlugSchema, RemoteRepositorySchema, GitCommitSHA):
"""Request schema for dataset files list view."""


Expand Down
4 changes: 2 additions & 2 deletions renku/ui/service/serializers/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
"""Renku graph serializers."""
from marshmallow import Schema, fields, validate

from renku.ui.service.serializers.common import AsyncSchema, MigrateSchema, RemoteRepositorySchema
from renku.ui.service.serializers.common import AsyncSchema, GitCommitSHA, MigrateSchema, RemoteRepositorySchema
from renku.ui.service.serializers.rpc import JsonRPCResponse


class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema):
class GraphExportRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema, GitCommitSHA):
"""Request schema for dataset list view."""

callback_url = fields.URL()
Expand Down
3 changes: 2 additions & 1 deletion renku/ui/service/serializers/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from renku.domain_model.dataset import ImageObjectRequestJson
from renku.ui.service.serializers.common import (
AsyncSchema,
GitCommitSHA,
MigrateSchema,
RemoteRepositoryBaseSchema,
RemoteRepositorySchema,
Expand All @@ -29,7 +30,7 @@
from renku.ui.service.serializers.rpc import JsonRPCResponse


class ProjectShowRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema):
class ProjectShowRequest(AsyncSchema, RemoteRepositorySchema, MigrateSchema, GitCommitSHA):
"""Project show metadata request."""


Expand Down
8 changes: 4 additions & 4 deletions renku/ui/service/serializers/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
from renku.domain_model.dataset import DatasetCreatorsJson
from renku.infrastructure.persistent import Persistent
from renku.ui.cli.utils.plugins import get_supported_formats
from renku.ui.service.serializers.common import RemoteRepositorySchema
from renku.ui.service.serializers.common import GitCommitSHA, RemoteRepositorySchema
from renku.ui.service.serializers.rpc import JsonRPCResponse


class WorkflowPlansListRequest(RemoteRepositorySchema):
class WorkflowPlansListRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for plan list view."""


Expand Down Expand Up @@ -85,7 +85,7 @@ class WorkflowPlansListResponseRPC(JsonRPCResponse):
result = fields.Nested(WorkflowPlansListResponse)


class WorkflowPlansShowRequest(RemoteRepositorySchema):
class WorkflowPlansShowRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for plan show view."""

plan_id = fields.String(required=True)
Expand Down Expand Up @@ -222,7 +222,7 @@ class WorkflowPlansShowResponseRPC(JsonRPCResponse):
)


class WorkflowPlansExportRequest(RemoteRepositorySchema):
class WorkflowPlansExportRequest(RemoteRepositorySchema, GitCommitSHA):
"""Request schema for exporting a plan."""

plan_id = fields.String(required=True)
Expand Down
Loading

0 comments on commit 01a0798

Please sign in to comment.