Skip to content

Commit

Permalink
feat(cli, service): support for setting an image for projects
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed Oct 1, 2023
1 parent 19142c6 commit a6e5a0b
Show file tree
Hide file tree
Showing 33 changed files with 431 additions and 128 deletions.
4 changes: 2 additions & 2 deletions renku/command/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
"""Project management."""

from renku.command.command_builder.command import Command
from renku.core.constant import DATABASE_METADATA_PATH
from renku.core.constant import PROJECT_METADATA_PATH
from renku.core.project import edit_project, show_project


def edit_project_command():
"""Command for editing project metadata."""
command = Command().command(edit_project).lock_project().with_database(write=True)
return command.require_migration().with_commit(commit_only=DATABASE_METADATA_PATH)
return command.require_migration().with_commit(commit_only=PROJECT_METADATA_PATH)


def show_project_command():
Expand Down
18 changes: 2 additions & 16 deletions renku/command/schema/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
from renku.command.schema.annotation import AnnotationSchema
from renku.command.schema.calamus import DateTimeList, JsonLDSchema, Nested, Uri, fields, oa, prov, renku, schema
from renku.command.schema.entity import CollectionSchema, EntitySchema
from renku.domain_model.dataset import Dataset, DatasetFile, DatasetTag, ImageObject, Language, RemoteEntity, Url
from renku.command.schema.image import ImageObjectSchema
from renku.domain_model.dataset import Dataset, DatasetFile, DatasetTag, Language, RemoteEntity, Url


def dump_dataset_as_jsonld(dataset: Dataset) -> dict:
Expand Down Expand Up @@ -104,21 +105,6 @@ class Meta:
name = fields.String(schema.name)


class ImageObjectSchema(JsonLDSchema):
"""ImageObject schema."""

class Meta:
"""Meta class."""

rdf_type = schema.ImageObject
model = ImageObject
unknown = EXCLUDE

content_url = fields.String(schema.contentUrl)
id = fields.Id(load_default=None)
position = fields.Integer(schema.position)


class RemoteEntitySchema(JsonLDSchema):
"""RemoteEntity schema."""

Expand Down
36 changes: 36 additions & 0 deletions renku/command/schema/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright Swiss Data Science Center (SDSC). A partnership between
# École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image JSON-LD scheme."""

from marshmallow import EXCLUDE

from renku.command.schema.calamus import JsonLDSchema, fields, schema
from renku.domain_model.image import ImageObject


class ImageObjectSchema(JsonLDSchema):
    """JSON-LD schema for ``ImageObject``.

    Declares the ``content_url``, ``id`` and ``position`` fields of the model;
    ``content_url`` and ``position`` are bound to ``schema.contentUrl`` and
    ``schema.position`` respectively.
    """

    class Meta:
        """Schema options: RDF type, target model and unknown-field policy."""

        rdf_type = schema.ImageObject
        model = ImageObject
        # NOTE: Properties that are not declared below are dropped instead of raising an error
        unknown = EXCLUDE

    content_url = fields.String(schema.contentUrl)
    # NOTE: ``id`` falls back to ``None`` when it is missing from the loaded data
    id = fields.Id(load_default=None)
    position = fields.Integer(schema.position)
2 changes: 2 additions & 0 deletions renku/command/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from renku.command.schema.agent import PersonSchema
from renku.command.schema.annotation import AnnotationSchema
from renku.command.schema.calamus import DateTimeList, JsonLDSchema, Nested, StringList, fields, oa, prov, renku, schema
from renku.command.schema.image import ImageObjectSchema
from renku.domain_model.project import Project


Expand All @@ -39,6 +40,7 @@ class Meta:
date_created = DateTimeList(schema.dateCreated, load_default=None, format="iso", extra_formats=("%Y-%m-%d",))
description = fields.String(schema.description, load_default=None)
id = fields.Id(load_default=None)
image = fields.Nested(schema.image, ImageObjectSchema, load_default=None)
immutable_template_files = fields.List(
renku.immutableTemplateFiles,
fields.String(),
Expand Down
11 changes: 11 additions & 0 deletions renku/core/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from enum import IntEnum
from pathlib import Path

FILESYSTEM_ROOT = os.path.abspath(os.sep)
"""Path to the root of the filesystem."""

APP_NAME = "Renku"
"""Application name for storing configuration."""

Expand All @@ -41,6 +44,9 @@
DATASET_IMAGES = "dataset_images"
"""Directory for dataset images."""

IMAGES = "images"
"""Path for images/icons."""

DEFAULT_DATA_DIR = "data"

DOCKERFILE = "Dockerfile"
Expand Down Expand Up @@ -79,6 +85,11 @@
Path(RENKU_HOME) / DATABASE_PATH,
]

PROJECT_METADATA_PATH = [
Path(RENKU_HOME) / DATABASE_PATH,
Path(RENKU_HOME) / IMAGES,
]

DATASET_METADATA_PATHS = [
Path(RENKU_HOME) / DATABASE_PATH,
Path(RENKU_HOME) / DATASET_IMAGES,
Expand Down
9 changes: 7 additions & 2 deletions renku/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
from renku.core.dataset.providers.factory import ProviderFactory
from renku.core.dataset.providers.git import GitProvider
from renku.core.dataset.providers.models import DatasetUpdateAction, ProviderDataset
from renku.core.dataset.request_model import ImageRequestModel
from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection
from renku.core.image import ImageRequestModel
from renku.core.interface.dataset_gateway import IDatasetGateway
from renku.core.storage import check_external_storage, track_paths_in_storage
from renku.core.util import communication
Expand Down Expand Up @@ -875,7 +875,12 @@ def set_dataset_images(dataset: Dataset, images: Optional[List[ImageRequestModel
dataset.images = []
images_updated = False
for img in images:
img_object = img.to_image_object(dataset)
try:
img_object = img.to_image_object(
image_folder=project_context.dataset_images_path / dataset.initial_identifier, owner_id=dataset.id
)
except errors.ImageError as e:
raise errors.DatasetImageError(e) from e

if not img_object:
continue
Expand Down
6 changes: 5 additions & 1 deletion renku/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,11 @@ class RenkuSaveError(RenkuException):
"""Raised when renku save doesn't work."""


class DatasetImageError(DatasetException):
class ImageError(RenkuException):
"""Raised when an image for a project/dataset is not accessible."""


class DatasetImageError(DatasetException, ImageError):
"""Raised when a local dataset image is not accessible."""


Expand Down
70 changes: 59 additions & 11 deletions renku/core/dataset/request_model.py → renku/core/image.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Copyright Swiss Data Science Center (SDSC). A partnership between
# École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).

# Copyright Swiss Data Science Center (SDSC). A partnership between
# École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
Expand All @@ -13,7 +17,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Renku management dataset request models."""
"""Renku project/dataset image management."""

import imghdr
import os
Expand All @@ -24,31 +28,33 @@
from urllib.request import urlretrieve

from renku.core import errors
from renku.domain_model.dataset import Dataset, ImageObject
from renku.core.constant import FILESYSTEM_ROOT
from renku.core.util.os import is_subpath
from renku.core.util.urls import is_remote
from renku.domain_model.image import ImageObject
from renku.domain_model.project_context import project_context


class ImageRequestModel:
"""Model for passing image information to dataset use-cases."""
"""Model for passing image information."""

def __init__(
self,
content_url: str,
position: int,
mirror_locally: bool = False,
position: int = 0,
mirror_locally: bool = True,
safe_image_paths: Optional[List[str]] = None,
) -> None:
self.content_url = content_url
self.position = position
self.mirror_locally = mirror_locally
self.safe_image_paths: List[Union[str, Path]] = cast(List[Union[str, Path]], safe_image_paths) or []

def to_image_object(self, dataset: Dataset) -> ImageObject:
def to_image_object(self, image_folder: Path, owner_id: str) -> ImageObject:
"""Convert request model to ``ImageObject``."""
image_type = None
self.safe_image_paths.append(project_context.path)

image_folder = project_context.dataset_images_path / dataset.initial_identifier
image_folder.mkdir(exist_ok=True, parents=True)

if urllib.parse.urlparse(self.content_url).netloc:
Expand All @@ -57,14 +63,14 @@ def to_image_object(self, dataset: Dataset) -> ImageObject:
return ImageObject(
content_url=self.content_url,
position=self.position,
id=ImageObject.generate_id(dataset_id=dataset.id, position=self.position),
id=ImageObject.generate_id(owner_id=owner_id, position=self.position),
)

# NOTE: mirror the image locally
try:
path, _ = urlretrieve(self.content_url)
except urllib.error.URLError as e:
raise errors.DatasetImageError(f"Dataset image with url {self.content_url} couldn't be mirrored") from e
raise errors.ImageError(f"Image with url {self.content_url} couldn't be mirrored") from e

image_type = imghdr.what(path)
if image_type:
Expand All @@ -81,7 +87,9 @@ def to_image_object(self, dataset: Dataset) -> ImageObject:
os.path.commonprefix([path, p]) == str(p) for p in self.safe_image_paths
):
# NOTE: make sure files exists and prevent path traversal
raise errors.DatasetImageError(f"Dataset image with relative path {self.content_url} not found")
raise errors.ImageError(f"Image with relative path {self.content_url} not found")

# TODO: Delete the old image since it might have a different extension and won't get copied over

if not path.startswith(str(image_folder)):
# NOTE: only copy the image if it's not already inside the target image folder
Expand All @@ -98,5 +106,45 @@ def to_image_object(self, dataset: Dataset) -> ImageObject:
return ImageObject(
content_url=str(img_path.relative_to(project_context.path)),
position=self.position,
id=ImageObject.generate_id(dataset_id=dataset.id, position=self.position),
id=ImageObject.generate_id(owner_id=owner_id, position=self.position),
)

def download_image(self, owner_id: str) -> ImageObject:
    """Resolve the requested image to a file and describe it as an ``ImageObject``.

    A remote URL is either referenced as-is (when ``mirror_locally`` is not set) or
    downloaded to a temporary file. A local path is resolved to an absolute path.
    The resulting file must exist and be located inside one of the safe image paths.

    Args:
        owner_id(str): ID of the entity that owns the image; used to generate the image's ID.

    Raises:
        errors.ImageError: If the image cannot be downloaded, if the file doesn't exist, or if it is outside
            the safe image paths (or isn't an image when the filesystem root is a safe path).

    Returns:
        ImageObject: Image whose ``content_url`` is either the original remote URL or the POSIX path of the
            downloaded/resolved file.
    """
    # NOTE: Work on a copy so that repeated calls don't keep growing ``self.safe_image_paths``
    safe_image_paths = [*self.safe_image_paths, project_context.path]

    if is_remote(self.content_url):
        if not self.mirror_locally:
            return ImageObject(
                content_url=self.content_url,
                position=self.position,
                id=ImageObject.generate_id(owner_id=owner_id, position=self.position),
            )

        # NOTE: Download the image
        try:
            tmp_path, _ = urlretrieve(self.content_url)
        except urllib.error.URLError as e:
            raise errors.ImageError(f"Cannot download image with url {self.content_url}: {e}") from e

        path = Path(tmp_path)
        # NOTE: The directory of the downloaded file is allowed for this call only
        safe_image_paths.append(path.parent)
    else:
        path = Path(self.content_url).resolve()

    if not os.path.exists(path):
        raise errors.ImageError(f"Image with local path '{self.content_url}' not found")

    # NOTE: Prevent path traversal or usage of non-image files. When the filesystem root is a safe path, every
    # file passes the sub-path test, so the file content itself must be recognized as an image.
    if (FILESYSTEM_ROOT in safe_image_paths and imghdr.what(path) is None) or not any(
        is_subpath(path, base=p) for p in safe_image_paths
    ):
        raise errors.ImageError(f"'{self.content_url}' isn't a valid image file")

    return ImageObject(
        content_url=path.as_posix(),
        position=self.position,
        id=ImageObject.generate_id(owner_id=owner_id, position=self.position),
    )
Loading

0 comments on commit a6e5a0b

Please sign in to comment.