From 63ed138f363ca6347339fd933972e016fc6e047b Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Wed, 13 Sep 2023 17:07:39 +0200 Subject: [PATCH] feat: add cloud storage commands --- .gitignore | 1 + docs/reference/commands/storage.rst | 2 +- renku/command/checks/storage.py | 2 +- renku/command/command_builder/command.py | 45 ++ renku/command/command_builder/gitlab.py | 54 ++ renku/command/command_builder/repo.py | 35 ++ renku/command/command_builder/storage.py | 46 ++ renku/command/format/storage.py | 65 +++ renku/command/lfs.py | 137 +++++ renku/command/move.py | 2 +- renku/command/remove.py | 2 +- renku/command/save.py | 2 +- renku/command/session.py | 2 +- renku/command/storage.py | 126 +--- renku/core/dataset/dataset.py | 2 +- renku/core/dataset/dataset_add.py | 2 +- renku/core/dataset/providers/git.py | 2 +- renku/core/dataset/providers/local.py | 2 +- renku/core/dataset/providers/renku.py | 2 +- renku/core/errors.py | 8 + renku/core/git.py | 2 +- renku/core/init.py | 3 +- .../interface}/git_api_provider.py | 5 +- renku/core/interface/storage.py | 1 - .../core/interface/storage_service_gateway.py | 57 ++ renku/core/lfs.py | 543 ++++++++++++++++++ renku/core/login.py | 9 +- renku/core/session/renkulab.py | 30 + renku/core/storage.py | 535 +---------------- renku/core/workflow/execute.py | 2 +- renku/core/workflow/plan_factory.py | 2 +- renku/core/workflow/run.py | 2 +- renku/domain_model/cloud_storage.py | 58 ++ .../gitlab_api_provider.py | 68 ++- .../infrastructure/storage/storage_service.py | 154 +++++ renku/ui/cli/__init__.py | 2 + renku/ui/cli/lfs.py | 175 ++++++ renku/ui/cli/storage.py | 114 ++-- .../controllers/cache_migrations_check.py | 5 +- renku/ui/service/views/cache.py | 4 +- renku/ui/service/views/v1/cache.py | 2 +- tests/cli/test_clone.py | 4 +- tests/cli/test_datasets.py | 2 +- tests/core/commands/test_cli.py | 10 +- tests/core/commands/test_doctor.py | 2 +- tests/core/management/test_storage.py | 2 +- tests/fixtures/common.py | 2 +- 47 files changed, 1551 insertions(+), 783 deletions(-) create mode 100644 renku/command/command_builder/gitlab.py create mode 100644 renku/command/command_builder/storage.py create mode 100644 renku/command/format/storage.py create mode 100644 renku/command/lfs.py rename renku/{ui/service/interfaces => core/interface}/git_api_provider.py (87%) create mode 100644 renku/core/interface/storage_service_gateway.py create mode 100644 renku/core/lfs.py create mode 100644 renku/domain_model/cloud_storage.py rename renku/{ui/service/gateways => infrastructure}/gitlab_api_provider.py (83%) create mode 100644 renku/infrastructure/storage/storage_service.py create mode 100644 renku/ui/cli/lfs.py diff --git a/.gitignore b/.gitignore index d383f25628..3781e15121 100644 --- a/.gitignore +++ b/.gitignore @@ -94,6 +94,7 @@ helm-chart/renku-core/charts renku/templates/ temp/ tmp/ +.ropeproject/ # pytest-recording cache cassettes diff --git a/docs/reference/commands/storage.rst b/docs/reference/commands/storage.rst index 58a0c32ba0..3dafa98d39 100644 --- a/docs/reference/commands/storage.rst +++ b/docs/reference/commands/storage.rst @@ -3,4 +3,4 @@ renku storage ************* -.. automodule:: renku.ui.cli.storage +.. automodule:: renku.ui.cli.lfs diff --git a/renku/command/checks/storage.py b/renku/command/checks/storage.py index 7deb79d548..6515d480c5 100644 --- a/renku/command/checks/storage.py +++ b/renku/command/checks/storage.py @@ -16,7 +16,7 @@ """Check for large files in Git history.""" from renku.command.util import WARNING -from renku.core.storage import check_external_storage, check_lfs_migrate_info +from renku.core.lfs import check_external_storage, check_lfs_migrate_info def check_lfs_info(**_): diff --git a/renku/command/command_builder/command.py b/renku/command/command_builder/command.py index 1923c58f6a..88477cf6ad 100644 --- a/renku/command/command_builder/command.py +++ b/renku/command/command_builder/command.py @@ -17,6 +17,7 @@ import contextlib import functools +import shutil import threading from collections import defaultdict from pathlib import Path @@ -455,6 +456,13 @@ def require_clean(self) -> "Command": return RequireClean(self) + @check_finalized + def require_login(self) -> "Command": + """Check that the user is logged in.""" + from renku.command.command_builder.repo import RequireLogin + + return RequireLogin(self) + @check_finalized def with_communicator(self, communicator: CommunicationCallback) -> "Command": """Create a communicator. @@ -479,6 +487,20 @@ def with_database(self, write: bool = False, path: Optional[str] = None, create: return DatabaseCommand(self, write, path, create) + @check_finalized + def with_gitlab_api(self) -> "Command": + """Inject gitlab api client.""" + from renku.command.command_builder.gitlab import GitlabApiCommand + + return GitlabApiCommand(self) + + @check_finalized + def with_storage_api(self) -> "Command": + """Inject storage api client.""" + from renku.command.command_builder.storage import StorageApiCommand + + return StorageApiCommand(self) + class CommandResult: """The result of a command. @@ -496,3 +518,26 @@ def __init__(self, output, error, status) -> None: self.output = output self.error = error self.status = status + + +class RequireExecutable(Command): + """Builder to check if an executable is installed.""" + + HOOK_ORDER = 4 + + def __init__(self, builder: Command, executable: str) -> None: + """__init__ of RequireExecutable.""" + self._builder = builder + self._executable = executable + + def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Check if an executable exists on the system. + + Args: + builder(Command): Current ``CommandBuilder``. + context(dict): Current context. + """ + if not shutil.which(self._executable): + raise errors.ExecutableNotFound( + f"Couldn't find the executable '{self._executable}' on this system. Please make sure it's installed" + ) diff --git a/renku/command/command_builder/gitlab.py b/renku/command/command_builder/gitlab.py new file mode 100644 index 0000000000..878ac14501 --- /dev/null +++ b/renku/command/command_builder/gitlab.py @@ -0,0 +1,54 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Command builder for gitlab api.""" + + +from renku.command.command_builder.command import Command, check_finalized +from renku.core.interface.git_api_provider import IGitAPIProvider +from renku.domain_model.project_context import project_context +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider + + +class GitlabApiCommand(Command): + """Builder to get a gitlab api client.""" + + PRE_ORDER = 4 + + def __init__(self, builder: Command) -> None: + self._builder = builder + + def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Create a gitlab api provider.""" + + if not project_context.has_context(): + raise ValueError("Gitlab API builder needs a ProjectContext to be set.") + + def _get_provider(): + from renku.core.login import read_renku_token + + token = read_renku_token(None, True) + if not token: + return None + return GitlabAPIProvider(token=token) + + context["constructor_bindings"][IGitAPIProvider] = _get_provider + + @check_finalized + def build(self) -> Command: + """Build the command.""" + self._builder.add_injection_pre_hook(self.PRE_ORDER, self._injection_pre_hook) + + return self._builder.build() diff --git a/renku/command/command_builder/repo.py b/renku/command/command_builder/repo.py index 3778ef5387..6bbc41c734 100644 --- a/renku/command/command_builder/repo.py +++ b/renku/command/command_builder/repo.py @@ -21,6 +21,7 @@ from renku.command.command_builder.command import Command, CommandResult, check_finalized from renku.core import errors from renku.core.git import ensure_clean +from renku.core.login import ensure_login from renku.domain_model.project_context import project_context @@ -42,6 +43,7 @@ def __init__( """__init__ of Commit. Args: + builder(Command): The current ``CommandBuilder``. message (str): The commit message. Auto-generated if left empty (Default value = None). commit_if_empty (bool): Whether to commit if there are no modified files (Default value = None). raise_if_empty (bool): Whether to raise an exception if there are no modified files (Default value = None). @@ -164,6 +166,39 @@ def build(self) -> Command: return self._builder.build() +class RequireLogin(Command): + """Builder to check if a user is logged in.""" + + HOOK_ORDER = 4 + + def __init__(self, builder: Command) -> None: + """__init__ of RequireLogin.""" + self._builder = builder + + def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Check if the user is logged in. + + Args: + builder(Command): Current ``CommandBuilder``. + context(dict): Current context. + """ + if not project_context.has_context(): + raise ValueError("RequireLogin builder needs a ProjectContext to be set.") + + ensure_login() + + @check_finalized + def build(self) -> Command: + """Build the command. + + Returns: + Command: Finalized version of this command. + """ + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) + + return self._builder.build() + + class Isolation(Command): """Builder to run a command in git isolation.""" diff --git a/renku/command/command_builder/storage.py b/renku/command/command_builder/storage.py new file mode 100644 index 0000000000..3cc1f808ac --- /dev/null +++ b/renku/command/command_builder/storage.py @@ -0,0 +1,46 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Command builder for storage api.""" + + +from renku.command.command_builder.command import Command, check_finalized +from renku.core.interface.storage_service_gateway import IStorageService +from renku.domain_model.project_context import project_context +from renku.infrastructure.storage.storage_service import StorageService + + +class StorageApiCommand(Command): + """Builder to get a storage api client.""" + + PRE_ORDER = 4 + + def __init__(self, builder: Command) -> None: + self._builder = builder + + def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Create a storage api provider.""" + + if not project_context.has_context(): + raise ValueError("storage api builder needs a ProjectContext to be set.") + + context["constructor_bindings"][IStorageService] = lambda: StorageService() + + @check_finalized + def build(self) -> Command: + """Build the command.""" + self._builder.add_injection_pre_hook(self.PRE_ORDER, self._injection_pre_hook) + + return self._builder.build() diff --git a/renku/command/format/storage.py b/renku/command/format/storage.py new file mode 100644 index 0000000000..66afe278b4 --- /dev/null +++ b/renku/command/format/storage.py @@ -0,0 +1,65 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Serializers for storage.""" + +import json +from typing import List, Optional + +from renku.command.format.tabulate import tabulate +from renku.domain_model.cloud_storage import CloudStorage + + +def tabular(cloud_storages: List[CloudStorage], *, columns: Optional[str] = None): + """Format cloud_storages with a tabular output.""" + if not columns: + columns = "id,start_time,status,provider,url" + + if any(s.ssh_enabled for s in cloud_storages): + columns += ",ssh" + + return tabulate(collection=cloud_storages, columns=columns, columns_mapping=cloud_storage_COLUMNS) + + +def log(cloud_storages: List[CloudStorage], *, columns: Optional[str] = None): + """Format cloud_storages in a log like output.""" + from renku.ui.cli.utils.terminal import style_header, style_key + + output = [] + + for cloud_storage in cloud_storages: + output.append(style_header(f"CloudStorage {cloud_storage.name}")) + output.append(style_key("Id: ") + cloud_storage.storage_id) + output.append(style_key("Source Path: ") + cloud_storage.source_path) + output.append(style_key("Target path: ") + cloud_storage.target_path) + output.append(style_key("Private: ") + "Yes" if cloud_storage.private else "No") + output.append(style_key("Configuration: \n") + json.dumps(cloud_storage.configuration, indent=4)) + output.append("") + return "\n".join(output) + + +CLOUD_STORAGE_FORMATS = {"tabular": tabular, "log": log} +"""Valid formatting options.""" + +CLOUD_STORAGE_COLUMNS = { + "id": ("id", "id"), + "status": ("status", "status"), + "url": ("url", "url"), + "ssh": ("ssh_enabled", "SSH enabled"), + "start_time": ("start_time", "start_time"), + "commit": ("commit", "commit"), + "branch": ("branch", "branch"), + "provider": ("provider", "provider"), +} diff --git a/renku/command/lfs.py b/renku/command/lfs.py new file mode 100644 index 0000000000..a8caca897d --- /dev/null +++ b/renku/command/lfs.py @@ -0,0 +1,137 @@ +# +# Copyright 2018-2023- Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku storage command.""" + +from typing import List + +from pydantic import validate_arguments + +from renku.command.command_builder.command import Command +from renku.core.lfs import ( + check_lfs_migrate_info, + check_requires_tracking, + clean_storage_cache, + migrate_files_to_lfs, + pull_paths_from_storage, +) +from renku.core.util import communication +from renku.domain_model.project_context import project_context + + +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def _check_lfs(everything: bool = False): + """Check if large files are not in lfs. + + Args: + everything: Whether to check whole history (Default value = False). + + Returns: + List of large files. + """ + files = check_lfs_migrate_info(everything) + + if files: + communication.warn("Git history contains large files\n\t" + "\n\t".join(files)) + + return files + + +def check_lfs_command(): + """Check lfs command.""" + return Command().command(_check_lfs) + + +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def _fix_lfs(paths: List[str]): + """Migrate large files into lfs. + + Args: + paths(List[str]): Paths to migrate to LFS. + """ + migrate_files_to_lfs(paths) + + +def fix_lfs_command(): + """Fix lfs command.""" + return ( + Command() + .command(_fix_lfs) + .require_clean() + .require_migration() + .with_database(write=True) + .with_commit(commit_if_empty=False) + ) + + +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def _pull(paths: List[str]): + """Pull the specified paths from external storage. + + Args: + paths(List[str]): Paths to pull from LFS. + """ + pull_paths_from_storage(project_context.repository, *paths) + + +def pull_command(): + """Command to pull the specified paths from external storage.""" + return Command().command(_pull) + + +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def _clean(paths: List[str]): + """Remove files from lfs cache/turn them back into pointer files. + + Args: + paths:List[str]: Paths to turn back to pointer files. + """ + untracked_paths, local_only_paths = clean_storage_cache(*paths) + + if untracked_paths: + communication.warn( + "These paths were ignored as they are not tracked" + + " in git LFS:\n\t{}\n".format("\n\t".join(untracked_paths)) + ) + + if local_only_paths: + communication.warn( + "These paths were ignored as they are not pushed to " + + "a remote with git LFS:\n\t{}\n".format("\n\t".join(local_only_paths)) + ) + + +def clean_command(): + """Command to remove files from lfs cache/turn them back into pointer files.""" + return Command().command(_clean) + + +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def _check_lfs_hook(paths: List[str]): + """Check if paths should be in LFS. + + Args: + paths(List[str]): Paths to check + + Returns: + List of files that should be in LFS. + """ + return check_requires_tracking(*paths) + + +def check_lfs_hook_command(): + """Command to pull the specified paths from external storage.""" + return Command().command(_check_lfs_hook) diff --git a/renku/command/move.py b/renku/command/move.py index ab0cea8585..898305269e 100644 --- a/renku/command/move.py +++ b/renku/command/move.py @@ -28,7 +28,7 @@ from renku.core.dataset.dataset import move_files from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import track_paths_in_storage, untrack_paths_from_storage +from renku.core.lfs import track_paths_in_storage, untrack_paths_from_storage from renku.core.util import communication from renku.core.util.metadata import is_protected_path from renku.core.util.os import get_relative_path, is_subpath diff --git a/renku/command/remove.py b/renku/command/remove.py index 7bd0d22778..7ece1dbfe2 100644 --- a/renku/command/remove.py +++ b/renku/command/remove.py @@ -28,7 +28,7 @@ from renku.core import errors from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import check_external_storage, untrack_paths_from_storage +from renku.core.lfs import check_external_storage, untrack_paths_from_storage from renku.core.util import communication from renku.core.util.git import get_git_user from renku.core.util.os import delete_dataset_file, expand_directories diff --git a/renku/command/save.py b/renku/command/save.py index 1db8d87c1f..4213e2fdda 100644 --- a/renku/command/save.py +++ b/renku/command/save.py @@ -21,7 +21,7 @@ from renku.command.command_builder.command import Command from renku.core import errors -from renku.core.storage import track_paths_in_storage +from renku.core.lfs import track_paths_in_storage from renku.domain_model.project_context import project_context diff --git a/renku/command/session.py b/renku/command/session.py index 824ad9be58..028269056b 100644 --- a/renku/command/session.py +++ b/renku/command/session.py @@ -44,7 +44,7 @@ def session_list_command(): def session_start_command(): """Start an interactive session.""" - return Command().command(session_start).with_database().require_migration() + return Command().command(session_start).with_database().require_migration().with_gitlab_api().with_storage_api() def session_stop_command(): diff --git a/renku/command/storage.py b/renku/command/storage.py index 3522ccf877..d74ae56e50 100644 --- a/renku/command/storage.py +++ b/renku/command/storage.py @@ -1,6 +1,5 @@ -# -# Copyright 2018-2023- Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,124 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Renku storage command.""" - -from typing import List - -from pydantic import validate_arguments - from renku.command.command_builder.command import Command -from renku.core.storage import ( - check_lfs_migrate_info, - check_requires_tracking, - clean_storage_cache, - migrate_files_to_lfs, - pull_paths_from_storage, -) -from renku.core.util import communication -from renku.domain_model.project_context import project_context - - -@validate_arguments(config=dict(arbitrary_types_allowed=True)) -def _check_lfs(everything: bool = False): - """Check if large files are not in lfs. - - Args: - everything: Whether to check whole history (Default value = False). - - Returns: - List of large files. - """ - files = check_lfs_migrate_info(everything) - - if files: - communication.warn("Git history contains large files\n\t" + "\n\t".join(files)) - - return files - - -def check_lfs_command(): - """Check lfs command.""" - return Command().command(_check_lfs) - - -@validate_arguments(config=dict(arbitrary_types_allowed=True)) -def _fix_lfs(paths: List[str]): - """Migrate large files into lfs. - - Args: - paths(List[str]): Paths to migrate to LFS. - """ - migrate_files_to_lfs(paths) - - -def fix_lfs_command(): - """Fix lfs command.""" - return ( - Command() - .command(_fix_lfs) - .require_clean() - .require_migration() - .with_database(write=True) - .with_commit(commit_if_empty=False) - ) - - -@validate_arguments(config=dict(arbitrary_types_allowed=True)) -def _pull(paths: List[str]): - """Pull the specified paths from external storage. - - Args: - paths(List[str]): Paths to pull from LFS. - """ - pull_paths_from_storage(project_context.repository, *paths) - - -def pull_command(): - """Command to pull the specified paths from external storage.""" - return Command().command(_pull) - - -@validate_arguments(config=dict(arbitrary_types_allowed=True)) -def _clean(paths: List[str]): - """Remove files from lfs cache/turn them back into pointer files. - - Args: - paths:List[str]: Paths to turn back to pointer files. - """ - untracked_paths, local_only_paths = clean_storage_cache(*paths) - - if untracked_paths: - communication.warn( - "These paths were ignored as they are not tracked" - + " in git LFS:\n\t{}\n".format("\n\t".join(untracked_paths)) - ) - - if local_only_paths: - communication.warn( - "These paths were ignored as they are not pushed to " - + "a remote with git LFS:\n\t{}\n".format("\n\t".join(local_only_paths)) - ) - - -def clean_command(): - """Command to remove files from lfs cache/turn them back into pointer files.""" - return Command().command(_clean) - - -@validate_arguments(config=dict(arbitrary_types_allowed=True)) -def _check_lfs_hook(paths: List[str]): - """Check if paths should be in LFS. - - Args: - paths(List[str]): Paths to check - Returns: - List of files that should be in LFS. - """ - return check_requires_tracking(*paths) +def list_storage_command(): + """Command to list configured cloud storage.""" + from renku.core.storage import list_storage -def check_lfs_hook_command(): - """Command to pull the specified paths from external storage.""" - return Command().command(_check_lfs_hook) + return Command().command(list_storage).require_login().with_database() diff --git a/renku/core/dataset/dataset.py b/renku/core/dataset/dataset.py index c8456ee8e8..846f1fd4d6 100644 --- a/renku/core/dataset/dataset.py +++ b/renku/core/dataset/dataset.py @@ -38,7 +38,7 @@ from renku.core.dataset.request_model import ImageRequestModel from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_git_user diff --git a/renku/core/dataset/dataset_add.py b/renku/core/dataset/dataset_add.py index 34328aaa7a..a480a53ccc 100644 --- a/renku/core/dataset/dataset_add.py +++ b/renku/core/dataset/dataset_add.py @@ -33,7 +33,7 @@ from renku.core.dataset.providers.models import DatasetAddAction, DatasetAddMetadata from renku.core.interface.dataset_gateway import IDatasetGateway from renku.core.interface.storage import IStorage -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication, requests from renku.core.util.git import get_git_user from renku.core.util.os import get_absolute_path, get_file_size, get_files, get_relative_path, hash_file, is_subpath diff --git a/renku/core/dataset/providers/git.py b/renku/core/dataset/providers/git.py index 1ed53eecd8..6d5b39c667 100644 --- a/renku/core/dataset/providers/git.py +++ b/renku/core/dataset/providers/git.py @@ -25,7 +25,7 @@ from renku.core import errors from renku.core.dataset.pointer_file import create_external_file from renku.core.dataset.providers.api import AddProviderInterface, ProviderApi, ProviderPriority -from renku.core.storage import pull_paths_from_storage +from renku.core.lfs import pull_paths_from_storage from renku.core.util import communication from renku.core.util.git import clone_repository, get_cache_directory_for_repository from renku.core.util.metadata import is_linked_file diff --git a/renku/core/dataset/providers/local.py b/renku/core/dataset/providers/local.py index e537e77958..b0a767345c 100644 --- a/renku/core/dataset/providers/local.py +++ b/renku/core/dataset/providers/local.py @@ -30,7 +30,7 @@ ProviderApi, ProviderPriority, ) -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication from renku.core.util.metadata import is_protected_path from renku.core.util.os import get_absolute_path, get_safe_relative_path, is_path_empty, is_subpath diff --git a/renku/core/dataset/providers/renku.py b/renku/core/dataset/providers/renku.py index 76feb8549c..b7475e4aac 100644 --- a/renku/core/dataset/providers/renku.py +++ b/renku/core/dataset/providers/renku.py @@ -25,8 +25,8 @@ from renku.core import errors from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.dataset.providers.api import ImporterApi, ImportProviderInterface, ProviderApi, ProviderPriority +from renku.core.lfs import pull_paths_from_storage from renku.core.login import read_renku_token -from renku.core.storage import pull_paths_from_storage from renku.core.util import communication from renku.core.util.git import clone_renku_repository, get_cache_directory_for_repository, get_file_size from renku.core.util.metadata import is_external_file, make_project_temp_dir diff --git a/renku/core/errors.py b/renku/core/errors.py index 0da28254c1..ce44d7b031 100644 --- a/renku/core/errors.py +++ b/renku/core/errors.py @@ -59,6 +59,14 @@ class NotFound(RenkuException): """Raise when an object is not found in KG.""" +class NotLoggedIn(RenkuException): + """Raised when a user is not logged in to a Renku platform.""" + + +class ExecutableNotFound(RenkuException): + """Raised when an executable wasn't found on the system.""" + + class ParameterError(RenkuException): """Raise in case of invalid parameter.""" diff --git a/renku/core/git.py b/renku/core/git.py index 1debddeb08..d851d0f0d2 100644 --- a/renku/core/git.py +++ b/renku/core/git.py @@ -24,7 +24,7 @@ from typing import Any, Optional, Tuple, Type from renku.core import errors -from renku.core.storage import checkout_paths_from_storage +from renku.core.lfs import checkout_paths_from_storage from renku.core.util.contexts import Isolation from renku.core.util.git import get_dirty_paths from renku.core.util.os import get_absolute_path diff --git a/renku/core/init.py b/renku/core/init.py index 18f12ef560..0bdde458dd 100644 --- a/renku/core/init.py +++ b/renku/core/init.py @@ -31,8 +31,8 @@ from renku.core.git import with_worktree from renku.core.githooks import install_githooks from renku.core.interface.database_gateway import IDatabaseGateway +from renku.core.lfs import init_external_storage, storage_installed from renku.core.migration.utils import OLD_METADATA_PATH -from renku.core.storage import init_external_storage, storage_installed from renku.core.template.template import ( FileAction, RenderedTemplate, @@ -356,6 +356,7 @@ def create_from_template_local( description(Optional[str]): Project description (Default value = None). keywords(Optional[List[str]]): Project keywords (Default value = None). data_dir(Optional[str]): Project base data directory (Default value = None). + ssh_supported(bool): Whether the template supports ssh connections (Default value = None). """ metadata = metadata or {} default_metadata = default_metadata or {} diff --git a/renku/ui/service/interfaces/git_api_provider.py b/renku/core/interface/git_api_provider.py similarity index 87% rename from renku/ui/service/interfaces/git_api_provider.py rename to renku/core/interface/git_api_provider.py index bd8407d7aa..f5fa02e48e 100644 --- a/renku/ui/service/interfaces/git_api_provider.py +++ b/renku/core/interface/git_api_provider.py @@ -29,8 +29,11 @@ def download_files_from_api( folders: List[Union[Path, str]], target_folder: Union[Path, str], remote: str, - token: str, branch: Optional[str] = None, ): """Download files through a remote Git API.""" raise NotImplementedError() + + def get_project_id(self, gitlab_url: str, namespace: str, name: str) -> str: + """Get a gitlab project id from namespace/name.""" + raise NotImplementedError() diff --git a/renku/core/interface/storage.py b/renku/core/interface/storage.py index f77d05f131..da85123c8e 100644 --- a/renku/core/interface/storage.py +++ b/renku/core/interface/storage.py @@ -1,4 +1,3 @@ -# # Copyright 2017-2023 - Swiss Data Science Center (SDSC) # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). diff --git a/renku/core/interface/storage_service_gateway.py b/renku/core/interface/storage_service_gateway.py new file mode 100644 index 0000000000..587bfeb9f9 --- /dev/null +++ b/renku/core/interface/storage_service_gateway.py @@ -0,0 +1,57 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Interface for a remote storage service.""" + +from typing import List, Protocol + +from renku.domain_model.cloud_storage import CloudStorage, CloudStorageWithSensitiveFields + + +class IStorageService(Protocol): + """Interface for a storage service.""" + + @property + def project_id(self) -> str: + """Get the current gitlab project id. + + Note: This is mostly a workaround since storage service is already done to only accept + project ids, but the CLI knows nothing about those. + This could should be removed once we move to proper renku projects. + """ + ... + + def list(self, project_id: str) -> List[CloudStorageWithSensitiveFields]: + """List storage configured for the current project.""" + ... + + def create(self, storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Create a new cloud storage.""" + ... + + def edit(self, storage_id: str, new_storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Edit a cloud storage.""" + ... + + def delete(self, storage_id: str) -> None: + """Delete a cloud storage.""" + ... + + def validate(self, storage: CloudStorage) -> None: + """Validate a cloud storage. + + Raises an exception for invalid storage. + """ + ... diff --git a/renku/core/lfs.py b/renku/core/lfs.py new file mode 100644 index 0000000000..c42df05e2f --- /dev/null +++ b/renku/core/lfs.py @@ -0,0 +1,543 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Logic for handling a data storage.""" + +import functools +import itertools +import os +import re +import shlex +import tempfile +from collections import defaultdict +from pathlib import Path +from shutil import move, which +from subprocess import PIPE, STDOUT, check_output, run +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union + +import pathspec + +from renku.core import errors +from renku.core.config import get_value +from renku.core.constant import RENKU_LFS_IGNORE_PATH, RENKU_PROTECTED_PATHS +from renku.core.util import communication +from renku.core.util.git import get_in_submodules, run_command +from renku.core.util.os import expand_directories, parse_file_size +from renku.domain_model.project_context import project_context + +if TYPE_CHECKING: + from renku.infrastructure.repository import Repository + + +_CMD_STORAGE_INSTALL = ["git", "lfs", "install", "--local"] + +_CMD_STORAGE_TRACK = ["git", "lfs", "track", "--"] + +_CMD_STORAGE_UNTRACK = ["git", "lfs", "untrack", "--"] + +_CMD_STORAGE_CLEAN = ["git", "lfs", "clean"] + +_CMD_STORAGE_CHECKOUT = ["git", "lfs", "checkout"] + +_CMD_STORAGE_PULL = ["git", "lfs", "pull", "-I"] + +_CMD_STORAGE_MIGRATE_IMPORT = ["git", "lfs", "migrate", "import"] + +_CMD_STORAGE_MIGRATE_INFO = ["git", "lfs", "migrate", "info", "--top", "42000"] + +_CMD_STORAGE_LIST = ["git", "lfs", "ls-files", "-n"] + +_CMD_STORAGE_STATUS = ["git", "lfs", "status"] + +_LFS_HEADER = "version https://git-lfs.github.com/spec/" + + +class RenkuGitWildMatchPattern(pathspec.patterns.GitWildMatchPattern): + """Custom GitWildMatchPattern matcher.""" + + __slots__ = ("pattern",) + + def __init__(self, pattern, include=None): + """Initialize RenkuRegexPattern.""" + super().__init__(pattern, include) + self.pattern = pattern + + +pathspec.util.register_pattern("renku_gitwildmatch", RenkuGitWildMatchPattern) + + +def check_external_storage_wrapper(fn): + """Check availability of external storage on methods that need it. + + Raises: + ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. + ``errors.ExternalStorageDisabled``: If external storage isn't enabled. + """ + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + if not check_external_storage(): + pass + else: + return fn(*args, **kwargs) + + return wrapper + + +@functools.lru_cache +def storage_installed() -> bool: + """Verify that git-lfs is installed and on system PATH.""" + return bool(which("git-lfs")) + + +def storage_installed_locally() -> bool: + """Verify that git-lfs is installed for the project.""" + repo_config = project_context.repository.get_configuration(scope="local") + return repo_config.has_section('filter "lfs"') + + +def check_external_storage(): + """Check if repository has external storage enabled. + + Raises: + ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. + ``errors.ExternalStorageDisabled``: If external storage isn't enabled. + """ + installed_locally = storage_installed_locally() + is_storage_installed = installed_locally and storage_installed() + + if project_context.external_storage_requested and not is_storage_installed: + raise errors.ExternalStorageDisabled() + + if installed_locally and not storage_installed(): + raise errors.ExternalStorageNotInstalled() + + return is_storage_installed + + +def renku_lfs_ignore() -> pathspec.PathSpec: + """Gets pathspec for files to not add to LFS.""" + ignore_path = project_context.path / RENKU_LFS_IGNORE_PATH + + if not os.path.exists(ignore_path): + return pathspec.PathSpec.from_lines("renku_gitwildmatch", RENKU_PROTECTED_PATHS) + with ignore_path.open("r") as f: + # NOTE: Append `renku_protected_paths` at the end to give it the highest priority + lines = itertools.chain(f, RENKU_PROTECTED_PATHS) + return pathspec.PathSpec.from_lines("renku_gitwildmatch", lines) + + +def get_minimum_lfs_file_size() -> int: + """The minimum size of a file in bytes to be added to lfs.""" + size = get_value("renku", "lfs_threshold") + + return parse_file_size(size) + + +def init_external_storage(force: bool = False) -> None: + """Initialize the external storage for data.""" + try: + result = run( + _CMD_STORAGE_INSTALL + (["--force"] if force else []), + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + text=True, + ) + + if result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs install: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + +@check_external_storage_wrapper +def track_paths_in_storage(*paths: Union[Path, str]) -> Optional[List[str]]: + """Track paths in the external storage.""" + if not project_context.external_storage_requested or not check_external_storage(): + return None + + # Calculate which paths can be tracked in lfs + track_paths: List[str] = [] + attrs = project_context.repository.get_attributes(*paths) + + for path in paths: + path = Path(path) + + # Do not track symlinks in LFS + if path.is_symlink(): + continue + + # Do not add files with filter=lfs in .gitattributes + if attrs.get(str(path), {}).get("filter") == "lfs" or not (project_context.path / path).exists(): + continue + + relative_path = Path(path).relative_to(project_context.path) if path.is_absolute() else path + + if ( + path.is_dir() + and not renku_lfs_ignore().match_file(relative_path) + and not any(renku_lfs_ignore().match_tree(str(relative_path))) + ): + track_paths.append(str(path / "**")) + elif not renku_lfs_ignore().match_file(str(relative_path)): + file_size = os.path.getsize(str(os.path.relpath(project_context.path / path, os.getcwd()))) + if file_size >= get_minimum_lfs_file_size(): + track_paths.append(str(relative_path)) + + if track_paths: + try: + result = run_command( + _CMD_STORAGE_TRACK, + *track_paths, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs track: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + show_message = get_value("renku", "show_lfs_message") + if track_paths and (show_message is None or show_message.lower() == "true"): + files_list = "\n\t".join(track_paths) + communication.info( + f"Adding these files to Git LFS:\n\t{files_list}" + "\nTo disable this message in the future, run:\n\trenku config set show_lfs_message false" + ) + + return track_paths + + +@check_external_storage_wrapper +def untrack_paths_from_storage(*paths: Union[Path, str]) -> None: + """Untrack paths from the external storage.""" + try: + result = run_command( + _CMD_STORAGE_UNTRACK, + *paths, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs untrack: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + +@check_external_storage_wrapper +def list_tracked_paths() -> List[Path]: + """List paths tracked in lfs.""" + try: + files = check_output(_CMD_STORAGE_LIST, cwd=project_context.path, encoding="UTF-8") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs ls-files':\n{e}") + files_split: List[Path] = [project_context.path / f for f in files.splitlines()] + return files_split + + +@check_external_storage_wrapper +def list_unpushed_lfs_paths(repository: "Repository") -> List[Path]: + """List paths tracked in lfs for a repository.""" + + if len(repository.remotes) < 1 or (repository.active_branch and not repository.active_branch.remote_branch): + raise errors.GitConfigurationError( + f"No git remote is configured for {project_context.path} branch " + + f"{repository.active_branch.name}." # type: ignore + + "Cleaning the storage cache would lead to a loss of data as " + + "it is not on a server. Please see " + + "https://www.atlassian.com/git/tutorials/syncing for " + + "information on how to sync with a remote." + ) + try: + status = check_output(_CMD_STORAGE_STATUS, cwd=project_context.path, encoding="UTF-8") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs status':\n{e}") + + files = status.split("Objects to be committed:")[0].splitlines()[2:] + return [project_context.path / f.rsplit("(", 1)[0].strip() for f in files if f.strip()] + + +@check_external_storage_wrapper +def pull_paths_from_storage(repository: "Repository", *paths: Union[Path, str]): + """Pull paths from LFS.""" + project_dict = defaultdict(list) + + for path in expand_directories(paths): + sub_repository, _, path = get_in_submodules(repository, repository.head.commit, path) + try: + absolute_path = Path(path).resolve() + relative_path = absolute_path.relative_to(project_context.path) + except ValueError: # An external file + continue + + project_dict[sub_repository.path].append(shlex.quote(str(relative_path))) + + for project_path, file_paths in project_dict.items(): + result = run_command( + _CMD_STORAGE_PULL, + *file_paths, + separator=",", + cwd=project_path, + stdout=PIPE, + stderr=STDOUT, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Cannot pull LFS objects from server:\n {result.stdout}") + + +@check_external_storage_wrapper +def clean_storage_cache(*check_paths: Union[Path, str]) -> Tuple[List[str], List[str]]: + """Remove paths from lfs cache.""" + project_dict = defaultdict(list) + repositories: Dict[Path, "Repository"] = {} + tracked_paths: Dict[Path, List[Path]] = {} + unpushed_paths: Dict[Path, List[Path]] = {} + untracked_paths: List[str] = [] + local_only_paths: List[str] = [] + + repository = project_context.repository + + for path in expand_directories(check_paths): + current_repository, _, path = get_in_submodules(repository=repository, commit=repository.head.commit, path=path) + try: + absolute_path = Path(path).resolve() + relative_path = absolute_path.relative_to(project_context.path) + except ValueError: # An external file + continue + + if project_context.path not in tracked_paths: + tracked_paths[project_context.path] = list_tracked_paths() + + if project_context.path not in unpushed_paths: + u_paths = list_unpushed_lfs_paths(current_repository) + unpushed_paths[project_context.path] = u_paths + + if absolute_path in unpushed_paths[project_context.path]: + local_only_paths.append(str(relative_path)) + elif absolute_path not in tracked_paths[project_context.path]: + untracked_paths.append(str(relative_path)) + else: + project_dict[project_context.path].append(str(relative_path)) + repositories[project_context.path] = current_repository + + for project_path, paths in project_dict.items(): + current_repository = repositories[project_path] + + for path in paths: + with open(path) as tracked_file: + try: + header = tracked_file.read(len(_LFS_HEADER)) + if header == _LFS_HEADER: + # file is not pulled + continue + except UnicodeDecodeError: + # likely a binary file, not lfs pointer file + pass + with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8", delete=False) as tmp, open( + path, "r+t" + ) as input_file: + result = run(_CMD_STORAGE_CLEAN, cwd=project_path, stdin=input_file, stdout=tmp, text=True) + + if result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs clean: \n {result.stdout}") + + tmp_path = tmp.name + move(tmp_path, path) + + # get lfs sha hash + old_pointer = current_repository.get_raw_content(path=path, revision="HEAD") + old_pointer = old_pointer.splitlines()[1] + old_pointer = old_pointer.split(" ")[1].split(":")[1] + + prefix1 = old_pointer[:2] + prefix2 = old_pointer[2:4] + + # remove from lfs cache + object_path = project_context.path / ".git" / "lfs" / "objects" / prefix1 / prefix2 / old_pointer + object_path.unlink() + + # add paths so they don't show as modified + current_repository.add(*paths) + + return untracked_paths, local_only_paths + + +@check_external_storage_wrapper +def checkout_paths_from_storage(*paths: Union[Path, str]): + """Checkout a paths from LFS.""" + result = run_command( + _CMD_STORAGE_CHECKOUT, + *paths, + cwd=project_context.path, + stdout=PIPE, + stderr=STDOUT, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs checkout: \n {result.stdout}") + + +def check_requires_tracking(*paths: Union[Path, str]) -> Optional[List[str]]: + """Check paths and return a list of those that must be tracked.""" + + if not project_context.external_storage_requested: + return None + + attrs = project_context.repository.get_attributes(*paths) + track_paths: List[str] = [] + + for path in paths: + absolute_path = Path(os.path.abspath(project_context.path / path)) + path = str(path) + + # Do not track symlinks in LFS + if absolute_path.is_symlink(): + continue + + # Do not add files with filter=lfs in .gitattributes + if attrs.get(path, {}).get("filter") == "lfs": + continue + + if not absolute_path.is_dir(): + if renku_lfs_ignore().match_file(path): + continue + if os.path.getsize(absolute_path) < get_minimum_lfs_file_size(): + continue + + track_paths.append(path) + + return track_paths + + +def get_lfs_migrate_filters() -> Tuple[List[str], List[str]]: + """Gets include, exclude and above filters for lfs migrate.""" + + def add_migrate_pattern(pattern, collection): + if pattern in RENKU_PROTECTED_PATHS: + return + pattern = pattern.strip() + if pattern.endswith("*"): + return + pattern = pattern.rstrip("/") + collection.append(f"{pattern}/**") + + includes = [] + excludes = [] + for p in renku_lfs_ignore().patterns: + if p.regex is None: + continue + + pattern = p.pattern.replace(os.linesep, "").replace("\n", "") + if pattern.startswith("!"): + pattern = pattern.replace("!", "", 1) + + if p.include: # File ignored by LFS + excludes.append(pattern) + add_migrate_pattern(pattern, excludes) + else: + includes.append(pattern) + add_migrate_pattern(pattern, includes) + + if excludes: + excludes = ["--exclude", ",".join(excludes)] + if includes: + includes = ["--include", ",".join(includes)] + + return includes, excludes + + +def check_lfs_migrate_info(everything: bool = False, use_size_filter: bool = True) -> List[str]: + """Return list of file groups in history should be in LFS.""" + ref = ( + ["--everything"] + if everything or not project_context.repository.active_branch + else ["--include-ref", project_context.repository.active_branch.name] + ) + + includes, excludes = get_lfs_migrate_filters() + + ignore_pointers = ["--pointers", "ignore"] + + command = _CMD_STORAGE_MIGRATE_INFO + ref + includes + excludes + + # NOTE: ``lfs migrate info`` supports ``--above`` while ``lfs migrate import`` doesn't. + if use_size_filter: + above = ["--above", str(get_minimum_lfs_file_size())] + command += above + + try: + lfs_output = run( + command + ignore_pointers, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + text=True, + ) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") + + if lfs_output.returncode != 0: + # NOTE: try running without --pointers (old versions of git lfs) + try: + lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") + + if lfs_output.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs migrate info: \n {lfs_output.stdout}") + + groups: List[str] = [] + files_re = re.compile(r"(.*\s+[\d.]+\s+\S+).*") + + for line in lfs_output.stdout.split("\n"): + match = files_re.match(line) + if match: + groups.append(match.groups()[0]) + + if groups and use_size_filter: + # NOTE: Since there are some large files, remove the size filter so that users get list of all files that + # will be moved to LFS. + return check_lfs_migrate_info(everything=everything, use_size_filter=False) + + return groups + + +def migrate_files_to_lfs(paths: List[str]): + """Migrate files to Git LFS.""" + if paths: + includes: List[str] = ["--include", ",".join(paths)] + excludes: List[str] = [] + else: + includes, excludes = get_lfs_migrate_filters() + + command = _CMD_STORAGE_MIGRATE_IMPORT + includes + excludes + + try: + lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate import':\n{e}") + + if lfs_output.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs migrate import: \n {lfs_output.stdout}") diff --git a/renku/core/login.py b/renku/core/login.py index 1004d23219..dc9ed4775d 100644 --- a/renku/core/login.py +++ b/renku/core/login.py @@ -195,7 +195,7 @@ def _set_renku_url_for_remote(repository: "Repository", remote_name: str, remote raise errors.GitError(f"Cannot change remote url for '{remote_name}' to '{new_remote_url}'") from e -def read_renku_token(endpoint: str, get_endpoint_from_remote=False) -> str: +def read_renku_token(endpoint: Optional[str], get_endpoint_from_remote=False) -> str: """Read renku token from renku config file. Args: @@ -287,3 +287,10 @@ def credentials(command: str, hostname: Optional[str]): communication.echo("username=renku") communication.echo(f"password={token}") + + +def ensure_login(): + """Ensure a user is logged in.""" + token = read_renku_token(None, True) + if not token: + raise errors.NotLoggedIn("You are not logged into to a Renku platform. Use 'renku login ' to log in.") diff --git a/renku/core/session/renkulab.py b/renku/core/session/renkulab.py index 4693c0d9ac..d967d41979 100644 --- a/renku/core/session/renkulab.py +++ b/renku/core/session/renkulab.py @@ -23,9 +23,11 @@ from time import monotonic, sleep from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from renku.command.command_builder.command import inject from renku.core import errors from renku.core.config import get_value from renku.core.constant import ProviderPriority +from renku.core.interface.storage_service_gateway import IStorageService from renku.core.login import read_renku_token from renku.core.plugin import hookimpl from renku.core.session.utils import get_renku_project_name, get_renku_url @@ -261,6 +263,33 @@ def find_image(self, image_name: str, config: Optional[Dict[str, Any]]) -> bool: == 200 ) + def get_cloudstorage(self): + """Get cloudstorage configured for the project.""" + storage_service: IStorageService = inject.instance(IStorageService) + storages = storage_service.list(storage_service.project_id) + + if not storages: + return [] + + storages_to_mount = [] + for storage, private_fields in storages: + if not communication.confirm(f"Do you want to mount storage '{storage.name}'({storage.storage_type})?"): + continue + if storage.private: + # check for credentials for user + private_field_names = [f["name"] for f in private_fields] + for name, value in storage.configuration.items(): + if name not in private_field_names: + continue + field = next(f for f in private_fields if f["name"] == name) + + secret = communication.prompt(f"{field['help']}\nPlease provide a value for secret '{name}':") + storage.configuration[name] = secret + + storages_to_mount.append({"storage_id": storage.storage_id, "configuration": storage.configuration}) + + return storages_to_mount + @hookimpl def session_provider(self) -> ISessionProvider: """Supported session provider. @@ -374,6 +403,7 @@ def session_start( "commit_sha": session_commit, "serverOptions": server_options, "branch": repository.active_branch.name if repository.active_branch else "master", + "cloudstorage": self.get_cloudstorage(), **self._get_renku_project_name_parts(), } res = self._send_renku_request( diff --git a/renku/core/storage.py b/renku/core/storage.py index c42df05e2f..f8c9fa39f6 100644 --- a/renku/core/storage.py +++ b/renku/core/storage.py @@ -13,531 +13,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Logic for handling a data storage.""" +"""Functionality for interacting with cloud storage.""" -import functools -import itertools -import os -import re -import shlex -import tempfile -from collections import defaultdict -from pathlib import Path -from shutil import move, which -from subprocess import PIPE, STDOUT, check_output, run -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +from pydantic import validate_arguments -import pathspec +from renku.command.command_builder import inject +from renku.core.interface.storage_service_gateway import IStorageService -from renku.core import errors -from renku.core.config import get_value -from renku.core.constant import RENKU_LFS_IGNORE_PATH, RENKU_PROTECTED_PATHS -from renku.core.util import communication -from renku.core.util.git import get_in_submodules, run_command -from renku.core.util.os import expand_directories, parse_file_size -from renku.domain_model.project_context import project_context -if TYPE_CHECKING: - from renku.infrastructure.repository import Repository - - -_CMD_STORAGE_INSTALL = ["git", "lfs", "install", "--local"] - -_CMD_STORAGE_TRACK = ["git", "lfs", "track", "--"] - -_CMD_STORAGE_UNTRACK = ["git", "lfs", "untrack", "--"] - -_CMD_STORAGE_CLEAN = ["git", "lfs", "clean"] - -_CMD_STORAGE_CHECKOUT = ["git", "lfs", "checkout"] - -_CMD_STORAGE_PULL = ["git", "lfs", "pull", "-I"] - -_CMD_STORAGE_MIGRATE_IMPORT = ["git", "lfs", "migrate", "import"] - -_CMD_STORAGE_MIGRATE_INFO = ["git", "lfs", "migrate", "info", "--top", "42000"] - -_CMD_STORAGE_LIST = ["git", "lfs", "ls-files", "-n"] - -_CMD_STORAGE_STATUS = ["git", "lfs", "status"] - -_LFS_HEADER = "version https://git-lfs.github.com/spec/" - - -class RenkuGitWildMatchPattern(pathspec.patterns.GitWildMatchPattern): - """Custom GitWildMatchPattern matcher.""" - - __slots__ = ("pattern",) - - def __init__(self, pattern, include=None): - """Initialize RenkuRegexPattern.""" - super().__init__(pattern, include) - self.pattern = pattern - - -pathspec.util.register_pattern("renku_gitwildmatch", RenkuGitWildMatchPattern) - - -def check_external_storage_wrapper(fn): - """Check availability of external storage on methods that need it. - - Raises: - ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. - ``errors.ExternalStorageDisabled``: If external storage isn't enabled. - """ - - @functools.wraps(fn) - def wrapper(*args, **kwargs): - if not check_external_storage(): - pass - else: - return fn(*args, **kwargs) - - return wrapper - - -@functools.lru_cache -def storage_installed() -> bool: - """Verify that git-lfs is installed and on system PATH.""" - return bool(which("git-lfs")) - - -def storage_installed_locally() -> bool: - """Verify that git-lfs is installed for the project.""" - repo_config = project_context.repository.get_configuration(scope="local") - return repo_config.has_section('filter "lfs"') - - -def check_external_storage(): - """Check if repository has external storage enabled. - - Raises: - ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. - ``errors.ExternalStorageDisabled``: If external storage isn't enabled. - """ - installed_locally = storage_installed_locally() - is_storage_installed = installed_locally and storage_installed() - - if project_context.external_storage_requested and not is_storage_installed: - raise errors.ExternalStorageDisabled() - - if installed_locally and not storage_installed(): - raise errors.ExternalStorageNotInstalled() - - return is_storage_installed - - -def renku_lfs_ignore() -> pathspec.PathSpec: - """Gets pathspec for files to not add to LFS.""" - ignore_path = project_context.path / RENKU_LFS_IGNORE_PATH - - if not os.path.exists(ignore_path): - return pathspec.PathSpec.from_lines("renku_gitwildmatch", RENKU_PROTECTED_PATHS) - with ignore_path.open("r") as f: - # NOTE: Append `renku_protected_paths` at the end to give it the highest priority - lines = itertools.chain(f, RENKU_PROTECTED_PATHS) - return pathspec.PathSpec.from_lines("renku_gitwildmatch", lines) - - -def get_minimum_lfs_file_size() -> int: - """The minimum size of a file in bytes to be added to lfs.""" - size = get_value("renku", "lfs_threshold") - - return parse_file_size(size) - - -def init_external_storage(force: bool = False) -> None: - """Initialize the external storage for data.""" - try: - result = run( - _CMD_STORAGE_INSTALL + (["--force"] if force else []), - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - text=True, - ) - - if result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs install: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - -@check_external_storage_wrapper -def track_paths_in_storage(*paths: Union[Path, str]) -> Optional[List[str]]: - """Track paths in the external storage.""" - if not project_context.external_storage_requested or not check_external_storage(): - return None - - # Calculate which paths can be tracked in lfs - track_paths: List[str] = [] - attrs = project_context.repository.get_attributes(*paths) - - for path in paths: - path = Path(path) - - # Do not track symlinks in LFS - if path.is_symlink(): - continue - - # Do not add files with filter=lfs in .gitattributes - if attrs.get(str(path), {}).get("filter") == "lfs" or not (project_context.path / path).exists(): - continue - - relative_path = Path(path).relative_to(project_context.path) if path.is_absolute() else path - - if ( - path.is_dir() - and not renku_lfs_ignore().match_file(relative_path) - and not any(renku_lfs_ignore().match_tree(str(relative_path))) - ): - track_paths.append(str(path / "**")) - elif not renku_lfs_ignore().match_file(str(relative_path)): - file_size = os.path.getsize(str(os.path.relpath(project_context.path / path, os.getcwd()))) - if file_size >= get_minimum_lfs_file_size(): - track_paths.append(str(relative_path)) - - if track_paths: - try: - result = run_command( - _CMD_STORAGE_TRACK, - *track_paths, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs track: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - show_message = get_value("renku", "show_lfs_message") - if track_paths and (show_message is None or show_message.lower() == "true"): - files_list = "\n\t".join(track_paths) - communication.info( - f"Adding these files to Git LFS:\n\t{files_list}" - "\nTo disable this message in the future, run:\n\trenku config set show_lfs_message false" - ) - - return track_paths - - -@check_external_storage_wrapper -def untrack_paths_from_storage(*paths: Union[Path, str]) -> None: - """Untrack paths from the external storage.""" - try: - result = run_command( - _CMD_STORAGE_UNTRACK, - *paths, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs untrack: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - -@check_external_storage_wrapper -def list_tracked_paths() -> List[Path]: - """List paths tracked in lfs.""" - try: - files = check_output(_CMD_STORAGE_LIST, cwd=project_context.path, encoding="UTF-8") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs ls-files':\n{e}") - files_split: List[Path] = [project_context.path / f for f in files.splitlines()] - return files_split - - -@check_external_storage_wrapper -def list_unpushed_lfs_paths(repository: "Repository") -> List[Path]: - """List paths tracked in lfs for a repository.""" - - if len(repository.remotes) < 1 or (repository.active_branch and not repository.active_branch.remote_branch): - raise errors.GitConfigurationError( - f"No git remote is configured for {project_context.path} branch " - + f"{repository.active_branch.name}." # type: ignore - + "Cleaning the storage cache would lead to a loss of data as " - + "it is not on a server. Please see " - + "https://www.atlassian.com/git/tutorials/syncing for " - + "information on how to sync with a remote." - ) - try: - status = check_output(_CMD_STORAGE_STATUS, cwd=project_context.path, encoding="UTF-8") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs status':\n{e}") - - files = status.split("Objects to be committed:")[0].splitlines()[2:] - return [project_context.path / f.rsplit("(", 1)[0].strip() for f in files if f.strip()] - - -@check_external_storage_wrapper -def pull_paths_from_storage(repository: "Repository", *paths: Union[Path, str]): - """Pull paths from LFS.""" - project_dict = defaultdict(list) - - for path in expand_directories(paths): - sub_repository, _, path = get_in_submodules(repository, repository.head.commit, path) - try: - absolute_path = Path(path).resolve() - relative_path = absolute_path.relative_to(project_context.path) - except ValueError: # An external file - continue - - project_dict[sub_repository.path].append(shlex.quote(str(relative_path))) - - for project_path, file_paths in project_dict.items(): - result = run_command( - _CMD_STORAGE_PULL, - *file_paths, - separator=",", - cwd=project_path, - stdout=PIPE, - stderr=STDOUT, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Cannot pull LFS objects from server:\n {result.stdout}") - - -@check_external_storage_wrapper -def clean_storage_cache(*check_paths: Union[Path, str]) -> Tuple[List[str], List[str]]: - """Remove paths from lfs cache.""" - project_dict = defaultdict(list) - repositories: Dict[Path, "Repository"] = {} - tracked_paths: Dict[Path, List[Path]] = {} - unpushed_paths: Dict[Path, List[Path]] = {} - untracked_paths: List[str] = [] - local_only_paths: List[str] = [] - - repository = project_context.repository - - for path in expand_directories(check_paths): - current_repository, _, path = get_in_submodules(repository=repository, commit=repository.head.commit, path=path) - try: - absolute_path = Path(path).resolve() - relative_path = absolute_path.relative_to(project_context.path) - except ValueError: # An external file - continue - - if project_context.path not in tracked_paths: - tracked_paths[project_context.path] = list_tracked_paths() - - if project_context.path not in unpushed_paths: - u_paths = list_unpushed_lfs_paths(current_repository) - unpushed_paths[project_context.path] = u_paths - - if absolute_path in unpushed_paths[project_context.path]: - local_only_paths.append(str(relative_path)) - elif absolute_path not in tracked_paths[project_context.path]: - untracked_paths.append(str(relative_path)) - else: - project_dict[project_context.path].append(str(relative_path)) - repositories[project_context.path] = current_repository - - for project_path, paths in project_dict.items(): - current_repository = repositories[project_path] - - for path in paths: - with open(path) as tracked_file: - try: - header = tracked_file.read(len(_LFS_HEADER)) - if header == _LFS_HEADER: - # file is not pulled - continue - except UnicodeDecodeError: - # likely a binary file, not lfs pointer file - pass - with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8", delete=False) as tmp, open( - path, "r+t" - ) as input_file: - result = run(_CMD_STORAGE_CLEAN, cwd=project_path, stdin=input_file, stdout=tmp, text=True) - - if result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs clean: \n {result.stdout}") - - tmp_path = tmp.name - move(tmp_path, path) - - # get lfs sha hash - old_pointer = current_repository.get_raw_content(path=path, revision="HEAD") - old_pointer = old_pointer.splitlines()[1] - old_pointer = old_pointer.split(" ")[1].split(":")[1] - - prefix1 = old_pointer[:2] - prefix2 = old_pointer[2:4] - - # remove from lfs cache - object_path = project_context.path / ".git" / "lfs" / "objects" / prefix1 / prefix2 / old_pointer - object_path.unlink() - - # add paths so they don't show as modified - current_repository.add(*paths) - - return untracked_paths, local_only_paths - - -@check_external_storage_wrapper -def checkout_paths_from_storage(*paths: Union[Path, str]): - """Checkout a paths from LFS.""" - result = run_command( - _CMD_STORAGE_CHECKOUT, - *paths, - cwd=project_context.path, - stdout=PIPE, - stderr=STDOUT, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs checkout: \n {result.stdout}") - - -def check_requires_tracking(*paths: Union[Path, str]) -> Optional[List[str]]: - """Check paths and return a list of those that must be tracked.""" - - if not project_context.external_storage_requested: - return None - - attrs = project_context.repository.get_attributes(*paths) - track_paths: List[str] = [] - - for path in paths: - absolute_path = Path(os.path.abspath(project_context.path / path)) - path = str(path) - - # Do not track symlinks in LFS - if absolute_path.is_symlink(): - continue - - # Do not add files with filter=lfs in .gitattributes - if attrs.get(path, {}).get("filter") == "lfs": - continue - - if not absolute_path.is_dir(): - if renku_lfs_ignore().match_file(path): - continue - if os.path.getsize(absolute_path) < get_minimum_lfs_file_size(): - continue - - track_paths.append(path) - - return track_paths - - -def get_lfs_migrate_filters() -> Tuple[List[str], List[str]]: - """Gets include, exclude and above filters for lfs migrate.""" - - def add_migrate_pattern(pattern, collection): - if pattern in RENKU_PROTECTED_PATHS: - return - pattern = pattern.strip() - if pattern.endswith("*"): - return - pattern = pattern.rstrip("/") - collection.append(f"{pattern}/**") - - includes = [] - excludes = [] - for p in renku_lfs_ignore().patterns: - if p.regex is None: - continue - - pattern = p.pattern.replace(os.linesep, "").replace("\n", "") - if pattern.startswith("!"): - pattern = pattern.replace("!", "", 1) - - if p.include: # File ignored by LFS - excludes.append(pattern) - add_migrate_pattern(pattern, excludes) - else: - includes.append(pattern) - add_migrate_pattern(pattern, includes) - - if excludes: - excludes = ["--exclude", ",".join(excludes)] - if includes: - includes = ["--include", ",".join(includes)] - - return includes, excludes - - -def check_lfs_migrate_info(everything: bool = False, use_size_filter: bool = True) -> List[str]: - """Return list of file groups in history should be in LFS.""" - ref = ( - ["--everything"] - if everything or not project_context.repository.active_branch - else ["--include-ref", project_context.repository.active_branch.name] - ) - - includes, excludes = get_lfs_migrate_filters() - - ignore_pointers = ["--pointers", "ignore"] - - command = _CMD_STORAGE_MIGRATE_INFO + ref + includes + excludes - - # NOTE: ``lfs migrate info`` supports ``--above`` while ``lfs migrate import`` doesn't. - if use_size_filter: - above = ["--above", str(get_minimum_lfs_file_size())] - command += above - - try: - lfs_output = run( - command + ignore_pointers, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - text=True, - ) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") - - if lfs_output.returncode != 0: - # NOTE: try running without --pointers (old versions of git lfs) - try: - lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") - - if lfs_output.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs migrate info: \n {lfs_output.stdout}") - - groups: List[str] = [] - files_re = re.compile(r"(.*\s+[\d.]+\s+\S+).*") - - for line in lfs_output.stdout.split("\n"): - match = files_re.match(line) - if match: - groups.append(match.groups()[0]) - - if groups and use_size_filter: - # NOTE: Since there are some large files, remove the size filter so that users get list of all files that - # will be moved to LFS. - return check_lfs_migrate_info(everything=everything, use_size_filter=False) - - return groups - - -def migrate_files_to_lfs(paths: List[str]): - """Migrate files to Git LFS.""" - if paths: - includes: List[str] = ["--include", ",".join(paths)] - excludes: List[str] = [] - else: - includes, excludes = get_lfs_migrate_filters() - - command = _CMD_STORAGE_MIGRATE_IMPORT + includes + excludes - - try: - lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate import':\n{e}") - - if lfs_output.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs migrate import: \n {lfs_output.stdout}") +@inject.autoparams() +@validate_arguments(config=dict(arbitrary_types_allowed=True)) +def list_storage(storage_service: IStorageService): + """List configured cloud storage for project.""" + project_id = storage_service.project_id + storages = storage_service.ls(project_id) + return storages diff --git a/renku/core/workflow/execute.py b/renku/core/workflow/execute.py index 517ce18e92..75c6094310 100644 --- a/renku/core/workflow/execute.py +++ b/renku/core/workflow/execute.py @@ -28,8 +28,8 @@ from renku.core import errors from renku.core.interface.activity_gateway import IActivityGateway from renku.core.interface.plan_gateway import IPlanGateway +from renku.core.lfs import check_external_storage, pull_paths_from_storage from renku.core.plugin.provider import execute -from renku.core.storage import check_external_storage, pull_paths_from_storage from renku.core.util import communication from renku.core.util.datetime8601 import local_now from renku.core.util.os import is_subpath, safe_read_yaml diff --git a/renku/core/workflow/plan_factory.py b/renku/core/workflow/plan_factory.py index 959a85d30d..acb0dec298 100644 --- a/renku/core/workflow/plan_factory.py +++ b/renku/core/workflow/plan_factory.py @@ -33,8 +33,8 @@ from renku.core import errors from renku.core.constant import RENKU_HOME, RENKU_TMP from renku.core.interface.project_gateway import IProjectGateway +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.plugin.pluginmanager import get_plugin_manager -from renku.core.storage import check_external_storage, track_paths_in_storage from renku.core.util.git import is_path_safe from renku.core.util.metadata import is_external_file from renku.core.util.os import get_absolute_path, get_relative_path, is_subpath diff --git a/renku/core/workflow/run.py b/renku/core/workflow/run.py index 1b806f7bd1..3b5a59c7fa 100644 --- a/renku/core/workflow/run.py +++ b/renku/core/workflow/run.py @@ -34,7 +34,7 @@ from renku.core.git import get_mapped_std_streams from renku.core.interface.activity_gateway import IActivityGateway from renku.core.interface.plan_gateway import IPlanGateway -from renku.core.storage import check_external_storage, pull_paths_from_storage +from renku.core.lfs import check_external_storage, pull_paths_from_storage from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_git_user from renku.core.util.os import get_relative_path_to_cwd, get_relative_paths diff --git a/renku/domain_model/cloud_storage.py b/renku/domain_model/cloud_storage.py new file mode 100644 index 0000000000..5e397104d8 --- /dev/null +++ b/renku/domain_model/cloud_storage.py @@ -0,0 +1,58 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Domain model for cloud storage.""" +from dataclasses import dataclass +from typing import Any, Dict, List, NamedTuple, Optional + + +@dataclass +class CloudStorage: + """A cloud storage definition. + + Cloud storages are defined on the storage service to easily reuse storage configurations (RClone) in projects. + """ + + name: str + source_path: str + target_path: str + configuration: Dict[str, Any] + private: bool + storage_id: Optional[str] = None + project_id: Optional[str] = None + _storage_type: Optional[str] = None + + @property + def storage_type(self) -> str: + """The type of storage e.g. S3.""" + return self._storage_type or self.configuration["type"] + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "CloudStorage": + """Instantiate from a dict.""" + return CloudStorage( + storage_id=data["storage_id"], + name=data["name"], + source_path=data["source_path"], + target_path=data["target_path"], + private=data["private"], + configuration=data["configuration"], + project_id=data.get("project_id"), + ) + + +CloudStorageWithSensitiveFields = NamedTuple( + "CloudStorageWithSensitiveFields", [("storage", CloudStorage), ("private_fields", List[Dict[str, Any]])] +) diff --git a/renku/ui/service/gateways/gitlab_api_provider.py b/renku/infrastructure/gitlab_api_provider.py similarity index 83% rename from renku/ui/service/gateways/gitlab_api_provider.py rename to renku/infrastructure/gitlab_api_provider.py index eac4b6a511..0453c9c5c6 100644 --- a/renku/ui/service/gateways/gitlab_api_provider.py +++ b/renku/infrastructure/gitlab_api_provider.py @@ -23,9 +23,9 @@ import gitlab from renku.core import errors +from renku.core.interface.git_api_provider import IGitAPIProvider from renku.core.util.os import delete_dataset_file from renku.domain_model.git import GitURL -from renku.ui.service.interfaces.git_api_provider import IGitAPIProvider from renku.ui.service.logger import service_log @@ -43,13 +43,16 @@ class GitlabAPIProvider(IGitAPIProvider): errors.AuthenticationError: If the bearer token is invalid in any way. """ + def __init__(self, token: str): + """Init gitlab provider.""" + self.token = token + def download_files_from_api( self, files: List[Union[Path, str]], folders: List[Union[Path, str]], target_folder: Union[Path, str], remote: str, - token: str, branch: Optional[str] = None, ): """Download files through a remote Git API. @@ -59,7 +62,6 @@ def download_files_from_api( folders(List[Union[Path, str]]): Folders to download. target_folder(Union[Path, str]): Destination to save downloads to. remote(str): Git remote URL. - token(str): Gitlab API token. branch(Optional[str]): Git reference (Default value = None). """ if not branch: @@ -68,30 +70,7 @@ def download_files_from_api( target_folder = Path(target_folder) git_data = GitURL.parse(remote) - try: - gl = gitlab.Gitlab(git_data.instance_url, oauth_token=token) - project = gl.projects.get(f"{git_data.owner}/{git_data.name}") - except gitlab.GitlabAuthenticationError: - # NOTE: Invalid or expired tokens fail even on public projects. Let's give it a try without tokens - try: - gl = gitlab.Gitlab(git_data.instance_url) - project = gl.projects.get(f"{git_data.owner}/{git_data.name}") - except gitlab.GitlabAuthenticationError as e: - raise errors.AuthenticationError from e - except gitlab.GitlabGetError as e: - # NOTE: better to re-raise this as a core error since it's a common case - service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) - if "project not found" in getattr(e, "error_message", "").lower(): - raise errors.ProjectNotFound from e - else: - raise - except gitlab.GitlabGetError as e: - # NOTE: better to re-raise this as a core error since it's a common case - service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) - if "project not found" in getattr(e, "error_message", "").lower(): - raise errors.ProjectNotFound from e - else: - raise + project = self._get_project(git_data.instance_url, git_data.owner, git_data.name) for file in files: full_path = target_folder / file @@ -113,6 +92,41 @@ def download_files_from_api( with tarfile.open(fileobj=f) as archive: archive.extractall(path=target_folder, members=tar_members_without_top_folder(archive, 1)) + def get_project_id(self, gitlab_url: str, namespace: str, name: str) -> Optional[str]: + """Get a gitlab project id from namespace/name.""" + project = self._get_project(gitlab_url, namespace, name) + if not project: + return None + return project.id + + def _get_project(self, gitlab_url: str, namespace: str, name: str): + """Get a gitlab project.""" + try: + gl = gitlab.Gitlab(gitlab_url, oauth_token=self.token) + project = gl.projects.get(f"{namespace}/{name}") + except gitlab.GitlabAuthenticationError: + # NOTE: Invalid or expired tokens fail even on public projects. Let's give it a try without tokens + try: + gl = gitlab.Gitlab(gitlab_url) + project = gl.projects.get(f"{namespace}/{name}") + except gitlab.GitlabAuthenticationError as e: + raise errors.AuthenticationError from e + except gitlab.GitlabGetError as e: + # NOTE: better to re-raise this as a core error since it's a common case + service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) + if "project not found" in getattr(e, "error_message", "").lower(): + raise errors.ProjectNotFound from e + else: + raise + except gitlab.GitlabGetError as e: + # NOTE: better to re-raise this as a core error since it's a common case + service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) + if "project not found" in getattr(e, "error_message", "").lower(): + raise errors.ProjectNotFound from e + else: + raise + return project + def tar_members_without_top_folder(tar: tarfile.TarFile, strip: int) -> Generator[tarfile.TarInfo, None, None]: """Gets tar members, ignoring the top folder.""" diff --git a/renku/infrastructure/storage/storage_service.py b/renku/infrastructure/storage/storage_service.py new file mode 100644 index 0000000000..77932f3fc5 --- /dev/null +++ b/renku/infrastructure/storage/storage_service.py @@ -0,0 +1,154 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Gateway for storage service.""" +from dataclasses import asdict +from functools import cached_property +from typing import Any, Callable, Dict, List, Optional + +import requests + +from renku.command.command_builder.command import inject +from renku.core import errors +from renku.core.interface.git_api_provider import IGitAPIProvider +from renku.core.interface.storage_service_gateway import IStorageService +from renku.core.login import read_renku_token +from renku.core.session.utils import get_renku_url +from renku.domain_model.cloud_storage import CloudStorage, CloudStorageWithSensitiveFields +from renku.domain_model.project import Project +from renku.domain_model.project_context import project_context + +TIMEOUT = 5 + + +class StorageService(IStorageService): + """Storage service gateway.""" + + base_url: str + _gl: IGitAPIProvider = inject.attr(IGitAPIProvider) + + def __init__(self): + """Create an instance.""" + renku_url = get_renku_url() + if not renku_url: + raise errors.RenkulabSessionGetUrlError() + self.base_url = f"{renku_url}api/data" + + @cached_property + def project_id(self) -> str: + """Get the current gitlab project id. + + Note: This is mostly a workaround since storage service is already done to only accept + project ids, but the CLI knows nothing about those. + This could should be removed once we move to proper renku projects. + """ + namespace, name = Project.get_namespace_and_name( + remote=project_context.remote, name=project_context.project.name, repository=project_context.repository + ) + + if namespace is None or name is None: + raise errors.ParameterError("Couldn't get namespace or name for current project") + if namespace.startswith("repos/"): + namespace = namespace[6:] + gitlab_url = f"https://{project_context.remote.host}/repos/" + + return self._gl.get_project_id(gitlab_url, namespace, name) + + def _auth_headers(self) -> Dict[str, Any]: + """Send a request with authentication headers.""" + token = read_renku_token(None, get_endpoint_from_remote=True) + if not token: + raise errors.NotLoggedIn("Must be logged in to get access storage for a project.") + + return {"Authorization": f"Bearer {token}"} + + def _send_request( + self, + path: str, + parameters: Optional[Dict[str, Any]] = None, + body: Optional[Dict[str, Any]] = None, + method="GET", + auth=False, + expected_response=[200], + ): + """Send an unauthenticated request.""" + request_method: Callable[..., Any] + if method == "GET": + request_method = requests.get + elif method == "POST": + request_method = requests.post + elif method == "PUT": + request_method = requests.put + elif method == "DELETE": + request_method = requests.delete + else: + raise NotImplementedError() + + url = f"{self.base_url}{path}" + headers = None + + if auth: + headers = self._auth_headers() + + resp = request_method(url, headers=headers, params=parameters, data=body, timeout=TIMEOUT) # type: ignore + + if resp.status_code not in expected_response: + raise errors.RequestError(f"Request to storage service failed ({resp.status_code}): {resp.text}") + + return resp.json() + + def list(self, project_id: str) -> List[CloudStorageWithSensitiveFields]: + """List storage configured for the current project.""" + response = self._send_request("/storage", parameters={"project_id": project_id}, auth=True) + results = [] + for res in response: + results.append( + CloudStorageWithSensitiveFields(CloudStorage.from_dict(res["storage"]), res["sensitive_fields"]) + ) + + return results + + def create(self, storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Create a new cloud storage.""" + if storage.storage_id is not None: + raise ValueError("Cannot create storage with 'storage_id' already set.") + if storage.project_id is None: + raise ValueError("'project_id' must be set when creating CloudStorage.") + response = self._send_request( + "/storage", body=asdict(storage), method="POST", auth=True, expected_response=[201] + ) + return CloudStorageWithSensitiveFields( + CloudStorage.from_dict(response["storage"]), response["sensitive_fields"] + ) + + def edit(self, storage_id: str, new_storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Edit a cloud storage.""" + response = self._send_request(f"/storage/{storage_id}", body=asdict(new_storage), method="PUT", auth=True) + return CloudStorageWithSensitiveFields( + CloudStorage.from_dict(response["storage"]), response["sensitive_fields"] + ) + + def delete(self, storage_id: str) -> None: + """Delete a cloud storage.""" + self._send_request(f"/storage{storage_id}", method="DELETE", auth=True, expected_response=[204]) + + def validate(self, storage: CloudStorage) -> None: + """Validate a cloud storage. + + Raises an exception for invalid storage. + """ + self._send_request( + "/storage_schema/validate", body=storage.configuration, method="POST", expected_response=[204] + ) diff --git a/renku/ui/cli/__init__.py b/renku/ui/cli/__init__.py index 3a3ad44ab8..1e2557a463 100644 --- a/renku/ui/cli/__init__.py +++ b/renku/ui/cli/__init__.py @@ -103,6 +103,7 @@ from renku.ui.cli.githooks import githooks as githooks_command from renku.ui.cli.graph import graph from renku.ui.cli.init import init +from renku.ui.cli.lfs import lfs from renku.ui.cli.log import log from renku.ui.cli.login import credentials, login, logout from renku.ui.cli.mergetool import mergetool @@ -258,6 +259,7 @@ def help(ctx): cli.add_command(githooks_command) cli.add_command(graph) cli.add_command(init) +cli.add_command(lfs) cli.add_command(log) cli.add_command(login) cli.add_command(logout) diff --git a/renku/ui/cli/lfs.py b/renku/ui/cli/lfs.py new file mode 100644 index 0000000000..9070055c6f --- /dev/null +++ b/renku/ui/cli/lfs.py @@ -0,0 +1,175 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +r"""Manage an external storage. + +Commands and options +~~~~~~~~~~~~~~~~~~~~ + +.. rst-class:: cli-reference-commands + +.. click:: renku.ui.cli.lfs:lfs + :prog: renku lfs + :nested: full + +Pulling files from git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +LFS works by checking small pointer files into git and saving the actual +contents of a file in LFS. If instead of your file content, you see +something like this, it means the file is stored in git LFS and its +contents are not currently available locally (they are not pulled): + +.. code-block:: console + + version https://git-lfs.github.com/spec/v1 + oid sha256:42b5c7fb2acd54f6d3cd930f18fee3bdcb20598764ca93bdfb38d7989c054bcf + size 12 + +You can manually pull contents of file(s) you want with: + +.. code-block:: console + + $ renku lfs pull file1 file2 + +.. cheatsheet:: + :group: Misc + :command: $ renku lfs pull ... + :description: Pull 's from external storage (LFS). + :target: rp + +Removing local content of files stored in git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to restore a file back to its pointer file state, for instance +to free up space locally, you can run: + +.. code-block:: console + + $ renku lfs clean file1 file2 + +This removes any data cached locally for files tracked in in git LFS. + +Migrate large files to git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you accidentally checked a large file into git or are moving a non-LFS +renku repo to git LFS, you can use the following command to migrate the files +to LFS: + +.. code-block:: console + + $ renku lfs migrate --all + +This will move all files that are not excluded by `.renkulfsignore` into git +LFS. + +.. note:: + + Recent versions of Git LFS don't support filtering files based on their + size. Therefore, Renku ignores `lfs_threshold` config value when migrating + files to LFS using this command. + +To only migrate specific files, you can also pass their paths to the command +like: + +.. code-block:: console + + $ renku lfs migrate big_file other_big_file +""" +import os + +import click + +import renku.ui.cli.utils.color as color +from renku.command.util import WARNING +from renku.ui.cli.utils.callback import ClickCallback + + +@click.group() +def lfs(): + """Manage lfs.""" + + +@lfs.command() +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def pull(paths): + """Pull the specified paths from external storage.""" + from renku.command.lfs import pull_command + + pull_command().build().execute(paths=paths) + + +@lfs.command() +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def clean(paths): + """Remove files from lfs cache/turn them back into pointer files.""" + from renku.command.lfs import clean_command + + communicator = ClickCallback() + clean_command().with_communicator(communicator).build().execute(paths=paths) + + click.secho("OK", fg=color.GREEN) + + +@lfs.command("check-lfs-hook", hidden=True) +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def check_lfs_hook(paths): + """Check specified paths are tracked in external storage.""" + from renku.command.lfs import check_lfs_hook_command + + paths = check_lfs_hook_command().build().execute(paths=paths).output + if paths: + click.echo(os.linesep.join(paths)) + exit(1) + + +@lfs.command() +@click.option("--all", is_flag=True, help="Include all branches.") +def check(all): + """Check if large files are committed to Git history.""" + from renku.command.lfs import check_lfs_command + + files = check_lfs_command().build().execute(everything=all).output + if files: + message = WARNING + "Git history contains large files\n\t" + "\n\t".join(files) + click.echo(message) + exit(1) + else: + click.secho("OK", fg=color.GREEN) + + +@lfs.command() +@click.option("--all", "-a", "migrate_all", is_flag=True, default=False, help="Migrate all large files not in git LFS.") +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1) +def migrate(migrate_all, paths): + """Migrate large files committed to git by moving them to LFS.""" + from renku.command.lfs import check_lfs_command, fix_lfs_command + + if not paths: + if not migrate_all: + click.echo("Please specify paths to migrate or use the --all flag to migrate all large files.") + exit(1) + + lfs_paths = check_lfs_command().build().execute(everything=migrate_all).output + + if not lfs_paths: + click.echo("All files are already in LFS") + exit(0) + + if not click.confirm("The following files will be moved to Git LFS:\n\t" + "\n\t".join(lfs_paths)): + exit(0) + + fix_lfs_command().build().execute(paths) diff --git a/renku/ui/cli/storage.py b/renku/ui/cli/storage.py index ce5f8a14e2..6158335699 100644 --- a/renku/ui/cli/storage.py +++ b/renku/ui/cli/storage.py @@ -1,5 +1,4 @@ -# -# Copyright 2018-2023 - Swiss Data Science Center (SDSC) +# Copyright Swiss Data Science Center (SDSC) # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and # Eidgenössische Technische Hochschule Zürich (ETHZ). # @@ -14,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -r"""Manage an external storage. +r"""Manage an cloud storage. Commands and options ~~~~~~~~~~~~~~~~~~~~ @@ -25,99 +24,60 @@ :prog: renku storage :nested: full -Pulling files from git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -LFS works by checking small pointer files into git and saving the actual -contents of a file in LFS. If instead of your file content, you see -something like this, it means the file is stored in git LFS and its -contents are not currently available locally (they are not pulled): - -.. code-block:: console - - version https://git-lfs.github.com/spec/v1 - oid sha256:42b5c7fb2acd54f6d3cd930f18fee3bdcb20598764ca93bdfb38d7989c054bcf - size 12 - -You can manually pull contents of file(s) you want with: - -.. code-block:: console - - $ renku storage pull file1 file2 - -.. cheatsheet:: - :group: Misc - :command: $ renku storage pull ... - :description: Pull 's from external storage (LFS). - :target: rp - -Removing local content of files stored in git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you want to restore a file back to its pointer file state, for instance -to free up space locally, you can run: - -.. code-block:: console - - $ renku storage clean file1 file2 - -This removes any data cached locally for files tracked in in git LFS. - -Migrate large files to git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you accidentally checked a large file into git or are moving a non-LFS -renku repo to git LFS, you can use the following command to migrate the files -to LFS: - -.. code-block:: console - - $ renku storage migrate --all - -This will move all files that are not excluded by `.renkulfsignore` into git -LFS. - -.. note:: - - Recent versions of Git LFS don't support filtering files based on their - size. Therefore, Renku ignores `lfs_threshold` config value when migrating - files to LFS using this command. - -To only migrate specific files, you can also pass their paths to the command -like: - -.. code-block:: console - - $ renku storage migrate big_file other_big_file """ import os import click import renku.ui.cli.utils.color as color +from renku.command.format.storage import CLOUD_STORAGE_COLUMNS, CLOUD_STORAGE_FORMATS from renku.command.util import WARNING from renku.ui.cli.utils.callback import ClickCallback @click.group() def storage(): - """Manage an external storage.""" + """Manage storage.""" @storage.command() +@click.option( + "--columns", + type=click.STRING, + default=None, + metavar="", + help="Comma-separated list of column to display: {}.".format(", ".join(CLOUD_STORAGE_COLUMNS.keys())), + show_default=True, +) +@click.option( + "--format", type=click.Choice(list(CLOUD_STORAGE_FORMATS.keys())), default="log", help="Choose an output format." +) +def ls(columns, format): + """List configured cloud storage for a project.""" + from renku.command.storage import list_storage_command + + storages = list_storage_command().build().execute() + + click.echo(STORAGE_FORMATS[format](storages.output, columns=columns)) + + +# ============================================= +# Deprecated LFS commands below, see lfs.py +# ============================================= +@storage.command(hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def pull(paths): """Pull the specified paths from external storage.""" - from renku.command.storage import pull_command + from renku.command.lfs import pull_command pull_command().build().execute(paths=paths) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def clean(paths): """Remove files from lfs cache/turn them back into pointer files.""" - from renku.command.storage import clean_command + from renku.command.lfs import clean_command communicator = ClickCallback() clean_command().with_communicator(communicator).build().execute(paths=paths) @@ -125,11 +85,11 @@ def clean(paths): click.secho("OK", fg=color.GREEN) -@storage.command("check-lfs-hook", hidden=True) +@storage.command("check-lfs-hook", hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def check_lfs_hook(paths): """Check specified paths are tracked in external storage.""" - from renku.command.storage import check_lfs_hook_command + from renku.command.lfs import check_lfs_hook_command paths = check_lfs_hook_command().build().execute(paths=paths).output if paths: @@ -137,11 +97,11 @@ def check_lfs_hook(paths): exit(1) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.option("--all", is_flag=True, help="Include all branches.") def check(all): """Check if large files are committed to Git history.""" - from renku.command.storage import check_lfs_command + from renku.command.lfs import check_lfs_command files = check_lfs_command().build().execute(everything=all).output if files: @@ -152,12 +112,12 @@ def check(all): click.secho("OK", fg=color.GREEN) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.option("--all", "-a", "migrate_all", is_flag=True, default=False, help="Migrate all large files not in git LFS.") @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1) def migrate(migrate_all, paths): """Migrate large files committed to git by moving them to LFS.""" - from renku.command.storage import check_lfs_command, fix_lfs_command + from renku.command.lfs import check_lfs_command, fix_lfs_command if not paths: if not migrate_all: diff --git a/renku/ui/service/controllers/cache_migrations_check.py b/renku/ui/service/controllers/cache_migrations_check.py index 7c5a666c59..050efadb45 100644 --- a/renku/ui/service/controllers/cache_migrations_check.py +++ b/renku/ui/service/controllers/cache_migrations_check.py @@ -19,6 +19,7 @@ import tempfile from dataclasses import asdict from pathlib import Path +from typing import Type from renku.command.migrate import MigrationCheckResult, migrations_check from renku.core.errors import AuthenticationError, MinimumVersionError, ProjectNotFound, RenkuException @@ -37,11 +38,11 @@ class MigrationsCheckCtrl(ServiceCtrl, RenkuOperationMixin): REQUEST_SERIALIZER = ProjectMigrationCheckRequest() RESPONSE_SERIALIZER = ProjectMigrationCheckResponseRPC() - def __init__(self, cache, user_data, request_data, git_api_provider: IGitAPIProvider): + def __init__(self, cache, user_data, request_data, git_api_provider: Type[IGitAPIProvider]): """Construct migration check controller.""" self.ctx = MigrationsCheckCtrl.REQUEST_SERIALIZER.load(request_data) - self.git_api_provider = git_api_provider super().__init__(cache, user_data, request_data) + self.git_api_provider = git_api_provider(token=self.user.token) @property def context(self): diff --git a/renku/ui/service/views/cache.py b/renku/ui/service/views/cache.py index 9803c25e4d..cb1333abcd 100644 --- a/renku/ui/service/views/cache.py +++ b/renku/ui/service/views/cache.py @@ -17,13 +17,13 @@ """Renku service cache views.""" from flask import jsonify, request +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.config import SERVICE_PREFIX from renku.ui.service.controllers.cache_files_delete_chunks import DeleteFileChunksCtrl from renku.ui.service.controllers.cache_files_upload import UploadFilesCtrl from renku.ui.service.controllers.cache_list_uploaded import ListUploadedFilesCtrl from renku.ui.service.controllers.cache_migrate_project import MigrateProjectCtrl from renku.ui.service.controllers.cache_migrations_check import MigrationsCheckCtrl -from renku.ui.service.gateways.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.gateways.repository_cache import LocalRepositoryCache from renku.ui.service.jobs.cleanup import cache_files_cleanup from renku.ui.service.views.api_versions import ALL_VERSIONS, V2_0, V2_1, VERSIONS_FROM_V1_1, VersionedBlueprint @@ -181,7 +181,7 @@ def migration_check_project_view(user_data, cache): tags: - cache """ - return MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider()).to_response() + return MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider).to_response() @cache_blueprint.route("/cache.cleanup", methods=["GET"], provide_automatic_options=False, versions=[V2_1]) diff --git a/renku/ui/service/views/v1/cache.py b/renku/ui/service/views/v1/cache.py index 78101db73a..b9de3fb9c3 100644 --- a/renku/ui/service/views/v1/cache.py +++ b/renku/ui/service/views/v1/cache.py @@ -20,9 +20,9 @@ from flask import request from renku.core.errors import AuthenticationError, ProjectNotFound +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.controllers.cache_migrate_project import MigrateProjectCtrl from renku.ui.service.controllers.cache_migrations_check import MigrationsCheckCtrl -from renku.ui.service.gateways.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.serializers.v1.cache import ProjectMigrateResponseRPC_1_0, ProjectMigrationCheckResponseRPC_1_5 from renku.ui.service.views import result_response from renku.ui.service.views.api_versions import V1_0, V1_1, V1_2, V1_3, V1_4, V1_5 diff --git a/tests/cli/test_clone.py b/tests/cli/test_clone.py index 8eb55d289a..906f191e74 100644 --- a/tests/cli/test_clone.py +++ b/tests/cli/test_clone.py @@ -32,7 +32,7 @@ @pytest.mark.parametrize("url", ["https://gitlab.dev.renku.ch/renku-testing/project-9"]) def test_clone(runner, monkeypatch, url): """Test cloning of a Renku repo and existence of required settings.""" - import renku.core.storage + import renku.core.lfs with runner.isolated_filesystem() as project_path: result = runner.invoke(cli, ["clone", url, project_path]) @@ -50,7 +50,7 @@ def test_clone(runner, monkeypatch, url): # Check Git LFS is enabled with monkeypatch.context() as monkey: # Pretend that git-lfs is not installed. - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) # Repo is using external storage but it's not installed. result = runner.invoke(cli, ["run", "touch", "output"]) diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index b7aeb73ad3..56910cadd1 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -35,7 +35,7 @@ from renku.core.dataset.providers.factory import ProviderFactory from renku.core.dataset.providers.zenodo import ZenodoProvider from renku.core.interface.storage import FileHash -from renku.core.storage import track_paths_in_storage +from renku.core.lfs import track_paths_in_storage from renku.core.util.git import get_dirty_paths from renku.core.util.urls import get_slug from renku.domain_model.dataset import Dataset diff --git a/tests/core/commands/test_cli.py b/tests/core/commands/test_cli.py index 76f9d98afe..ec4450d3ac 100644 --- a/tests/core/commands/test_cli.py +++ b/tests/core/commands/test_cli.py @@ -24,7 +24,7 @@ import pytest -import renku.core.storage +import renku.core.lfs from renku import __version__ from renku.core.config import get_value, load_config, remove_value, set_value, store_config from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR @@ -248,7 +248,7 @@ def test_configuration_of_no_external_storage(isolated_runner, monkeypatch, proj assert 0 == result.exit_code, format_result_exception(result) # Pretend that git-lfs is not installed. with monkeypatch.context() as monkey: - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) # Missing --no-external-storage flag. result = runner.invoke(cli, ["run", "touch", "output"]) assert "External storage is not configured" in result.output @@ -274,7 +274,7 @@ def test_configuration_of_external_storage(isolated_runner, monkeypatch, project assert 0 == result.exit_code, format_result_exception(result) # Pretend that git-lfs is not installed. with monkeypatch.context() as monkey: - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) # Repo is using external storage but it's not installed. result = runner.invoke(cli, ["run", "touch", "output"]) assert 1 == result.exit_code @@ -304,7 +304,7 @@ def test_early_check_of_external_storage(isolated_runner, monkeypatch, directory # Pretend that git-lfs is not installed. with monkeypatch.context() as monkey: - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) failing_command = ["dataset", "add", "--copy", "-s", "src", "my-dataset", str(directory_tree)] result = isolated_runner.invoke(cli, failing_command) @@ -363,7 +363,7 @@ def test_status_with_submodules(isolated_runner, monkeypatch, project_init): os.chdir("../foo") with monkeypatch.context() as monkey: - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) result = runner.invoke(cli, ["dataset", "add", "--copy", "f", "../woop"], catch_exceptions=False) diff --git a/tests/core/commands/test_doctor.py b/tests/core/commands/test_doctor.py index 5d1d4dfbff..d8a472cf03 100644 --- a/tests/core/commands/test_doctor.py +++ b/tests/core/commands/test_doctor.py @@ -17,7 +17,7 @@ """Renku doctor tests.""" from renku.core.constant import RENKU_LFS_IGNORE_PATH -from renku.core.storage import get_minimum_lfs_file_size +from renku.core.lfs import get_minimum_lfs_file_size from renku.domain_model.dataset import DatasetFile, Url from renku.domain_model.project_context import project_context from renku.infrastructure.gateway.activity_gateway import ActivityGateway diff --git a/tests/core/management/test_storage.py b/tests/core/management/test_storage.py index 95b92db85a..8b017a1b35 100644 --- a/tests/core/management/test_storage.py +++ b/tests/core/management/test_storage.py @@ -20,7 +20,7 @@ import pytest -from renku.core.storage import get_lfs_migrate_filters, track_paths_in_storage +from renku.core.lfs import get_lfs_migrate_filters, track_paths_in_storage from renku.domain_model.project_context import project_context diff --git a/tests/fixtures/common.py b/tests/fixtures/common.py index e581b60ecf..be860de4a9 100644 --- a/tests/fixtures/common.py +++ b/tests/fixtures/common.py @@ -23,7 +23,7 @@ import pytest from renku.core.config import set_value -from renku.core.storage import get_minimum_lfs_file_size +from renku.core.lfs import get_minimum_lfs_file_size @pytest.fixture