Build: use rclone for sync
stsewd committed Dec 23, 2022
1 parent c4a15c8 commit 1ccc43b
Showing 6 changed files with 138 additions and 5 deletions.
3 changes: 2 additions & 1 deletion dockerfiles/Dockerfile
@@ -30,7 +30,8 @@ RUN apt-get -y install \
    netcat \
    telnet \
    lsb-release \
    npm
    npm \
    rclone

# Gets the MinIO mc client used to add buckets upon initialization
# If this client should have issues running inside this image, it is also
10 changes: 10 additions & 0 deletions readthedocs/builds/storage.py
@@ -1,4 +1,5 @@
from pathlib import Path
from functools import cached_property

import structlog
from django.conf import settings
@@ -7,6 +8,7 @@
from storages.utils import get_available_overwrite_name, safe_join

from readthedocs.core.utils.filesystem import safe_open
from readthedocs.storage.rclone import RClone

log = structlog.get_logger(__name__)

@@ -153,6 +155,14 @@ def sync_directory(self, source, destination):
                log.debug('Deleting file from media storage.', filepath=filepath)
                self.delete(filepath)

    @cached_property
    def _rclone(self):
        return RClone()

    def rclone_sync(self, source, destination):
        """Sync a directory recursively to storage using rclone sync."""
        return self._rclone.sync(source, destination)

    def join(self, directory, filepath):
        return safe_join(directory, filepath)

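In the base mixin, _rclone is a plain RClone, which targets rclone's local-filesystem remote, so rclone_sync ends up shelling out to the rclone binary. A minimal sketch of what a call resolves to, with illustrative paths that are not part of this commit:

from readthedocs.storage.rclone import RClone

rclone = RClone()  # what the cached `_rclone` property returns on the base mixin

# rclone_sync(source, destination) delegates to RClone.sync(), which runs roughly:
#   rclone sync --transfers=8 --verbose -- /tmp/build/html/ :local:html/myproject/latest
result = rclone.sync("/tmp/build/html/", "html/myproject/latest")
print(result.returncode)  # a subprocess.CompletedProcess; 0 means the sync succeeded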
5 changes: 5 additions & 0 deletions readthedocs/projects/models.py
@@ -1842,6 +1842,7 @@ def add_features(sender, **kwargs):
    USE_SPHINX_BUILDERS = "use_sphinx_builders"
    CANCEL_OLD_BUILDS = "cancel_old_builds"
    DONT_CREATE_INDEX = "dont_create_index"
    USE_RCLONE = "use_rclone"

    FEATURES = (
        (ALLOW_DEPRECATED_WEBHOOKS, _('Allow deprecated webhook views')),
@@ -1998,6 +1999,10 @@ def add_features(sender, **kwargs):
            DONT_CREATE_INDEX,
            _('Do not create index.md or README.rst if the project does not have one.'),
        ),
        (
            USE_RCLONE,
            _("Use rclone for syncing files to the media storage."),
        ),
    )

    projects = models.ManyToManyField(
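USE_RCLONE is opted into per project through the Feature model's projects relation, like the other flags above. A rough sketch of enabling it for one project; the feature_id lookup field and the project slug are assumptions, not shown in this diff:

from readthedocs.projects.models import Feature, Project

# Rough sketch; assumes Feature.feature_id is the lookup field used by the model.
project = Project.objects.get(slug="myproject")  # illustrative slug
feature, _ = Feature.objects.get_or_create(feature_id=Feature.USE_RCLONE)
feature.projects.add(project)

assert project.has_feature(Feature.USE_RCLONE)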
5 changes: 4 additions & 1 deletion readthedocs/projects/tasks/builds.py
@@ -833,7 +833,10 @@ def store_build_artifacts(
                version_type=self.data.version.type,
            )
            try:
                build_media_storage.sync_directory(from_path, to_path)
                if self.data.project.has_feature(Feature.USE_RCLONE):
                    build_media_storage.rclone_sync(from_path, to_path)
                else:
                    build_media_storage.sync_directory(from_path, to_path)
            except Exception:
                # Ideally this should just be an IOError
                # but some storage backends unfortunately throw other errors
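Note that rclone_sync returns the subprocess.CompletedProcess from RClone.execute, and execute calls subprocess.run without check=True, so a failed sync surfaces as a nonzero exit code rather than an exception caught by the block above. A purely illustrative sketch of checking it explicitly, not part of this commit:

result = build_media_storage.rclone_sync(from_path, to_path)
if result.returncode != 0:
    raise IOError(f"rclone sync exited with code {result.returncode}")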
91 changes: 91 additions & 0 deletions readthedocs/storage/rclone.py
@@ -0,0 +1,91 @@
"""Wrapper around the rclone command."""

import os
import subprocess

import structlog

log = structlog.get_logger(__name__)


class RClone:

    remote_type = "local"
    rclone_bin = "rclone"
    default_options = [
        # Number of file transfers to run in parallel.
        "--transfers=8",
        "--verbose",
    ]
    env_vars = {}

    def build_target(self, path):
        return f":{self.remote_type}:{path}"

    def execute(self, action, args, options=None):
        options = options or []
        command = [
            self.rclone_bin,
            action,
            *self.default_options,
            *options,
            "--",
            *args,
        ]
        env = os.environ.copy()
        env.update(self.env_vars)
        log.info("Executing rclone command.", command=command)
        log.debug("env", env=env)
        result = subprocess.run(
            command,
            capture_output=True,
            env=env,
        )
        log.debug(
            "Result.",
            stdout=result.stdout.decode(),
            stderr=result.stderr.decode(),
            exit_code=result.returncode,
        )
        return result

    def sync(self, source, destination):
        # TODO: check if source can be a symlink.
        return self.execute("sync", args=[source, self.build_target(destination)])


class RCloneS3Remote(RClone):

    remote_type = "s3"

    def __init__(
        self,
        bucket_name,
        access_key_id,
        secret_access_key,
        region,
        provider="AWS",
        acl=None,
        endpoint=None,
    ):
        super().__init__()
        # rclone S3 options passed as env vars.
        # https://rclone.org/s3/#standard-options
        region = region or ""
        self.env_vars = {
            "RCLONE_S3_PROVIDER": provider,
            "RCLONE_S3_ACCESS_KEY_ID": access_key_id,
            "RCLONE_S3_SECRET_ACCESS_KEY": secret_access_key,
            "RCLONE_S3_REGION": region,
            "RCLONE_S3_LOCATION_CONSTRAINT": region,
        }
        if acl:
            self.env_vars["RCLONE_S3_ACL"] = acl
        if endpoint:
            self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint
        self.bucket_name = bucket_name

    def build_target(self, path):
        path = f"{self.bucket_name}/{path}"
        return super().build_target(path)
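A rough usage sketch of the S3 remote; the bucket name, credentials and paths below are purely illustrative (in practice they come from the storage backend shown next):

from readthedocs.storage.rclone import RCloneS3Remote

remote = RCloneS3Remote(
    bucket_name="readthedocs-media",       # illustrative
    access_key_id="AKIAXXXXXXXXXXXXXXXX",  # illustrative
    secret_access_key="xxxxxxxx",          # illustrative
    region="us-east-1",
)

# The S3 options are passed to rclone via RCLONE_S3_* environment variables,
# and the call below runs roughly:
#   rclone sync --transfers=8 --verbose -- /tmp/build/html/ :s3:readthedocs-media/html/myproject/latest
remote.sync("/tmp/build/html/", "html/myproject/latest")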
29 changes: 26 additions & 3 deletions readthedocs/storage/s3_storage.py
@@ -9,16 +9,39 @@

# Disable abstract method because we are not overriding all the methods
# pylint: disable=abstract-method
from functools import cached_property
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from storages.backends.s3boto3 import S3Boto3Storage, S3ManifestStaticStorage

from readthedocs.builds.storage import BuildMediaStorageMixin
from readthedocs.storage.rclone import RCloneS3Remote

from .mixins import OverrideHostnameMixin, S3PrivateBucketMixin


class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3Storage):
class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage):

    @cached_property
    def _rclone(self):
        provider = "AWS"
        # If a custom endpoint URL is given and
        # we are running in DEBUG mode, use MinIO as the provider.
        if self.endpoint_url and settings.DEBUG:
            provider = "minio"

        return RCloneS3Remote(
            bucket_name=self.bucket_name,
            access_key_id=self.access_key,
            secret_access_key=self.secret_key,
            region=self.region_name,
            acl=self.default_acl,
            endpoint=self.endpoint_url,
            provider=provider,
        )


class S3BuildMediaStorage(OverrideHostnameMixin, S3BuildMediaStorageMixin):

"""An AWS S3 Storage backend for build artifacts."""

@@ -94,7 +117,7 @@ class NoManifestS3StaticStorage(
"""


class S3BuildEnvironmentStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
class S3BuildEnvironmentStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):

    bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None)

@@ -108,7 +131,7 @@ def __init__(self, *args, **kwargs):
)


class S3BuildToolsStorage(S3PrivateBucketMixin, BuildMediaStorageMixin, S3Boto3Storage):
class S3BuildToolsStorage(S3PrivateBucketMixin, S3BuildMediaStorageMixin):

bucket_name = getattr(settings, 'S3_BUILD_TOOLS_STORAGE_BUCKET', None)

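Putting it together: the concrete S3 storages now get rclone_sync from BuildMediaStorageMixin and the S3-flavoured _rclone from the new mixin, so syncing through the media storage shells out to rclone against the configured bucket. A minimal sketch, with illustrative paths and the Django storage settings assumed to be configured:

from readthedocs.storage.s3_storage import S3BuildMediaStorage

storage = S3BuildMediaStorage()

# BuildMediaStorageMixin.rclone_sync() -> S3BuildMediaStorageMixin._rclone
# (an RCloneS3Remote built from the backend's bucket, credentials, region,
# ACL and endpoint) -> RClone.execute("sync", ...), roughly:
#   rclone sync --transfers=8 --verbose -- /tmp/build/html/ :s3:<bucket>/html/myproject/latest
storage.rclone_sync("/tmp/build/html/", "html/myproject/latest")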
