From bf4fb1fb400daad23702bc0b3231ec069d68e87e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:20:59 +0100 Subject: [PATCH 01/29] Basic implementation of backup media store --- synapse/config/repository.py | 18 ++ synapse/rest/media/v1/media_repository.py | 221 +++++++++++----------- synapse/rest/media/v1/thumbnailer.py | 16 +- synapse/rest/media/v1/upload_resource.py | 2 +- 4 files changed, 131 insertions(+), 126 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2c6f57168e07..e3c83d56faec 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -70,7 +70,17 @@ def read_config(self, config): self.max_upload_size = self.parse_size(config["max_upload_size"]) self.max_image_pixels = self.parse_size(config["max_image_pixels"]) self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.media_store_path = self.ensure_directory(config["media_store_path"]) + + self.backup_media_store_path = config.get("backup_media_store_path") + if self.backup_media_store_path: + self.ensure_directory(self.backup_media_store_path) + + self.synchronous_backup_media_store = config.get( + "synchronous_backup_media_store", False + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -115,6 +125,14 @@ def default_config(self, **kwargs): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" + # A secondary directory where uploaded images and attachments are + # stored as a backup. + # backup_media_store_path: "%(media_store)s" + + # Whether to wait for successful write to backup media store before + # returning successfully. + # synchronous_backup_media_store: false + # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0ea1248ce60d..3b442cc16b67 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.logcontext import preserve_context_over_fn, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -59,7 +59,12 @@ def __init__(self, hs): self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.backup_filepaths = None + if hs.config.backup_media_store_path: + self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -87,18 +92,43 @@ def _makedirs(filepath): if not os.path.exists(dirname): os.makedirs(dirname) + @defer.inlineCallbacks + def _write_to_file(self, source, file_name_func): + def write_file_thread(file_name): + source.seek(0) # Ensure we read from the start of the file + with open(file_name, "wb") as f: + shutil.copyfileobj(source, f) + + fname = file_name_func(self.filepaths) + self._makedirs(fname) + + # Write to the main repository + yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + + # Write to backup repository + if self.backup_filepaths: + backup_fname = file_name_func(backup_filepaths) + self._makedirs(backup_fname) + + # We can either wait for successful writing to the backup repository + # or write in the background and immediately return + if hs.config.synchronous_backup_media_store: + yield preserve_context_over_fn( + threads.deferToThread, write_file_thread, backup_fname, + ) + else: + preserve_fn(threads.deferToThread)(write_file, backup_fname) + + defer.returnValue(fname) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(content) + fname = yield self._write_to_file( + content, lambda f: f.local_media_filepath(media_id) + ) logger.info("Stored local media in file %r", fname) @@ -253,9 +283,8 @@ def _download_remote_file(self, server_name, media_id): def _get_thumbnail_requirements(self, media_type): return self.thumbnail_requirements.get(media_type, ()) - def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type): - thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -267,36 +296,40 @@ def _generate_thumbnail(self, input_path, t_path, t_width, t_height, return if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) elif t_method == "scale": t_width, t_height = thumbnailer.aspect(t_width, t_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.scale(t_width, t_height, t_type) else: - t_len = None + t_byte_source = None - return t_len + return t_byte_source @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.local_media_filepath(media_id) - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -306,21 +339,25 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.remote_media_filepath(server_name, file_id) - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -351,59 +388,32 @@ def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): local_thumbnails = [] def generate_thumbnails(): - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, + ) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len + r_width, r_height, r_method, r_type, t_byte_source )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for l in local_thumbnails: - yield self.store.store_local_thumbnail(*l) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + if url_cache: + path_name_func = lambda f: f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + path_name_func = lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) defer.returnValue({ "width": m_width, @@ -433,51 +443,32 @@ def generate_thumbnails(): ) return - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) + + remote_thumbnails.append(( + r_width, r_height, r_method, r_type, t_byte_source + )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + path_name_func = lambda f: f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) + defer.returnValue({ "width": m_width, "height": m_height, diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 3868d4f65f22..60498b08aad9 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -50,12 +50,12 @@ def aspect(self, max_width, max_height): else: return ((max_height * self.width) // self.height, max_height) - def scale(self, output_path, width, height, output_type): + def scale(self, width, height, output_type): """Rescales the image to the given dimensions""" scaled = self.image.resize((width, height), Image.ANTIALIAS) - return self.save_image(scaled, output_type, output_path) + return self._encode_image(scaled, output_type) - def crop(self, output_path, width, height, output_type): + def crop(self, width, height, output_type): """Rescales and crops the image to the given dimensions preserving aspect:: (w_in / h_in) = (w_scaled / h_scaled) @@ -82,13 +82,9 @@ def crop(self, output_path, width, height, output_type): crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self.save_image(cropped, output_type, output_path) + return self._encode_image(cropped, output_type) - def save_image(self, output_image, output_type, output_path): + def _encode_image(self, output_image, output_type): output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_bytes = output_bytes_io.getvalue() - with open(output_path, "wb") as output_file: - output_file.write(output_bytes) - logger.info("Stored thumbnail in file %r", output_path) - return len(output_bytes) + return output_bytes_io diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 4ab33f73bff2..f6f498cdc56c 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -93,7 +93,7 @@ def _async_render_POST(self, request): # TODO(markjh): parse content-dispostion content_uri = yield self.media_repo.create_content( - media_type, upload_name, request.content.read(), + media_type, upload_name, request.content, content_length, requester.user ) From 67cb89fbdf62dfb2ff65f6f7f0ca23445cdac0ac Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:23:41 +0100 Subject: [PATCH 02/29] Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b442cc16b67..f26f793bed74 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -107,7 +107,7 @@ def write_file_thread(file_name): # Write to backup repository if self.backup_filepaths: - backup_fname = file_name_func(backup_filepaths) + backup_fname = file_name_func(self.backup_filepaths) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository From c8eeef6947af762c3eabef6ecca0f69833fbf8ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:28:24 +0100 Subject: [PATCH 03/29] Fix typos --- synapse/rest/media/v1/media_repository.py | 46 +++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f26f793bed74..a16034fd678f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -65,6 +65,8 @@ def __init__(self, hs): if hs.config.backup_media_store_path: self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -112,12 +114,12 @@ def write_file_thread(file_name): # We can either wait for successful writing to the backup repository # or write in the background and immediately return - if hs.config.synchronous_backup_media_store: + if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, write_file_thread, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file, backup_fname) + preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) defer.returnValue(fname) @@ -321,7 +323,7 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) @@ -329,10 +331,11 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, @@ -348,7 +351,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.remote_media_thumbnail( server_name, file_id, t_width, t_height, t_type, t_method ) @@ -360,7 +363,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): @@ -400,19 +403,21 @@ def generate_thumbnails(): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: - path_name_func = lambda f: f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - path_name_func = lambda f: f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + def path_name_func(f): + if url_cache: + return f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + return f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) defer.returnValue({ @@ -457,10 +462,11 @@ def generate_thumbnails(): for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - path_name_func = lambda f: f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method - ) + for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: + def path_name_func(f): + return f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) From 6dfde6d4856695890271232f8a2e4c5f32615dd1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:30:26 +0100 Subject: [PATCH 04/29] Remove dead code --- synapse/rest/media/v1/media_repository.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a16034fd678f..1eeb128d2a09 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -459,9 +459,6 @@ def generate_thumbnails(): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for r in remote_thumbnails: - yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( From b77a13812c38b2e79b2ebfddb52ce88a2ac8e9b9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:32:32 +0100 Subject: [PATCH 05/29] Typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 1eeb128d2a09..93b35af9cfb0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -462,7 +462,7 @@ def generate_thumbnails(): for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) yield self._write_to_file(t_byte_source, path_name_func) From e283b555b1f20de4fd393fd947e82eb3c635b7e9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:31:24 +0100 Subject: [PATCH 06/29] Copy everything to backup --- synapse/config/repository.py | 4 +- synapse/rest/media/v1/filepath.py | 99 ++++++++++------ synapse/rest/media/v1/media_repository.py | 109 +++++++++++------- synapse/rest/media/v1/preview_url_resource.py | 7 +- synapse/rest/media/v1/thumbnailer.py | 9 +- 5 files changed, 151 insertions(+), 77 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index e3c83d56faec..6baa474931a4 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -75,7 +75,9 @@ def read_config(self, config): self.backup_media_store_path = config.get("backup_media_store_path") if self.backup_media_store_path: - self.ensure_directory(self.backup_media_store_path) + self.backup_media_store_path = self.ensure_directory( + self.backup_media_store_path + ) self.synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index d5cec10127e0..43d0eea00d5e 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -15,103 +15,134 @@ import os import re +import functools NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") +def _wrap_in_base_path(func): + """Takes a function that returns a relative path and turns it into an + absolute path based on the location of the primary media store + """ + @functools.wraps(func) + def _wrapped(self, *args, **kwargs): + path = func(self, *args, **kwargs) + return os.path.join(self.primary_base_path, path) + + return _wrapped + + class MediaFilePaths(object): + """Describes where files are stored on disk. - def __init__(self, base_path): - self.base_path = base_path + Most of the function have a `*_rel` variant which returns a file path that + is relative to the base media store path. This is mainly used when we want + to write to the backup media store (when one is configured) + """ - def default_thumbnail(self, default_top_level, default_sub_type, width, - height, content_type, method): + def __init__(self, primary_base_path): + self.primary_base_path = primary_base_path + + def default_thumbnail_rel(self, default_top_level, default_sub_type, width, + height, content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "default_thumbnails", default_top_level, + "default_thumbnails", default_top_level, default_sub_type, file_name ) - def local_media_filepath(self, media_id): + default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) + + def local_media_filepath_rel(self, media_id): return os.path.join( - self.base_path, "local_content", + "local_content", media_id[0:2], media_id[2:4], media_id[4:] ) - def local_media_thumbnail(self, media_id, width, height, content_type, - method): + local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + + def local_media_thumbnail_rel(self, media_id, width, height, content_type, + method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "local_thumbnails", + "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) - def remote_media_filepath(self, server_name, file_id): + local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + + def remote_media_filepath_rel(self, server_name, file_id): return os.path.join( - self.base_path, "remote_content", server_name, + "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] ) - def remote_media_thumbnail(self, server_name, file_id, width, height, - content_type, method): + remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + + def remote_media_thumbnail_rel(self, server_name, file_id, width, height, + content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], file_name ) + remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) + def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) - def url_cache_filepath(self, media_id): + def url_cache_filepath_rel(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], media_id[11:] ) else: return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + def url_cache_filepath_dirs_to_delete(self, media_id): "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], ), ] - def url_cache_thumbnail(self, media_id, width, height, content_type, - method): + def url_cache_thumbnail_rel(self, media_id, width, height, content_type, + method): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf @@ -122,29 +153,31 @@ def url_cache_thumbnail(self, media_id, width, height, content_type, if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], file_name ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) + url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) + def url_cache_thumbnail_directory(self, media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -155,26 +188,26 @@ def url_cache_thumbnail_dirs_to_delete(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], ), ] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 93b35af9cfb0..398e973ca9b8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -60,10 +60,12 @@ def __init__(self, hs): self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels - self.filepaths = MediaFilePaths(hs.config.media_store_path) - self.backup_filepaths = None + self.primary_base_path = hs.config.media_store_path + self.filepaths = MediaFilePaths(self.primary_base_path) + + self.backup_base_path = None if hs.config.backup_media_store_path: - self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store @@ -94,42 +96,63 @@ def _makedirs(filepath): if not os.path.exists(dirname): os.makedirs(dirname) - @defer.inlineCallbacks - def _write_to_file(self, source, file_name_func): - def write_file_thread(file_name): - source.seek(0) # Ensure we read from the start of the file - with open(file_name, "wb") as f: - shutil.copyfileobj(source, f) + @staticmethod + def write_file_synchronously(source, fname): + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) - fname = file_name_func(self.filepaths) + @defer.inlineCallbacks + def write_to_file(self, source, path): + """Write `source` to the on disk media store, and also the backup store + if configured. + + Args: + source: A file like object that should be written + path: Relative path to write file to + + Returns: + string: the file path written to in the primary media store + """ + fname = os.path.join(self.primary_base_path, path) self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + yield preserve_context_over_fn( + threads.deferToThread, + self.write_file_synchronously, source, fname, + ) # Write to backup repository - if self.backup_filepaths: - backup_fname = file_name_func(self.backup_filepaths) + yield self.copy_to_backup(source, path) + + defer.returnValue(fname) + + @defer.inlineCallbacks + def copy_to_backup(self, source, path): + if self.backup_base_path: + backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield preserve_context_over_fn( - threads.deferToThread, write_file_thread, backup_fname, + threads.deferToThread, + self.write_file_synchronously, source, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) - - defer.returnValue(fname) + preserve_fn(threads.deferToThread)( + self.write_file_synchronously, source, backup_fname, + ) @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = yield self._write_to_file( - content, lambda f: f.local_media_filepath(media_id) + fname = yield self.write_to_file( + content, self.filepaths.local_media_filepath_rel(media_id) ) logger.info("Stored local media in file %r", fname) @@ -180,9 +203,10 @@ def _get_remote_media_impl(self, server_name, media_id): def _download_remote_file(self, server_name, media_id): file_id = random_string(24) - fname = self.filepaths.remote_media_filepath( + fpath = self.filepaths.remote_media_filepath_rel( server_name, file_id ) + fname = os.path.join(self.primary_base_path, fpath) self._makedirs(fname) try: @@ -224,6 +248,9 @@ def _download_remote_file(self, server_name, media_id): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") + with open(fname) as f: + yield self.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -322,15 +349,15 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.local_media_thumbnail( + self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_local_thumbnail( + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -350,15 +377,15 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.remote_media_thumbnail( + self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail( + yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -403,17 +430,16 @@ def generate_thumbnails(): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - def path_name_func(f): - if url_cache: - return f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - return f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + if url_cache: + file_path = self.filepaths.url_cache_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) + else: + file_path = self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, @@ -460,12 +486,11 @@ def generate_thumbnails(): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - def path_name_func(f): - return f.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, @@ -491,6 +516,8 @@ def delete_old_remote_media(self, before_ts): logger.info("Deleting: %r", key) + # TODO: Should we delete from the backup store + with (yield self.remote_media_linearizer.queue(key)): full_path = self.filepaths.remote_media_filepath(origin, file_id) try: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 895b480d5cc0..f82b8fbc5157 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -59,6 +59,7 @@ def __init__(self, hs, media_repo): self.store = hs.get_datastore() self.client = SpiderHttpClient(hs) self.media_repo = media_repo + self.primary_base_path = media_repo.primary_base_path self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -262,7 +263,8 @@ def _download_url(self, url, user): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fname = self.filepaths.url_cache_filepath(file_id) + fpath = self.filepaths.url_cache_filepath_rel(file_id) + fname = os.path.join(self.primary_base_path, fpath) self.media_repo._makedirs(fname) try: @@ -273,6 +275,9 @@ def _download_url(self, url, user): ) # FIXME: pass through 404s and other error messages nicely + with open(fname) as f: + yield self.media_repo.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 60498b08aad9..e1ee535b9a01 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -51,7 +51,11 @@ def aspect(self, max_width, max_height): return ((max_height * self.width) // self.height, max_height) def scale(self, width, height, output_type): - """Rescales the image to the given dimensions""" + """Rescales the image to the given dimensions. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk + """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -65,6 +69,9 @@ def crop(self, width, height, output_type): Args: max_width: The largest possible width. max_height: The larget possible height. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width From 802ca12d0551ff761e01d9af8348df1dc96fc372 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:37:21 +0100 Subject: [PATCH 07/29] Don't close file prematurely --- synapse/rest/media/v1/media_repository.py | 22 ++++++++++++++----- synapse/rest/media/v1/preview_url_resource.py | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 398e973ca9b8..63ed1c4268be 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,16 +97,20 @@ def _makedirs(filepath): os.makedirs(dirname) @staticmethod - def write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) + source.close() + @defer.inlineCallbacks def write_to_file(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. + Will close source once finished. + Args: source: A file like object that should be written path: Relative path to write file to @@ -120,7 +124,7 @@ def write_to_file(self, source, path): # Write to the main repository yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, fname, + self._write_file_synchronously, source, fname, ) # Write to backup repository @@ -130,6 +134,10 @@ def write_to_file(self, source, path): @defer.inlineCallbacks def copy_to_backup(self, source, path): + """Copy file like object source to the backup media store, if configured. + + Will close source after its done. + """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) @@ -139,12 +147,14 @@ def copy_to_backup(self, source, path): if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) else: preserve_fn(threads.deferToThread)( - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) + else: + source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -248,8 +258,8 @@ def _download_remote_file(self, server_name, media_id): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - with open(fname) as f: - yield self.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f82b8fbc5157..a3288c9cc608 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,8 @@ def _download_url(self, url, user): ) # FIXME: pass through 404s and other error messages nicely - with open(fname) as f: - yield self.media_repo.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.media_repo.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() From 1259a76047a0a718ce0c9fb26513c9127f8ea919 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:39:23 +0100 Subject: [PATCH 08/29] Get len before close --- synapse/rest/media/v1/media_repository.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 63ed1c4268be..d25b98db458f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -359,6 +359,8 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -368,8 +370,7 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -387,6 +388,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +399,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -449,11 +451,12 @@ def generate_thumbnails(): media_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ @@ -500,11 +503,13 @@ def generate_thumbnails(): server_name, file_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ From cc505b4b5e98ba70d8576a562fc36b03d6aa5150 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:52:30 +0100 Subject: [PATCH 09/29] getvalue closes buffer --- synapse/rest/media/v1/media_repository.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d25b98db458f..ff2ddd2f1807 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -258,7 +258,7 @@ def _download_remote_file(self, server_name, media_id): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done + # Will close the file after its done yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] @@ -359,8 +359,6 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) - output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -369,6 +367,8 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, ) logger.info("Stored thumbnail in file %r", output_path) + t_len = os.path.getsize(output_path) + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, t_len ) @@ -388,7 +388,6 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +396,9 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail_rel( + t_len = os.path.getsize(output_path) + + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) @@ -451,9 +452,8 @@ def generate_thumbnails(): media_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -503,9 +503,8 @@ def generate_thumbnails(): server_name, file_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, From 4ae85ae12190015595f979f1a302ee608de6fd65 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:57:31 +0100 Subject: [PATCH 10/29] Don't close prematurely.. --- synapse/rest/media/v1/media_repository.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index ff2ddd2f1807..80b14a673933 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,12 +97,13 @@ def _makedirs(filepath): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname, close_source=False): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) - source.close() + if close_source: + source.close() @defer.inlineCallbacks def write_to_file(self, source, path): @@ -148,10 +149,12 @@ def copy_to_backup(self, source, path): yield preserve_context_over_fn( threads.deferToThread, self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: source.close() From d76621a47b7b4b778055760d43df9d02614dac19 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 18:16:25 +0100 Subject: [PATCH 11/29] Fix comments --- synapse/rest/media/v1/filepath.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 43d0eea00d5e..6923a3fbd314 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -35,7 +35,7 @@ def _wrapped(self, *args, **kwargs): class MediaFilePaths(object): """Describes where files are stored on disk. - Most of the function have a `*_rel` variant which returns a file path that + Most of the functions have a `*_rel` variant which returns a file path that is relative to the base media store path. This is mainly used when we want to write to the backup media store (when one is configured) """ diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a3288c9cc608..e986e855a783 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -343,6 +343,9 @@ def _download_url(self, url, user): def _expire_url_cache_data(self): """Clean up expired url cache content, media and thumbnails. """ + + # TODO: Delete from backup media store + now = self.clock.time_msec() # First we delete expired url cache entries From b60859d6cc429ea1934b94a8749caadd9a96ee21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:24:19 +0100 Subject: [PATCH 12/29] Use make_deferred_yieldable --- synapse/rest/media/v1/media_repository.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 80b14a673933..5c5020fe9d9c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -123,7 +123,7 @@ def write_to_file(self, source, path): self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, fname, ) @@ -146,7 +146,7 @@ def copy_to_backup(self, source, path): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, backup_fname, close_source=True, @@ -355,7 +355,7 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -384,7 +384,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -443,7 +443,7 @@ def generate_thumbnails(): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: if url_cache: @@ -499,7 +499,7 @@ def generate_thumbnails(): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: file_path = self.filepaths.remote_media_thumbnail_rel( From 64db043a71238db3f65f575c40f29260b83145be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:25:01 +0100 Subject: [PATCH 13/29] Move makedirs to thread --- synapse/rest/media/v1/media_repository.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c5020fe9d9c..72aad221a897 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,7 @@ def _makedirs(filepath): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) @@ -120,7 +121,6 @@ def write_to_file(self, source, path): string: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) - self._makedirs(fname) # Write to the main repository yield make_deferred_yieldable( @@ -141,7 +141,6 @@ def copy_to_backup(self, source, path): """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) - self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return From 35332298ef6a9828aa1fdb10f59230f47763084e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:32 +0100 Subject: [PATCH 14/29] Fix up comments --- synapse/rest/media/v1/media_repository.py | 28 +++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 72aad221a897..f3a5b19a808f 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,14 @@ def _makedirs(filepath): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + close_source (bool): Whether to close source after writing + """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: @@ -115,10 +123,10 @@ def write_to_file(self, source, path): Args: source: A file like object that should be written - path: Relative path to write file to + path(str): Relative path to write file to Returns: - string: the file path written to in the primary media store + Deferred[str]: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) @@ -138,6 +146,10 @@ def copy_to_backup(self, source, path): """Copy file like object source to the backup media store, if configured. Will close source after its done. + + Args: + source: A file like object that should be written + path(str): Relative path to write file to """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) @@ -161,6 +173,18 @@ def copy_to_backup(self, source, path): @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): + """Store uploaded content for a local user and return the mxc URL + + Args: + media_type(str): The content type of the file + upload_name(str): The name of the file + content: A file like object that is the content to store + content_length(int): The length of the content + auth_user(str): The user_id of the uploader + + Returns: + Deferred[str]: The mxc url of the stored content + """ media_id = random_string(24) fname = yield self.write_to_file( From e3428d26ca5a23a3dac6d106aff8ac19f9839f32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:59 +0100 Subject: [PATCH 15/29] Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f3a5b19a808f..76220a553173 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -395,7 +395,7 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail_rel( + yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len ) From 505371414f6ba9aeaa95eb8d34f7893c4cc2b07e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:23:53 +0100 Subject: [PATCH 16/29] Fix up thumbnailing function --- synapse/rest/media/v1/media_repository.py | 127 ++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 8 +- synapse/rest/media/v1/thumbnailer.py | 13 +- 3 files changed, 73 insertions(+), 75 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 76220a553173..36f42c73be78 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -206,7 +206,7 @@ def create_content(self, media_type, upload_name, content, content_length, "media_length": content_length, } - yield self._generate_local_thumbnails(media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -339,7 +339,7 @@ def _download_remote_file(self, server_name, media_id): "filesystem_id": file_id, } - yield self._generate_remote_thumbnails( + yield self._generate_thumbnails( server_name, media_id, media_info ) @@ -385,6 +385,8 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -414,6 +416,8 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -432,13 +436,28 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + """Generate and store thumbnails for an image. + + Args: + server_name(str|None): The server name if remote media, else None if local + media_id(str) + media_info(dict) + url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + used exclusively by the url previewer + + Returns: + Deferred[dict]: Dict with "width" and "height" keys of original image + """ media_type = media_info["media_type"] + file_id = media_info.get("filesystem_id") requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - if url_cache: + if server_name: + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + elif url_cache: input_path = self.filepaths.url_cache_filepath(media_id) else: input_path = self.filepaths.local_media_filepath(media_id) @@ -454,22 +473,40 @@ def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): ) return - local_thumbnails = [] + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails = {} + for r_width, r_height, r_method, r_type in requirements: + if r_method == "crop": + thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + elif r_method == "scale": + t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[(t_width, t_height)] = (r_method, r_type) + + # Now we generate the thumbnails for each dimension, store it + for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) - def generate_thumbnails(): - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, + if r_type == "crop": + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.crop, + r_width, r_height, r_type, + ) + else: + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.scale, + r_width, r_height, r_type, ) - local_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) - - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) + t_width, t_height = t_byte_source.dimensions - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: + if server_name: + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) + elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) @@ -481,61 +518,15 @@ def generate_thumbnails(): output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) - - defer.returnValue({ - "width": m_width, - "height": m_height, - }) - - @defer.inlineCallbacks - def _generate_remote_thumbnails(self, server_name, media_id, media_info): - media_type = media_info["media_type"] - file_id = media_info["filesystem_id"] - requirements = self._get_thumbnail_requirements(media_type) - if not requirements: - return - - remote_thumbnails = [] - - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - def generate_thumbnails(): - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, - ) - - remote_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) - - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) - - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - - output_path = yield self.write_to_file(t_byte_source, file_path) - t_len = os.path.getsize(output_path) - - yield self.store.store_remote_media_thumbnail( + if server_name: + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) + else: + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) defer.returnValue({ "width": m_width, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e986e855a783..c734f6b7cd99 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -171,8 +171,8 @@ def callback(media_info): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): - dims = yield self.media_repo._generate_local_thumbnails( - media_info['filesystem_id'], media_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, media_info['filesystem_id'], media_info, url_cache=True, ) og = { @@ -217,8 +217,8 @@ def callback(media_info): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images - dims = yield self.media_repo._generate_local_thumbnails( - image_info['filesystem_id'], image_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, image_info['filesystem_id'], image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index e1ee535b9a01..09650bd52772 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ def scale(self, width, height, output_type): """Rescales the image to the given dimensions. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ def crop(self, width, height, output_type): max_height: The larget possible height. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,6 +92,13 @@ def crop(self, width, height, output_type): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = BytesIO() + output_bytes_io = ImageIO(output_image.size) output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) + output_image.close() return output_bytes_io + + +class ImageIO(BytesIO): + def __init__(self, dimensions): + super(ImageIO, self).__init__() + self.dimensions = dimensions From 0e28281a021101ac199cbf2d0d130190110921bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:33:49 +0100 Subject: [PATCH 17/29] Fix up --- synapse/rest/media/v1/media_repository.py | 62 +++++++++++------------ synapse/rest/media/v1/thumbnailer.py | 13 ++--- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 36f42c73be78..a310d08f5fb8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -131,10 +131,9 @@ def write_to_file(self, source, path): fname = os.path.join(self.primary_base_path, path) # Write to the main repository - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, fname, - ) + )) # Write to backup repository yield self.copy_to_backup(source, path) @@ -157,11 +156,10 @@ def copy_to_backup(self, source, path): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, backup_fname, close_source=True, - ) + )) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, @@ -378,11 +376,10 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -409,11 +406,10 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -478,34 +474,32 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + thumbnails.setdefault((r_width, r_height), (r_method, r_type)) elif r_method == "scale": - t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) thumbnails[(t_width, t_height)] = (r_method, r_type) # Now we generate the thumbnails for each dimension, store it - for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): - t_byte_source = thumbnailer.crop(t_width, t_height, t_type) - - if r_type == "crop": - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.crop, - r_width, r_height, r_type, - ) + for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) else: - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.scale, - r_width, r_height, r_type, - ) - - t_width, t_height = t_byte_source.dimensions + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + server_name, file_id, t_width, t_height, t_type, t_method + ) elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -515,14 +509,16 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals media_id, t_width, t_height, t_type, t_method ) + # Write to disk output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) + # Write to database if server_name: yield self.store.store_remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ) + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) else: yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 09650bd52772..e1ee535b9a01 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ def scale(self, width, height, output_type): """Rescales the image to the given dimensions. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ def crop(self, width, height, output_type): max_height: The larget possible height. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,13 +92,6 @@ def crop(self, width, height, output_type): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = ImageIO(output_image.size) + output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_image.close() return output_bytes_io - - -class ImageIO(BytesIO): - def __init__(self, dimensions): - super(ImageIO, self).__init__() - self.dimensions = dimensions From 9732ec6797339dd33bc472cef5081a858ddccb30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:34:41 +0100 Subject: [PATCH 18/29] s/write_to_file/write_to_file_and_backup/ --- synapse/rest/media/v1/media_repository.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a310d08f5fb8..c9753ebb52a6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -115,7 +115,7 @@ def _write_file_synchronously(source, fname, close_source=False): source.close() @defer.inlineCallbacks - def write_to_file(self, source, path): + def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. @@ -185,7 +185,7 @@ def create_content(self, media_type, upload_name, content, content_length, """ media_id = random_string(24) - fname = yield self.write_to_file( + fname = yield self.write_to_file_and_backup( content, self.filepaths.local_media_filepath_rel(media_id) ) @@ -384,7 +384,7 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -414,7 +414,7 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method @@ -510,7 +510,7 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals ) # Write to disk - output_path = yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) t_len = os.path.getsize(output_path) # Write to database From ae5d18617afd98bb5d51b43c2a12a99e9d96da39 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:35:44 +0100 Subject: [PATCH 19/29] Make things be absolute paths again --- synapse/rest/media/v1/filepath.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 6923a3fbd314..a3a15ac30275 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -172,12 +172,12 @@ def url_cache_thumbnail_directory(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ def url_cache_thumbnail_dirs_to_delete(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], ), ] From 4d7e1dde70e6f2300ab83fb3208152f3d73bde71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:36:32 +0100 Subject: [PATCH 20/29] Remove unnecessary diff --- synapse/rest/media/v1/media_repository.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index c9753ebb52a6..f06813c48c58 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -63,9 +63,7 @@ def __init__(self, hs): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = None - if hs.config.backup_media_store_path: - self.backup_base_path = hs.config.backup_media_store_path + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store From a675bd08bd1a016a16bd0e10547e8c26be391ee0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:41:06 +0100 Subject: [PATCH 21/29] Add paths back in... --- synapse/rest/media/v1/filepath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index a3a15ac30275..fec0bbf57239 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -100,7 +100,7 @@ def remote_media_thumbnail_rel(self, server_name, file_id, width, height, def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - "remote_thumbnail", server_name, + self.primary_base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ def url_cache_filepath_dirs_to_delete(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], ), ] From 1f43d2239757db0b376a3582066190221942cddc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:42:07 +0100 Subject: [PATCH 22/29] Don't needlessly rename variable --- synapse/rest/media/v1/filepath.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index fec0bbf57239..d5164e47e02f 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -27,7 +27,7 @@ def _wrap_in_base_path(func): @functools.wraps(func) def _wrapped(self, *args, **kwargs): path = func(self, *args, **kwargs) - return os.path.join(self.primary_base_path, path) + return os.path.join(self.base_path, path) return _wrapped @@ -41,7 +41,7 @@ class MediaFilePaths(object): """ def __init__(self, primary_base_path): - self.primary_base_path = primary_base_path + self.base_path = primary_base_path def default_thumbnail_rel(self, default_top_level, default_sub_type, width, height, content_type, method): @@ -100,7 +100,7 @@ def remote_media_thumbnail_rel(self, server_name, file_id, width, height, def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.primary_base_path, "remote_thumbnail", server_name, + self.base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ def url_cache_filepath_dirs_to_delete(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], ), ] @@ -172,12 +172,12 @@ def url_cache_thumbnail_directory(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ def url_cache_thumbnail_dirs_to_delete(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], ), ] From c021c39cbd0bf1f0a85c9699275600ac35aa9ec4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:46:53 +0100 Subject: [PATCH 23/29] Remove spurious addition --- synapse/rest/media/v1/media_repository.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f06813c48c58..3b8fe5ddb484 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -380,8 +380,6 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -410,8 +408,6 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( From ad1911bbf46f658ee343ee26e3011b3f1bcbd572 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:05 +0100 Subject: [PATCH 24/29] Comment --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b8fe5ddb484..700fd0dd244c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -121,7 +121,7 @@ def write_to_file_and_backup(self, source, path): Args: source: A file like object that should be written - path(str): Relative path to write file to + path (str): Relative path to write file to Returns: Deferred[str]: the file path written to in the primary media store From 31aa7bd8d1748548b2523f58b348bb6787dcc019 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:38 +0100 Subject: [PATCH 25/29] Move type into key --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 700fd0dd244c..dee834389fbe 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,15 +468,15 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height), (r_method, r_type)) + thumbnails.setdefault((r_width, r_height,r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - thumbnails[(t_width, t_height)] = (r_method, r_type) + thumbnails[(t_width, t_height, r_type)] = r_method # Now we generate the thumbnails for each dimension, store it - for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): # Generate the thumbnail if t_type == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( From b92a8e6e4aa2283daaa4e6050f1dbd503ddc9434 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:58:57 +0100 Subject: [PATCH 26/29] PEP8 --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index dee834389fbe..d2ac0175d7e2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,7 +468,7 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height,r_type), r_method) + thumbnails.setdefault((r_width, r_height, r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) From 2b24416e90b0bf1ee6d29cfc384670f4eeca0ced Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 14:11:34 +0100 Subject: [PATCH 27/29] Don't reuse source but instead copy from primary media store to backup --- synapse/rest/media/v1/media_repository.py | 28 ++++++------------- synapse/rest/media/v1/preview_url_resource.py | 3 +- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d2ac0175d7e2..e32a67e16a07 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -95,7 +95,7 @@ def _makedirs(filepath): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname, close_source=False): + def _write_file_synchronously(source, fname): """Write `source` to the path `fname` synchronously. Should be called from a thread. @@ -109,16 +109,11 @@ def _write_file_synchronously(source, fname, close_source=False): with open(fname, "wb") as f: shutil.copyfileobj(source, f) - if close_source: - source.close() - @defer.inlineCallbacks def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. - Will close source once finished. - Args: source: A file like object that should be written path (str): Relative path to write file to @@ -134,37 +129,31 @@ def write_to_file_and_backup(self, source, path): )) # Write to backup repository - yield self.copy_to_backup(source, path) + yield self.copy_to_backup(path) defer.returnValue(fname) @defer.inlineCallbacks - def copy_to_backup(self, source, path): - """Copy file like object source to the backup media store, if configured. - - Will close source after its done. + def copy_to_backup(self, path): + """Copy a file from the primary to backup media store, if configured. Args: - source: A file like object that should be written path(str): Relative path to write file to """ if self.backup_base_path: + primary_fname = os.path.join(self.primary_base_path, path) backup_fname = os.path.join(self.backup_base_path, path) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, )) else: preserve_fn(threads.deferToThread)( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, ) - else: - source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -280,8 +269,7 @@ def _download_remote_file(self, server_name, media_id): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done - yield self.copy_to_backup(open(fname), fpath) + yield self.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c734f6b7cd99..2a3e37fdf40c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,7 @@ def _download_url(self, url, user): ) # FIXME: pass through 404s and other error messages nicely - # Will close the file after its done - yield self.media_repo.copy_to_backup(open(fname), fpath) + yield self.media_repo.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() From 6b725cf56aaf090f9cc9d5409dec7912feae8869 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:23:41 +0100 Subject: [PATCH 28/29] Remove old comment --- synapse/rest/media/v1/media_repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e32a67e16a07..cc267d0c164b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -102,7 +102,6 @@ def _write_file_synchronously(source, fname): Args: source: A file like object to be written fname (str): Path to write to - close_source (bool): Whether to close source after writing """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file From 1b6b0b1e66ab1cd5682ad1fa99020474afd6db7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:34:08 +0100 Subject: [PATCH 29/29] Add try/finally block to close t_byte_source --- synapse/rest/media/v1/media_repository.py | 65 ++++++++++++++--------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index cc267d0c164b..515b3d3e7466 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -367,12 +367,16 @@ def generate_local_exact_thumbnail(self, media_id, t_width, t_height, )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -395,12 +399,16 @@ def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -464,18 +472,6 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Generate the thumbnail - if t_type == "crop": - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.crop, - r_width, r_height, t_type, - )) - else: - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.scale, - r_width, r_height, t_type, - )) - # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( @@ -490,8 +486,29 @@ def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=Fals media_id, t_width, t_height, t_type, t_method ) - # Write to disk - output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) + else: + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + + if not t_byte_source: + continue + + try: + # Write to disk + output_path = yield self.write_to_file_and_backup( + t_byte_source, file_path, + ) + finally: + t_byte_source.close() + t_len = os.path.getsize(output_path) # Write to database