Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix error on sqlite 3.7 #2697

Merged
merged 3 commits
Nov 27, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,16 @@ def _download_url(self, url, user):
def _expire_url_cache_data(self):
"""Clean up expired url cache content, media and thumbnails.
"""

# TODO: Delete from backup media store

now = self.clock.time_msec()

logger.info("Running url preview cache expiry")

if not self.store.has_completed_background_updates():
logger.info("Still running DB updates; skipping expiry")
return

# First we delete expired url cache entries
media_ids = yield self.store.get_expired_url_cache(now)

Expand Down Expand Up @@ -426,8 +431,7 @@ def _expire_url_cache_data(self):

yield self.store.delete_url_cache_media(removed_media)

if removed_media:
logger.info("Deleted %d media from url cache", len(removed_media))
logger.info("Deleted %d media from url cache", len(removed_media))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have we removed the conditional? We added it so that we wouldn't spam the logs with lots of pointless log lines.

Copy link
Member Author

@richvdh richvdh Nov 21, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it makes a nice closure to

logger.info("Running url preview cache expiry")
.

I don't think that two log lines every ten seconds constitutes a significant amount of spam. It's useful to know it's running; if nothing else it highlights the fact that it was running four times every ten seconds (leading to #2701).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

People specifically added a PR as it was annoying, fwiw. Once every ten seconds isn't too bad by itself, but we have quite a few background tasks that should probably be logging to the same extent.

Personally, I'd probably put the first log line after we ask the DB for expired URLs and then log both if any expired media ids were returned or we're at debug logging.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

People specifically added a PR as it was annoying, fwiw.

wat? people added a PR?

we have quite a few background tasks that should probably be logging to the same extent.

Probably. I'm not going to fix them in this PR though.

Personally, I'd probably put the first log line after we ask the DB for expired URLs and then log both if any expired media ids were returned or we're at debug logging.

I'm really unhappy about background tasks that run silently. Not least because it leads to the sort of fuck-up we had here where it's running much more often than we think it is and it's only good luck which stops the processes racing against each other and doing real damage.



def decode_and_calc_og(body, media_uri, request_encoding=None):
Expand Down
12 changes: 11 additions & 1 deletion synapse/storage/background_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def __init__(self, db_conn, hs):
self._background_update_performance = {}
self._background_update_queue = []
self._background_update_handlers = {}
self._all_done = False

@defer.inlineCallbacks
def start_doing_background_updates(self):
Expand All @@ -106,8 +107,17 @@ def start_doing_background_updates(self):
"No more background updates to do."
" Unscheduling background update task."
)
self._all_done = True
defer.returnValue(None)

def has_completed_background_updates(self):
    """Whether every registered background update has finished.

    Returns:
        bool: True once the background updater has drained its queue and
            flagged itself done (it logs "No more background updates to do."
            and sets ``_all_done`` at that point); False while updates may
            still be pending.
    """
    return self._all_done

@defer.inlineCallbacks
def do_next_background_update(self, desired_duration_ms):
"""Does some amount of work on the next queued background update
Expand Down Expand Up @@ -269,7 +279,7 @@ def create_index_sqlite(conn):
# Sqlite doesn't support concurrent creation of indexes.
#
# We don't use partial indices on SQLite as they weren't introduced
# until 3.8, and wheezy and CentOS 7 have 3.7
#
# We assume that sqlite doesn't give us invalid indices; however
# we may still end up with the index existing but the
Expand Down
16 changes: 13 additions & 3 deletions synapse/storage/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from synapse.storage.background_updates import BackgroundUpdateStore

from ._base import SQLBaseStore


class MediaRepositoryStore(BackgroundUpdateStore):
    """Persistence for attachments and avatars"""

    def __init__(self, db_conn, hs):
        super(MediaRepositoryStore, self).__init__(db_conn, hs)

        # Rebuild local_media_repository_url_idx via a background update.
        # register_background_index_update knows which database engine is in
        # use, so it can create a partial index where that is supported —
        # presumably plain on sqlite, partial on postgres (TODO: confirm in
        # background_updates.py).
        self.register_background_index_update(
            update_name='local_media_repository_url_idx',
            index_name='local_media_repository_url_idx',
            table='local_media_repository',
            columns=['created_ts'],
            where_clause='url_cache IS NOT NULL',
        )

    def get_default_thumbnails(self, top_level_type, sub_type):
        # No server-side default thumbnails are provided.
        return []

Expand Down
5 changes: 4 additions & 1 deletion synapse/storage/schema/delta/44/expire_url_cache.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
* limitations under the License.
*/

CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL;
-- this didn't work on SQLite 3.7 (because of lack of partial indexes), so was
-- removed and replaced with 46/local_media_repository_url_idx.sql.
--
-- CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL;

-- we need to change `expires` to `expires_ts` so that we can index on it. SQLite doesn't support
-- indices on expressions until 3.9.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/* Copyright 2017 New Vector Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

-- Schedule a background update that recreates the
-- local_media_repository_url_idx index.
--
-- The index itself is cheap to build; a background update is used because
-- the background_index_update code knows whether it is running against
-- postgres or sqlite, and so can choose a partial index only where the
-- engine supports one.
INSERT INTO background_updates (update_name, progress_json) VALUES
    ('local_media_repository_url_idx', '{}');