From c85c9125627a62c73711786723be12be30d7a81e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Oct 2015 15:48:31 +0100 Subject: [PATCH 01/24] Add basic full text search impl. --- synapse/api/constants.py | 19 ++++++ synapse/handlers/__init__.py | 2 + synapse/handlers/search.py | 95 ++++++++++++++++++++++++++ synapse/rest/client/v1/room.py | 17 +++++ synapse/storage/__init__.py | 2 + synapse/storage/_base.py | 2 +- synapse/storage/schema/delta/24/fts.py | 57 ++++++++++++++++ synapse/storage/search.py | 75 ++++++++++++++++++++ 8 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 synapse/handlers/search.py create mode 100644 synapse/storage/schema/delta/24/fts.py create mode 100644 synapse/storage/search.py diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 008ee647276f..7c7f9ff957a4 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -84,3 +84,22 @@ class RoomCreationPreset(object): PRIVATE_CHAT = "private_chat" PUBLIC_CHAT = "public_chat" TRUSTED_PRIVATE_CHAT = "trusted_private_chat" + + +class SearchConstraintTypes(object): + FTS = "fts" + EXACT = "exact" + PREFIX = "prefix" + SUBSTRING = "substring" + RANGE = "range" + + +class KnownRoomEventKeys(object): + CONTENT_BODY = "content.body" + CONTENT_MSGTYPE = "content.msgtype" + CONTENT_NAME = "content.name" + CONTENT_TOPIC = "content.topic" + + SENDER = "sender" + ORIGIN_SERVER_TS = "origin_server_ts" + ROOM_ID = "room_id" diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 8725c3c420ad..87b4d381c7da 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -32,6 +32,7 @@ from .auth import AuthHandler from .identity import IdentityHandler from .receipts import ReceiptsHandler +from .search import SearchHandler class Handlers(object): @@ -68,3 +69,4 @@ def __init__(self, hs): self.sync_handler = SyncHandler(hs) self.auth_handler = AuthHandler(hs) self.identity_handler = IdentityHandler(hs) + 
self.search_handler = SearchHandler(hs) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py new file mode 100644 index 000000000000..8b997fc39471 --- /dev/null +++ b/synapse/handlers/search.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from ._base import BaseHandler + +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.errors import SynapseError +from synapse.events.utils import serialize_event + +import logging + + +logger = logging.getLogger(__name__) + + +KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { + KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.CONTENT_NAME: [SearchConstraintTypes.FTS, SearchConstraintTypes.EXACT, SearchConstraintTypes.SUBSTRING], + KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], + KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], + KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], + KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], +} + + +class RoomConstraint(object): + def __init__(self, search_type, keys, value): + self.search_type = search_type + self.keys = keys + self.value = value + + @classmethod + def from_dict(cls, d): + search_type = d["type"] + keys = d["keys"] + + for key in keys: + if key not in 
KEYS_TO_ALLOWED_CONSTRAINT_TYPES: + raise SynapseError(400, "Unrecognized key %r", key) + + if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: + raise SynapseError(400, "Disallowed constraint type %r for key %r", search_type, key) + + return cls(search_type, keys, d["value"]) + + +class SearchHandler(BaseHandler): + + def __init__(self, hs): + super(SearchHandler, self).__init__(hs) + + @defer.inlineCallbacks + def search(self, content): + constraint_dicts = content["search_categories"]["room_events"]["constraints"] + constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] + + fts = False + for c in constraints: + if c.search_type == SearchConstraintTypes.FTS: + if fts: + raise SynapseError(400, "Only one constraint can be FTS") + fts = True + + res = yield self.hs.get_datastore().search_msgs(constraints) + + time_now = self.hs.get_clock().time_msec() + + results = [ + { + "rank": r["rank"], + "result": serialize_event(r["result"], time_now) + } + for r in res + ] + + logger.info("returning: %r", results) + + results.sort(key=lambda r: -r["rank"]) + + defer.returnValue(results) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 23871f161ed9..35bd702a43a9 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -529,6 +529,22 @@ def on_PUT(self, request, room_id, user_id): defer.returnValue((200, {})) +class SearchRestServlet(ClientV1RestServlet): + PATTERN = client_path_pattern( + "/search$" + ) + + @defer.inlineCallbacks + def on_POST(self, request): + auth_user, _ = yield self.auth.get_user_by_req(request) + + content = _parse_json(request) + + results = yield self.handlers.search_handler.search(content) + + defer.returnValue((200, results)) + + def _parse_json(request): try: content = json.loads(request.content.read()) @@ -585,3 +601,4 @@ def register_servlets(hs, http_server): RoomInitialSyncRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) 
RoomTypingRestServlet(hs).register(http_server) + SearchRestServlet(hs).register(http_server) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 340e59afcb68..5f91ef77c047 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -40,6 +40,7 @@ from .end_to_end_keys import EndToEndKeyStore from .receipts import ReceiptsStore +from .search import SearchStore import fnmatch @@ -79,6 +80,7 @@ class DataStore(RoomMemberStore, RoomStore, EventsStore, ReceiptsStore, EndToEndKeyStore, + SearchStore, ): def __init__(self, hs): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 693784ad388e..218e7080546e 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -519,7 +519,7 @@ def _simple_select_one_onecol(self, table, keyvalues, retcol, allow_none=False, desc="_simple_select_one_onecol"): """Executes a SELECT query on the named table, which is expected to - return a single row, returning a single column from it." + return a single row, returning a single column from it. Args: table : string giving the table name diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py new file mode 100644 index 000000000000..568033275877 --- /dev/null +++ b/synapse/storage/schema/delta/24/fts.py @@ -0,0 +1,57 @@ +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging + +from synapse.storage import get_statements +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +POSTGRES_SQL = """ +CREATE TABLE event_search ( + event_id TEXT, + room_id TEXT, + key TEXT, + vector tsvector +); + +INSERT INTO event_search SELECT + event_id, room_id, 'content.body', + to_tsvector('english', json::json->'content'->>'body') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.name', + to_tsvector('english', json::json->'content'->>'name') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; + +INSERT INTO event_search SELECT + event_id, room_id, 'content.topic', + to_tsvector('english', json::json->'content'->>'topic') + FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; + + +CREATE INDEX event_search_idx ON event_search USING gin(vector); +""" + + +def run_upgrade(cur, database_engine, *args, **kwargs): + if not isinstance(database_engine, PostgresEngine): + # We only support FTS for postgres currently. + return + + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) diff --git a/synapse/storage/search.py b/synapse/storage/search.py new file mode 100644 index 000000000000..eea44777650a --- /dev/null +++ b/synapse/storage/search.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.internet import defer + +from _base import SQLBaseStore +from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes + + +class SearchStore(SQLBaseStore): + @defer.inlineCallbacks + def search_msgs(self, constraints): + clauses = [] + args = [] + fts = None + + for c in constraints: + local_clauses = [] + if c.search_type == SearchConstraintTypes.FTS: + fts = c.value + for key in c.keys: + local_clauses.append("key = ?") + args.append(key) + elif c.search_type == SearchConstraintTypes.EXACT: + for key in c.keys: + if key == KnownRoomEventKeys.ROOM_ID: + for value in c.value: + local_clauses.append("room_id = ?") + args.append(value) + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) + + sql = ( + "SELECT ts_rank_cd(vector, query) AS rank, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " WHERE vector @@ query" + ) + + for clause in clauses: + sql += " AND " + clause + + sql += " ORDER BY rank DESC" + + results = yield self._execute( + "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + ) + + events = yield self._get_events([r["event_id"] for r in results]) + + event_map = { + ev.event_id: ev + for ev in events + } + + defer.returnValue([ + { + "rank": r["rank"], + "result": event_map[r["event_id"]] + } + for r in results + if r["event_id"] in event_map + ]) From 61561b9df791ec90e287e535cc75831c2016bf36 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:49:53 +0100 Subject: [PATCH 02/24] Keep FTS indexes up to date. 
Only search through rooms currently joined --- synapse/handlers/search.py | 31 ++++++++++++++++++-------- synapse/rest/client/v1/room.py | 2 +- synapse/storage/events.py | 2 ++ synapse/storage/room.py | 22 ++++++++++++++++++ synapse/storage/schema/delta/24/fts.py | 3 ++- synapse/storage/search.py | 7 +++++- 6 files changed, 55 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8b997fc39471..b6bdb752e9f4 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -65,7 +65,7 @@ def __init__(self, hs): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def search(self, content): + def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] @@ -76,20 +76,33 @@ def search(self, content): raise SynapseError(400, "Only one constraint can be FTS") fts = True - res = yield self.hs.get_datastore().search_msgs(constraints) + rooms = yield self.store.get_rooms_for_user( + user.to_string(), + ) - time_now = self.hs.get_clock().time_msec() + # For some reason the list of events contains duplicates + # TODO(paul): work out why because I really don't think it should + room_ids = set(r.room_id for r in rooms) - results = [ - { + res = yield self.store.search_msgs(room_ids, constraints) + + time_now = self.clock.time_msec() + + results = { + r["result"].event_id: { "rank": r["rank"], "result": serialize_event(r["result"], time_now) } for r in res - ] + } logger.info("returning: %r", results) - results.sort(key=lambda r: -r["rank"]) - - defer.returnValue(results) + defer.returnValue({ + "search_categories": { + "room_events": { + "results": results, + "count": len(results) + } + } + }) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 35bd702a43a9..94adabca620b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ 
-540,7 +540,7 @@ def on_POST(self, request): content = _parse_json(request) - results = yield self.handlers.search_handler.search(content) + results = yield self.handlers.search_handler.search(auth_user, content) defer.returnValue((200, results)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 416ef6af938c..e6c1abfc273f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -307,6 +307,8 @@ def _persist_events_txn(self, txn, events_and_contexts, backfilled, self._store_room_name_txn(txn, event) elif event.type == EventTypes.Topic: self._store_room_topic_txn(txn, event) + elif event.type == EventTypes.Message: + self._store_room_message_txn(txn, event) elif event.type == EventTypes.Redaction: self._store_redaction(txn, event) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5e07b7e0e523..e4e830944aa6 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -175,6 +175,10 @@ def _store_room_topic_txn(self, txn, event): }, ) + self._store_event_search_txn( + txn, event, "content.topic", event.content["topic"] + ) + def _store_room_name_txn(self, txn, event): if hasattr(event, "content") and "name" in event.content: self._simple_insert_txn( @@ -187,6 +191,24 @@ def _store_room_name_txn(self, txn, event): } ) + self._store_event_search_txn( + txn, event, "content.name", event.content["name"] + ) + + def _store_room_message_txn(self, txn, event): + if hasattr(event, "content") and "body" in event.content: + self._store_event_search_txn( + txn, event, "content.body", event.content["body"] + ) + + def _store_event_search_txn(self, txn, event, key, value): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + @cachedInlineCallbacks() def get_room_name_and_aliases(self, room_id): def f(txn): diff --git a/synapse/storage/schema/delta/24/fts.py 
b/synapse/storage/schema/delta/24/fts.py index 568033275877..05f1605fdde6 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -44,7 +44,8 @@ FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; -CREATE INDEX event_search_idx ON event_search USING gin(vector); +CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); +CREATE INDEX event_search_ev_idx ON event_search(event_id); """ diff --git a/synapse/storage/search.py b/synapse/storage/search.py index eea44777650a..e66b5f9edc68 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -21,11 +21,16 @@ class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, constraints): + def search_msgs(self, room_ids, constraints): clauses = [] args = [] fts = None + clauses.append( + "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) + ) + args.extend(room_ids) + for c in constraints: local_clauses = [] if c.search_type == SearchConstraintTypes.FTS: From ae72e247fa478a541c837aaa7663aa3ca01ba840 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 10:50:46 +0100 Subject: [PATCH 03/24] PEP8 --- synapse/handlers/search.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index b6bdb752e9f4..9dc474aa56eb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -30,7 +30,11 @@ KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.CONTENT_NAME: [SearchConstraintTypes.FTS, SearchConstraintTypes.EXACT, SearchConstraintTypes.SUBSTRING], + KnownRoomEventKeys.CONTENT_NAME: [ + SearchConstraintTypes.FTS, + SearchConstraintTypes.EXACT, + SearchConstraintTypes.SUBSTRING, + ], KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], KnownRoomEventKeys.SENDER: 
[SearchConstraintTypes.EXACT], KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], @@ -54,7 +58,10 @@ def from_dict(cls, d): raise SynapseError(400, "Unrecognized key %r", key) if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: - raise SynapseError(400, "Disallowed constraint type %r for key %r", search_type, key) + raise SynapseError( + 400, + "Disallowed constraint type %r for key %r", search_type, key + ) return cls(search_type, keys, d["value"]) From 927004e34905d4ad6a69576ee1799fe8019d8985 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:06:14 +0100 Subject: [PATCH 04/24] Remove unused room_id parameter --- synapse/handlers/federation.py | 2 +- synapse/handlers/message.py | 10 +++---- synapse/handlers/search.py | 50 +++++++++++++++++++++++++++++++++- synapse/handlers/sync.py | 2 +- synapse/storage/state.py | 11 ++++---- 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3882ba79edb3..a710bdcfdb07 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -242,7 +242,7 @@ def log_failure(f): @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): event_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, None), diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 30949ff7a673..d2f0892f7a78 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -164,7 +164,7 @@ def get_messages(self, user_id=None, room_id=None, pagin_config=None, @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( 
(EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), @@ -290,7 +290,7 @@ def get_room_data(self, user_id=None, room_id=None, elif member_event.membership == Membership.LEAVE: key = (event_type, state_key) room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], [key] + [member_event.event_id], [key] ) data = room_state[member_event.event_id].get(key) @@ -314,7 +314,7 @@ def get_state_events(self, user_id, room_id): room_state = yield self.state_handler.get_current_state(room_id) elif member_event.membership == Membership.LEAVE: room_state = yield self.store.get_state_for_events( - room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] @@ -403,7 +403,7 @@ def handle_room(event): elif event.membership == Membership.LEAVE: room_end_token = "s%d" % (event.stream_ordering,) deferred_room_state = self.store.get_state_for_events( - event.room_id, [event.event_id], None + [event.event_id], None ) deferred_room_state.addCallback( lambda states: states[event.event_id] @@ -496,7 +496,7 @@ def room_initial_sync(self, user_id, room_id, pagin_config=None): def _room_initial_sync_parted(self, user_id, room_id, pagin_config, member_event): room_state = yield self.store.get_state_for_events( - member_event.room_id, [member_event.event_id], None + [member_event.event_id], None ) room_state = room_state[member_event.event_id] diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 9dc474aa56eb..71182a8fe004 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,7 +17,9 @@ from ._base import BaseHandler -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.api.constants import ( + EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes +) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -71,6 +73,52 @@ class SearchHandler(BaseHandler): 
def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def _filter_events_for_client(self, user_id, room_id, events): + event_id_to_state = yield self.store.get_state_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) + ) + + def allowed(event, state): + if event.type == EventTypes.RoomHistoryVisibility: + return True + + membership_ev = state.get((EventTypes.Member, user_id), None) + if membership_ev: + membership = membership_ev.membership + else: + membership = Membership.LEAVE + + if membership == Membership.JOIN: + return True + + history = state.get((EventTypes.RoomHistoryVisibility, ''), None) + if history: + visibility = history.content.get("history_visibility", "shared") + else: + visibility = "shared" + + if visibility == "public": + return True + elif visibility == "shared": + return True + elif visibility == "joined": + return membership == Membership.JOIN + elif visibility == "invited": + return membership == Membership.INVITE + + return True + + defer.returnValue([ + event + for event in events + if allowed(event, event_id_to_state[event.event_id]) + ]) + @defer.inlineCallbacks def search(self, user, content): constraint_dicts = content["search_categories"]["room_events"]["constraints"] diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9914ff6f9c01..a8940de16653 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -312,7 +312,7 @@ def incremental_sync_with_gap(self, sync_config, since_token): @defer.inlineCallbacks def _filter_events_for_client(self, user_id, room_id, events): event_id_to_state = yield self.store.get_state_for_events( - room_id, frozenset(e.event_id for e in events), + frozenset(e.event_id for e in events), types=( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 
e935b9443b40..acfb322a539f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -54,7 +54,7 @@ def get_state_groups(self, room_id, event_ids): defer.returnValue({}) event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -208,13 +208,12 @@ def f(txn): ) @defer.inlineCallbacks - def get_state_for_events(self, room_id, event_ids, types): + def get_state_for_events(self, event_ids, types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: - room_id (str) event_ids (list) types (list): List of (type, state_key) tuples which are used to filter the state fetched. `state_key` may be None, which matches @@ -225,7 +224,7 @@ def get_state_for_events(self, room_id, event_ids, types): The dicts are mappings from (type, state_key) -> state_events """ event_to_groups = yield self._get_state_group_for_events( - room_id, event_ids, + event_ids, ) groups = set(event_to_groups.values()) @@ -251,8 +250,8 @@ def _get_state_group_for_event(self, room_id, event_id): ) @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", - num_args=2) - def _get_state_group_for_events(self, room_id, event_ids): + num_args=1) + def _get_state_group_for_events(self, event_ids): """Returns mapping event_id -> state_group """ def f(txn): From ca53ad74250d94b8c9b6581e6cedef0a29520fc2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Oct 2015 15:52:55 +0100 Subject: [PATCH 05/24] Filter events to only those that the user is allowed to see --- synapse/handlers/search.py | 16 ++++++++++------ synapse/storage/search.py | 14 +++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 71182a8fe004..49b786dadbcb 100644 --- a/synapse/handlers/search.py +++
b/synapse/handlers/search.py @@ -74,7 +74,7 @@ def __init__(self, hs): super(SearchHandler, self).__init__(hs) @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, room_id, events): + def _filter_events_for_client(self, user_id, events): event_id_to_state = yield self.store.get_state_for_events( frozenset(e.event_id for e in events), types=( @@ -139,16 +139,20 @@ def search(self, user, content): # TODO(paul): work out why because I really don't think it should room_ids = set(r.room_id for r in rooms) - res = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + + allowed_events = yield self._filter_events_for_client( + user.to_string(), event_map.values() + ) time_now = self.clock.time_msec() results = { - r["result"].event_id: { - "rank": r["rank"], - "result": serialize_event(r["result"], time_now) + e.event_id: { + "rank": rank_map[e.event_id], + "result": serialize_event(e, time_now) } - for r in res + for e in allowed_events } logger.info("returning: %r", results) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index e66b5f9edc68..238df3844054 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -70,11 +70,11 @@ def search_msgs(self, room_ids, constraints): for ev in events } - defer.returnValue([ + defer.returnValue(( { - "rank": r["rank"], - "result": event_map[r["event_id"]] - } - for r in results - if r["event_id"] in event_map - ]) + r["event_id"]: r["rank"] + for r in results + if r["event_id"] in event_map + }, + event_map + )) From 1a40afa75693f0c2ae3b2eaac62ff9ca6bb02488 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 10:36:25 +0100 Subject: [PATCH 06/24] Add sqlite schema --- synapse/storage/schema/delta/24/fts.py | 69 ++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index 
05f1605fdde6..a806f4b8d3f6 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -15,7 +15,9 @@ import logging from synapse.storage import get_statements -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine + +import ujson logger = logging.getLogger(__name__) @@ -46,13 +48,70 @@ CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); CREATE INDEX event_search_ev_idx ON event_search(event_id); +CREATE INDEX event_search_ev_ridx ON event_search(room_id); """ +SQLITE_TABLE = ( + "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" +) +SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" + + def run_upgrade(cur, database_engine, *args, **kwargs): - if not isinstance(database_engine, PostgresEngine): - # We only support FTS for postgres currently. + if isinstance(database_engine, PostgresEngine): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) return - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + if isinstance(database_engine, Sqlite3Engine): + cur.execute(SQLITE_TABLE) + + rowid = -1 + while True: + cur.execute( + "SELECT rowid, json FROM event_json" + " WHERE rowid > ?" 
+ " ORDER BY rowid ASC LIMIT 100", + (rowid,) + ) + + res = cur.fetchall() + + if not res: + break + + events = [ + ujson.loads(js) + for _, js in res + ] + + rowid = max(rid for rid, _ in res) + + rows = [] + for ev in events: + if ev["type"] == "m.room.message": + rows.append(( + ev["event_id"], ev["room_id"], "content.body", + ev["content"]["body"] + )) + if ev["type"] == "m.room.name": + rows.append(( + ev["event_id"], ev["room_id"], "content.name", + ev["content"]["name"] + )) + if ev["type"] == "m.room.topic": + rows.append(( + ev["event_id"], ev["room_id"], "content.topic", + ev["content"]["topic"] + )) + + if rows: + logger.info(rows) + cur.executemany( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)", + rows + ) + + # cur.execute(SQLITE_INDEX) From 30c2783d2f2983764738383d73c378ec5dc61279 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 10:36:36 +0100 Subject: [PATCH 07/24] Search left rooms too --- synapse/handlers/search.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 49b786dadbcb..d5c395061c54 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -131,12 +131,9 @@ def search(self, user, content): raise SynapseError(400, "Only one constraint can be FTS") fts = True - rooms = yield self.store.get_rooms_for_user( - user.to_string(), + rooms = yield self.store.get_rooms_for_user_where_membership_is( + user.to_string(), membership_list=[Membership.JOIN, Membership.LEAVE], ) - - # For some reason the list of events contains duplicates - # TODO(paul): work out why because I really don't think it should room_ids = set(r.room_id for r in rooms) rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) From cfd39d6b55fad5b176f1883e1bc87ed8e14acf42 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 13:47:50 +0100 Subject: [PATCH 08/24] Add SQLite support --- 
synapse/storage/search.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 238df3844054..5843f8087661 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -17,6 +17,7 @@ from _base import SQLBaseStore from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes +from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @@ -48,11 +49,17 @@ def search_msgs(self, room_ids, constraints): "(%s)" % (" OR ".join(local_clauses),) ) - sql = ( - "SELECT ts_rank_cd(vector, query) AS rank, event_id" - " FROM plainto_tsquery('english', ?) as query, event_search" - " WHERE vector @@ query" - ) + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "SELECT ts_rank_cd(vector, query) AS rank, event_id" + " FROM plainto_tsquery('english', ?) as query, event_search" + " WHERE vector @@ query" + ) + else: + sql = ( + "SELECT 0 as rank, event_id FROM event_search" + " WHERE value MATCH ?" 
+ ) for clause in clauses: sql += " AND " + clause From 3e2a1297b513dc1fadb288c74684f6651a88016d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 15:22:14 +0100 Subject: [PATCH 09/24] Remove constraints in preparation of using filters --- synapse/handlers/search.py | 61 +++++++------------------------------- synapse/storage/search.py | 30 +++++++------------ 2 files changed, 20 insertions(+), 71 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index d5c395061c54..8864a921fc39 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -18,7 +18,7 @@ from ._base import BaseHandler from synapse.api.constants import ( - EventTypes, KnownRoomEventKeys, Membership, SearchConstraintTypes + EventTypes, Membership, ) from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event @@ -29,45 +29,6 @@ logger = logging.getLogger(__name__) -KEYS_TO_ALLOWED_CONSTRAINT_TYPES = { - KnownRoomEventKeys.CONTENT_BODY: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.CONTENT_MSGTYPE: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.CONTENT_NAME: [ - SearchConstraintTypes.FTS, - SearchConstraintTypes.EXACT, - SearchConstraintTypes.SUBSTRING, - ], - KnownRoomEventKeys.CONTENT_TOPIC: [SearchConstraintTypes.FTS], - KnownRoomEventKeys.SENDER: [SearchConstraintTypes.EXACT], - KnownRoomEventKeys.ORIGIN_SERVER_TS: [SearchConstraintTypes.RANGE], - KnownRoomEventKeys.ROOM_ID: [SearchConstraintTypes.EXACT], -} - - -class RoomConstraint(object): - def __init__(self, search_type, keys, value): - self.search_type = search_type - self.keys = keys - self.value = value - - @classmethod - def from_dict(cls, d): - search_type = d["type"] - keys = d["keys"] - - for key in keys: - if key not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES: - raise SynapseError(400, "Unrecognized key %r", key) - - if search_type not in KEYS_TO_ALLOWED_CONSTRAINT_TYPES[key]: - raise SynapseError( - 400, - "Disallowed constraint type %r
for key %r", search_type, key - ) - - return cls(search_type, keys, d["value"]) - - class SearchHandler(BaseHandler): def __init__(self, hs): @@ -121,22 +82,20 @@ def allowed(event, state): @defer.inlineCallbacks def search(self, user, content): - constraint_dicts = content["search_categories"]["room_events"]["constraints"] - constraints = [RoomConstraint.from_dict(c)for c in constraint_dicts] - - fts = False - for c in constraints: - if c.search_type == SearchConstraintTypes.FTS: - if fts: - raise SynapseError(400, "Only one constraint can be FTS") - fts = True + try: + search_term = content["search_categories"]["room_events"]["search_term"] + keys = content["search_categories"]["room_events"]["keys"] + except KeyError: + raise SynapseError(400, "Invalid search query") rooms = yield self.store.get_rooms_for_user_where_membership_is( - user.to_string(), membership_list=[Membership.JOIN, Membership.LEAVE], + user.to_string(), + membership_list=[Membership.JOIN], + # membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban], ) room_ids = set(r.room_id for r in rooms) - rank_map, event_map = yield self.store.search_msgs(room_ids, constraints) + rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) allowed_events = yield self._filter_events_for_client( user.to_string(), event_map.values() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 5843f8087661..7a30ce25eb2c 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,38 +16,28 @@ from twisted.internet import defer from _base import SQLBaseStore -from synapse.api.constants import KnownRoomEventKeys, SearchConstraintTypes from synapse.storage.engines import PostgresEngine class SearchStore(SQLBaseStore): @defer.inlineCallbacks - def search_msgs(self, room_ids, constraints): + def search_msgs(self, room_ids, search_term, keys): clauses = [] args = [] - fts = None clauses.append( "room_id IN (%s)" % (",".join(["?"] * len(room_ids)),) ) 
args.extend(room_ids) - for c in constraints: - local_clauses = [] - if c.search_type == SearchConstraintTypes.FTS: - fts = c.value - for key in c.keys: - local_clauses.append("key = ?") - args.append(key) - elif c.search_type == SearchConstraintTypes.EXACT: - for key in c.keys: - if key == KnownRoomEventKeys.ROOM_ID: - for value in c.value: - local_clauses.append("room_id = ?") - args.append(value) - clauses.append( - "(%s)" % (" OR ".join(local_clauses),) - ) + local_clauses = [] + for key in keys: + local_clauses.append("key = ?") + args.append(key) + + clauses.append( + "(%s)" % (" OR ".join(local_clauses),) + ) if isinstance(self.database_engine, PostgresEngine): sql = ( @@ -67,7 +57,7 @@ def search_msgs(self, room_ids, constraints): sql += " ORDER BY rank DESC" results = yield self._execute( - "search_msgs", self.cursor_to_dict, sql, *([fts] + args) + "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) ) events = yield self._get_events([r["event_id"] for r in results]) From 7ecd11accb68cc0f20e7ab84673df38413ba7cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Oct 2015 15:50:56 +0100 Subject: [PATCH 10/24] Add paranoia limit --- synapse/storage/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7a30ce25eb2c..1b987161e2e7 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -54,7 +54,7 @@ def search_msgs(self, room_ids, search_term, keys): for clause in clauses: sql += " AND " + clause - sql += " ORDER BY rank DESC" + sql += " ORDER BY rank DESC LIMIT 500" results = yield self._execute( "search_msgs", self.cursor_to_dict, sql, *([search_term] + args) From d25b0f65ea9ab36dbf4285d86a1ca3e357f6ad1c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:46:31 +0100 Subject: [PATCH 11/24] Add TODO markers --- synapse/handlers/search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/synapse/handlers/search.py b/synapse/handlers/search.py index 8864a921fc39..79c1569868c1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -88,6 +88,7 @@ def search(self, user, content): except KeyError: raise SynapseError(400, "Invalid search query") + # TODO: Search through left rooms too rooms = yield self.store.get_rooms_for_user_where_membership_is( user.to_string(), membership_list=[Membership.JOIN], @@ -95,6 +96,8 @@ def search(self, user, content): ) room_ids = set(r.room_id for r in rooms) + # TODO: Apply room filter to rooms list + rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) allowed_events = yield self._filter_events_for_client( @@ -111,7 +114,7 @@ def search(self, user, content): for e in allowed_events } - logger.info("returning: %r", results) + logger.info("Found %d results", len(results)) defer.returnValue({ "search_categories": { From 1d9e109820c1aec7193278b2b26042259329c144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:49:00 +0100 Subject: [PATCH 12/24] More TODO markers --- synapse/handlers/search.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 79c1569868c1..8140c0b9d42c 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -104,6 +104,9 @@ def search(self, user, content): user.to_string(), event_map.values() ) + # TODO: Filter allowed_events + # TODO: Add a limit + time_now = self.clock.time_msec() results = { From 99c7fbfef7729e6f3cceb9cea64f21d5a2c5b41f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 09:52:40 +0100 Subject: [PATCH 13/24] Fix to work with SQLite --- synapse/storage/room.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index e4e830944aa6..0527cee05d5f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -19,6 +19,7 @@ from ._base 
import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks +from .engines import PostgresEngine import collections import logging @@ -202,10 +203,16 @@ def _store_room_message_txn(self, txn, event): ) def _store_event_search_txn(self, txn, event, key, value): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, vector)" + " VALUES (?,?,?,to_tsvector('english', ?))" + ) + else: + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) txn.execute(sql, (event.event_id, event.room_id, key, value,)) From 8c9df8774e781da838efc18953785cfa1a2af0a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 10:35:50 +0100 Subject: [PATCH 14/24] Make 'keys' optional --- synapse/handlers/search.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 8140c0b9d42c..7f1efe2b4617 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -84,7 +84,9 @@ def allowed(event, state): def search(self, user, content): try: search_term = content["search_categories"]["room_events"]["search_term"] - keys = content["search_categories"]["room_events"]["keys"] + keys = content["search_categories"]["room_events"].get("keys", [ + "content.body", "content.name", "content.topic", + ]) except KeyError: raise SynapseError(400, "Invalid search query") From f45aaf0e35b447c15aace330d2daaa0005ad8461 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Oct 2015 10:36:55 +0100 Subject: [PATCH 15/24] Remove unused constants --- synapse/api/constants.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 7c7f9ff957a4..008ee647276f 100644 --- a/synapse/api/constants.py +++ 
b/synapse/api/constants.py @@ -84,22 +84,3 @@ class RoomCreationPreset(object): PRIVATE_CHAT = "private_chat" PUBLIC_CHAT = "public_chat" TRUSTED_PRIVATE_CHAT = "trusted_private_chat" - - -class SearchConstraintTypes(object): - FTS = "fts" - EXACT = "exact" - PREFIX = "prefix" - SUBSTRING = "substring" - RANGE = "range" - - -class KnownRoomEventKeys(object): - CONTENT_BODY = "content.body" - CONTENT_MSGTYPE = "content.msgtype" - CONTENT_NAME = "content.name" - CONTENT_TOPIC = "content.topic" - - SENDER = "sender" - ORIGIN_SERVER_TS = "origin_server_ts" - ROOM_ID = "room_id" From 22a8c91448f710c20a6aee66ec2a452528f1d637 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:19:44 +0100 Subject: [PATCH 16/24] Split up run_upgrade --- synapse/storage/schema/delta/24/fts.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/synapse/storage/schema/delta/24/fts.py b/synapse/storage/schema/delta/24/fts.py index b45a5fd8201e..0c752d84268c 100644 --- a/synapse/storage/schema/delta/24/fts.py +++ b/synapse/storage/schema/delta/24/fts.py @@ -55,16 +55,24 @@ SQLITE_TABLE = ( "CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" ) -SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)" def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): - for statement in get_statements(POSTGRES_SQL.splitlines()): - cur.execute(statement) + run_postgres_upgrade(cur) return if isinstance(database_engine, Sqlite3Engine): + run_sqlite_upgrade(cur) + return + + +def run_postgres_upgrade(cur): + for statement in get_statements(POSTGRES_SQL.splitlines()): + cur.execute(statement) + + +def run_sqlite_upgrade(cur): cur.execute(SQLITE_TABLE) rowid = -1 @@ -113,5 +121,3 @@ def run_upgrade(cur, database_engine, *args, **kwargs): " VALUES (?,?,?,?)", rows ) - - # cur.execute(SQLITE_INDEX) From 73260ad01f067495e541a936eef4a14ba2fea5ec Mon Sep 17 00:00:00 2001 
From: Erik Johnston Date: Fri, 16 Oct 2015 11:24:02 +0100 Subject: [PATCH 17/24] Comment on the LIMIT 500 --- synapse/storage/search.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 1b987161e2e7..7d642e18ff47 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -54,6 +54,8 @@ def search_msgs(self, room_ids, search_term, keys): for clause in clauses: sql += " AND " + clause + # We add an arbitrary limit here to ensure we don't try to pull the + # entire table from the database. sql += " ORDER BY rank DESC LIMIT 500" results = yield self._execute( From 3cf9948b8d5956c05026ee734ccf65d203eb6d6b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:28:12 +0100 Subject: [PATCH 18/24] Add docstring --- synapse/storage/search.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7d642e18ff47..6c10f9631e43 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -22,6 +22,17 @@ class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): + """Performs a full text search over events with give keys. 
+ + Args: + room_ids (list): List of room ids to search in + search_term (str): Search term to search for + keys (list): List of keys to search in, currently supports + "content.body", "content.name", "content.body" + + Returns: + 2-tuple of (dict event_id -> rank, dict event_id -> event) + """ clauses = [] args = [] From b62da463e18a05205725f75508d5053232f1a158 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 11:52:16 +0100 Subject: [PATCH 19/24] docstring --- synapse/handlers/search.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 7f1efe2b4617..c01c12f8c16a 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -82,6 +82,16 @@ def allowed(event, state): @defer.inlineCallbacks def search(self, user, content): + """Performs a full text search for a user. + + Args: + user (UserID) + content (dict): Search parameters + + Returns: + dict to be returned to the client with results of search + """ + try: search_term = content["search_categories"]["room_events"]["search_term"] keys = content["search_categories"]["room_events"].get("keys", [ From edb998ba23cf74de624963f61ca9c897260a3e7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 14:37:14 +0100 Subject: [PATCH 20/24] Explicitly check for Sqlite3Engine --- synapse/storage/search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 6c10f9631e43..dd012fa56546 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,7 +16,7 @@ from twisted.internet import defer from _base import SQLBaseStore -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine class SearchStore(SQLBaseStore): @@ -56,11 +56,14 @@ def search_msgs(self, room_ids, search_term, keys): " FROM plainto_tsquery('english', ?) 
as query, event_search" " WHERE vector @@ query" ) - else: + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "SELECT 0 as rank, event_id FROM event_search" " WHERE value MATCH ?" ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") for clause in clauses: sql += " AND " + clause From d4b5621e0a5edeb66a80d8dd88055a0129def2a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 15:19:52 +0100 Subject: [PATCH 21/24] Remove duplicate _filter_events_for_client --- synapse/handlers/search.py | 46 -------------------------------------- 1 file changed, 46 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index c01c12f8c16a..1a5d7381db8f 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -34,52 +34,6 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) - @defer.inlineCallbacks - def _filter_events_for_client(self, user_id, events): - event_id_to_state = yield self.store.get_state_for_events( - frozenset(e.event_id for e in events), - types=( - (EventTypes.RoomHistoryVisibility, ""), - (EventTypes.Member, user_id), - ) - ) - - def allowed(event, state): - if event.type == EventTypes.RoomHistoryVisibility: - return True - - membership_ev = state.get((EventTypes.Member, user_id), None) - if membership_ev: - membership = membership_ev.membership - else: - membership = Membership.LEAVE - - if membership == Membership.JOIN: - return True - - history = state.get((EventTypes.RoomHistoryVisibility, ''), None) - if history: - visibility = history.content.get("history_visibility", "shared") - else: - visibility = "shared" - - if visibility == "public": - return True - elif visibility == "shared": - return True - elif visibility == "joined": - return membership == Membership.JOIN - elif visibility == "invited": - return membership == Membership.INVITE - - return True - - defer.returnValue([ - event - for event in 
events - if allowed(event, event_id_to_state[event.event_id]) - ]) - @defer.inlineCallbacks def search(self, user, content): """Performs a full text search for a user. From 380f148db7d710ece7679e207334483bda407aa5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 15:32:51 +0100 Subject: [PATCH 22/24] Remove unused import --- synapse/handlers/search.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 1a5d7381db8f..22808b9c07d1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -17,9 +17,7 @@ from ._base import BaseHandler -from synapse.api.constants import ( - EventTypes, Membership, -) +from synapse.api.constants import Membership from synapse.api.errors import SynapseError from synapse.events.utils import serialize_event From f2d698cb52883d8d43faabefdc70e2ade9ebb8b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 16:46:48 +0100 Subject: [PATCH 23/24] Typing --- synapse/storage/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index dd012fa56546..a3c69c5ab375 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -22,13 +22,13 @@ class SearchStore(SQLBaseStore): @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): - """Performs a full text search over events with give keys. + """Performs a full text search over events with given keys. 
Args: room_ids (list): List of room ids to search in search_term (str): Search term to search for keys (list): List of keys to search in, currently supports - "content.body", "content.name", "content.body" + "content.body", "content.name", "content.topic" Returns: 2-tuple of (dict event_id -> rank, dict event_id -> event) From 46d39343d976a933c3f2dfd19e5e552c01c93bf4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Oct 2015 16:58:00 +0100 Subject: [PATCH 24/24] Explicitly check for Sqlite3Engine --- synapse/storage/room.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 0527cee05d5f..13441fcdceb9 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -19,7 +19,7 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks -from .engines import PostgresEngine +from .engines import PostgresEngine, Sqlite3Engine import collections import logging @@ -208,11 +208,14 @@ def _store_event_search_txn(self, txn, event, key, value): "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" ) - else: + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") txn.execute(sql, (event.event_id, event.room_id, key, value,))