-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add basic search API #307
Add basic search API #307
Changes from 21 commits
c85c912
61561b9
ae72e24
927004e
f6fde34
ca53ad7
1a40afa
30c2783
f9340ea
cfd39d6
88971fd
323d3e5
3e2a129
7ecd11a
d25b0f6
1d9e109
99c7fbf
8c9df87
f45aaf0
bcfb653
22a8c91
73260ad
3cf9948
b62da46
edb998b
12122bf
23ed7dc
d4b5621
380f148
33646eb
f2d698c
46d3934
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
# -*- coding: utf-8 -*- | ||
# Copyright 2015 OpenMarket Ltd | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from twisted.internet import defer | ||
|
||
from ._base import BaseHandler | ||
|
||
from synapse.api.constants import ( | ||
EventTypes, Membership, | ||
) | ||
from synapse.api.errors import SynapseError | ||
from synapse.events.utils import serialize_event | ||
|
||
import logging | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class SearchHandler(BaseHandler): | ||
|
||
def __init__(self, hs): | ||
super(SearchHandler, self).__init__(hs) | ||
|
||
@defer.inlineCallbacks | ||
def _filter_events_for_client(self, user_id, events): | ||
event_id_to_state = yield self.store.get_state_for_events( | ||
frozenset(e.event_id for e in events), | ||
types=( | ||
(EventTypes.RoomHistoryVisibility, ""), | ||
(EventTypes.Member, user_id), | ||
) | ||
) | ||
|
||
def allowed(event, state): | ||
if event.type == EventTypes.RoomHistoryVisibility: | ||
return True | ||
|
||
membership_ev = state.get((EventTypes.Member, user_id), None) | ||
if membership_ev: | ||
membership = membership_ev.membership | ||
else: | ||
membership = Membership.LEAVE | ||
|
||
if membership == Membership.JOIN: | ||
return True | ||
|
||
history = state.get((EventTypes.RoomHistoryVisibility, ''), None) | ||
if history: | ||
visibility = history.content.get("history_visibility", "shared") | ||
else: | ||
visibility = "shared" | ||
|
||
if visibility == "public": | ||
return True | ||
elif visibility == "shared": | ||
return True | ||
elif visibility == "joined": | ||
return membership == Membership.JOIN | ||
elif visibility == "invited": | ||
return membership == Membership.INVITE | ||
|
||
return True | ||
|
||
defer.returnValue([ | ||
event | ||
for event in events | ||
if allowed(event, event_id_to_state[event.event_id]) | ||
]) | ||
|
||
@defer.inlineCallbacks | ||
def search(self, user, content): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we have some doc-string please? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
try: | ||
search_term = content["search_categories"]["room_events"]["search_term"] | ||
keys = content["search_categories"]["room_events"].get("keys", [ | ||
"content.body", "content.name", "content.topic", | ||
]) | ||
except KeyError: | ||
raise SynapseError(400, "Invalid search query") | ||
|
||
# TODO: Search through left rooms too | ||
rooms = yield self.store.get_rooms_for_user_where_membership_is( | ||
user.to_string(), | ||
membership_list=[Membership.JOIN], | ||
# membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban], | ||
) | ||
room_ids = set(r.room_id for r in rooms) | ||
|
||
# TODO: Apply room filter to rooms list | ||
|
||
rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys) | ||
|
||
allowed_events = yield self._filter_events_for_client( | ||
user.to_string(), event_map.values() | ||
) | ||
|
||
# TODO: Filter allowed_events | ||
# TODO: Add a limit | ||
|
||
time_now = self.clock.time_msec() | ||
|
||
results = { | ||
e.event_id: { | ||
"rank": rank_map[e.event_id], | ||
"result": serialize_event(e, time_now) | ||
} | ||
for e in allowed_events | ||
} | ||
|
||
logger.info("Found %d results", len(results)) | ||
|
||
defer.returnValue({ | ||
"search_categories": { | ||
"room_events": { | ||
"results": results, | ||
"count": len(results) | ||
} | ||
} | ||
}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
from ._base import SQLBaseStore | ||
from synapse.util.caches.descriptors import cachedInlineCallbacks | ||
from .engines import PostgresEngine | ||
|
||
import collections | ||
import logging | ||
|
@@ -175,6 +176,10 @@ def _store_room_topic_txn(self, txn, event): | |
}, | ||
) | ||
|
||
self._store_event_search_txn( | ||
txn, event, "content.topic", event.content["topic"] | ||
) | ||
|
||
def _store_room_name_txn(self, txn, event): | ||
if hasattr(event, "content") and "name" in event.content: | ||
self._simple_insert_txn( | ||
|
@@ -187,6 +192,30 @@ def _store_room_name_txn(self, txn, event): | |
} | ||
) | ||
|
||
self._store_event_search_txn( | ||
txn, event, "content.name", event.content["name"] | ||
) | ||
|
||
def _store_room_message_txn(self, txn, event): | ||
if hasattr(event, "content") and "body" in event.content: | ||
self._store_event_search_txn( | ||
txn, event, "content.body", event.content["body"] | ||
) | ||
|
||
def _store_event_search_txn(self, txn, event, key, value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to move this into a method on the db engine rather than doing "isinstance" checks? |
||
if isinstance(self.database_engine, PostgresEngine): | ||
sql = ( | ||
"INSERT INTO event_search (event_id, room_id, key, vector)" | ||
" VALUES (?,?,?,to_tsvector('english', ?))" | ||
) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be an idea to do "elif isinstance(self.database_engine, Sqlite3Engine):" explicitly here |
||
sql = ( | ||
"INSERT INTO event_search (event_id, room_id, key, value)" | ||
" VALUES (?,?,?,?)" | ||
) | ||
|
||
txn.execute(sql, (event.event_id, event.room_id, key, value,)) | ||
|
||
@cachedInlineCallbacks() | ||
def get_room_name_and_aliases(self, room_id): | ||
def f(txn): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# Copyright 2015 OpenMarket Ltd | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import logging | ||
|
||
from synapse.storage.prepare_database import get_statements | ||
from synapse.storage.engines import PostgresEngine, Sqlite3Engine | ||
|
||
import ujson | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
POSTGRES_SQL = """ | ||
CREATE TABLE event_search ( | ||
event_id TEXT, | ||
room_id TEXT, | ||
key TEXT, | ||
vector tsvector | ||
); | ||
|
||
INSERT INTO event_search SELECT | ||
event_id, room_id, 'content.body', | ||
to_tsvector('english', json::json->'content'->>'body') | ||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.message'; | ||
|
||
INSERT INTO event_search SELECT | ||
event_id, room_id, 'content.name', | ||
to_tsvector('english', json::json->'content'->>'name') | ||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.name'; | ||
|
||
INSERT INTO event_search SELECT | ||
event_id, room_id, 'content.topic', | ||
to_tsvector('english', json::json->'content'->>'topic') | ||
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic'; | ||
|
||
|
||
CREATE INDEX event_search_fts_idx ON event_search USING gin(vector); | ||
CREATE INDEX event_search_ev_idx ON event_search(event_id); | ||
CREATE INDEX event_search_ev_ridx ON event_search(room_id); | ||
""" | ||
|
||
|
||
SQLITE_TABLE = ( | ||
"CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)" | ||
) | ||
|
||
|
||
def run_upgrade(cur, database_engine, *args, **kwargs): | ||
if isinstance(database_engine, PostgresEngine): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might nicer to split this into a run_upgrade_postgres and a run_upgrade_sqlite3 so that it's easier see which logic is being applied to which database type. |
||
run_postgres_upgrade(cur) | ||
return | ||
|
||
if isinstance(database_engine, Sqlite3Engine): | ||
run_sqlite_upgrade(cur) | ||
return | ||
|
||
|
||
def run_postgres_upgrade(cur): | ||
for statement in get_statements(POSTGRES_SQL.splitlines()): | ||
cur.execute(statement) | ||
|
||
|
||
def run_sqlite_upgrade(cur): | ||
cur.execute(SQLITE_TABLE) | ||
|
||
rowid = -1 | ||
while True: | ||
cur.execute( | ||
"SELECT rowid, json FROM event_json" | ||
" WHERE rowid > ?" | ||
" ORDER BY rowid ASC LIMIT 100", | ||
(rowid,) | ||
) | ||
|
||
res = cur.fetchall() | ||
|
||
if not res: | ||
break | ||
|
||
events = [ | ||
ujson.loads(js) | ||
for _, js in res | ||
] | ||
|
||
rowid = max(rid for rid, _ in res) | ||
|
||
rows = [] | ||
for ev in events: | ||
if ev["type"] == "m.room.message": | ||
rows.append(( | ||
ev["event_id"], ev["room_id"], "content.body", | ||
ev["content"]["body"] | ||
)) | ||
if ev["type"] == "m.room.name": | ||
rows.append(( | ||
ev["event_id"], ev["room_id"], "content.name", | ||
ev["content"]["name"] | ||
)) | ||
if ev["type"] == "m.room.topic": | ||
rows.append(( | ||
ev["event_id"], ev["room_id"], "content.topic", | ||
ev["content"]["topic"] | ||
)) | ||
|
||
if rows: | ||
logger.info(rows) | ||
cur.executemany( | ||
"INSERT INTO event_search (event_id, room_id, key, value)" | ||
" VALUES (?,?,?,?)", | ||
rows | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Surely the time has come to factor this out into a separate function. I think this the forth copy of this function now?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
:(