Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add basic search API #307

Merged
merged 32 commits into from
Oct 19, 2015
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c85c912
Add basic full text search impl.
erikjohnston Oct 9, 2015
61561b9
Keep FTS indexes up to date. Only search through rooms currently joined
erikjohnston Oct 12, 2015
ae72e24
PEP8
erikjohnston Oct 12, 2015
927004e
Remove unused room_id parameter
erikjohnston Oct 12, 2015
f6fde34
Merge remote-tracking branch 'origin/develop' into erikj/search
erikjohnston Oct 12, 2015
ca53ad7
Filter events to only thsoe that the user is allowed to see
erikjohnston Oct 12, 2015
1a40afa
Add sqlite schema
erikjohnston Oct 13, 2015
30c2783
Search left rooms too
erikjohnston Oct 13, 2015
f9340ea
Merge branch 'erikj/store_engine' into erikj/search
erikjohnston Oct 13, 2015
cfd39d6
Add SQLite support
erikjohnston Oct 13, 2015
88971fd
Merge branch 'erikj/store_engine' into erikj/search
erikjohnston Oct 13, 2015
323d3e5
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/se…
erikjohnston Oct 13, 2015
3e2a129
Remove constraints in preperation of using filters
erikjohnston Oct 13, 2015
7ecd11a
Add paranoia limit
erikjohnston Oct 13, 2015
d25b0f6
Add TODO markers
erikjohnston Oct 14, 2015
1d9e109
More TODO markers
erikjohnston Oct 14, 2015
99c7fbf
Fix to work with SQLite
erikjohnston Oct 14, 2015
8c9df87
Make 'keys' optional
erikjohnston Oct 14, 2015
f45aaf0
Remove unused constatns
erikjohnston Oct 14, 2015
bcfb653
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/se…
erikjohnston Oct 15, 2015
22a8c91
Split up run_upgrade
erikjohnston Oct 16, 2015
73260ad
Comment on the LIMIT 500
erikjohnston Oct 16, 2015
3cf9948
Add docstring
erikjohnston Oct 16, 2015
b62da46
docstring
erikjohnston Oct 16, 2015
edb998b
Explicitly check for Sqlite3Engine
erikjohnston Oct 16, 2015
12122bf
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/se…
erikjohnston Oct 16, 2015
23ed7dc
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/se…
erikjohnston Oct 16, 2015
d4b5621
Remove duplicate _filter_events_for_client
erikjohnston Oct 16, 2015
380f148
Remove unused import
erikjohnston Oct 16, 2015
33646eb
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/se…
erikjohnston Oct 16, 2015
f2d698c
Typing
erikjohnston Oct 16, 2015
46d3934
Explicitly check for Sqlite3Engine
erikjohnston Oct 16, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions synapse/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from .auth import AuthHandler
from .identity import IdentityHandler
from .receipts import ReceiptsHandler
from .search import SearchHandler


class Handlers(object):
Expand Down Expand Up @@ -68,3 +69,4 @@ def __init__(self, hs):
self.sync_handler = SyncHandler(hs)
self.auth_handler = AuthHandler(hs)
self.identity_handler = IdentityHandler(hs)
self.search_handler = SearchHandler(hs)
131 changes: 131 additions & 0 deletions synapse/handlers/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from twisted.internet import defer

from ._base import BaseHandler

from synapse.api.constants import (
EventTypes, Membership,
)
from synapse.api.errors import SynapseError
from synapse.events.utils import serialize_event

import logging


logger = logging.getLogger(__name__)


class SearchHandler(BaseHandler):

def __init__(self, hs):
super(SearchHandler, self).__init__(hs)

@defer.inlineCallbacks
def _filter_events_for_client(self, user_id, events):
event_id_to_state = yield self.store.get_state_for_events(
frozenset(e.event_id for e in events),
types=(
(EventTypes.RoomHistoryVisibility, ""),
(EventTypes.Member, user_id),
)
)

def allowed(event, state):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely the time has come to factor this out into a separate function. I think this the forth copy of this function now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:(

if event.type == EventTypes.RoomHistoryVisibility:
return True

membership_ev = state.get((EventTypes.Member, user_id), None)
if membership_ev:
membership = membership_ev.membership
else:
membership = Membership.LEAVE

if membership == Membership.JOIN:
return True

history = state.get((EventTypes.RoomHistoryVisibility, ''), None)
if history:
visibility = history.content.get("history_visibility", "shared")
else:
visibility = "shared"

if visibility == "public":
return True
elif visibility == "shared":
return True
elif visibility == "joined":
return membership == Membership.JOIN
elif visibility == "invited":
return membership == Membership.INVITE

return True

defer.returnValue([
event
for event in events
if allowed(event, event_id_to_state[event.event_id])
])

@defer.inlineCallbacks
def search(self, user, content):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have some doc-string please?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

try:
search_term = content["search_categories"]["room_events"]["search_term"]
keys = content["search_categories"]["room_events"].get("keys", [
"content.body", "content.name", "content.topic",
])
except KeyError:
raise SynapseError(400, "Invalid search query")

# TODO: Search through left rooms too
rooms = yield self.store.get_rooms_for_user_where_membership_is(
user.to_string(),
membership_list=[Membership.JOIN],
# membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban],
)
room_ids = set(r.room_id for r in rooms)

# TODO: Apply room filter to rooms list

rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys)

allowed_events = yield self._filter_events_for_client(
user.to_string(), event_map.values()
)

# TODO: Filter allowed_events
# TODO: Add a limit

time_now = self.clock.time_msec()

results = {
e.event_id: {
"rank": rank_map[e.event_id],
"result": serialize_event(e, time_now)
}
for e in allowed_events
}

logger.info("Found %d results", len(results))

defer.returnValue({
"search_categories": {
"room_events": {
"results": results,
"count": len(results)
}
}
})
17 changes: 17 additions & 0 deletions synapse/rest/client/v1/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,22 @@ def on_PUT(self, request, room_id, user_id):
defer.returnValue((200, {}))


class SearchRestServlet(ClientV1RestServlet):
PATTERN = client_path_pattern(
"/search$"
)

@defer.inlineCallbacks
def on_POST(self, request):
auth_user, _ = yield self.auth.get_user_by_req(request)

content = _parse_json(request)

results = yield self.handlers.search_handler.search(auth_user, content)

defer.returnValue((200, results))


def _parse_json(request):
try:
content = json.loads(request.content.read())
Expand Down Expand Up @@ -585,3 +601,4 @@ def register_servlets(hs, http_server):
RoomInitialSyncRestServlet(hs).register(http_server)
RoomRedactEventRestServlet(hs).register(http_server)
RoomTypingRestServlet(hs).register(http_server)
SearchRestServlet(hs).register(http_server)
2 changes: 2 additions & 0 deletions synapse/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .end_to_end_keys import EndToEndKeyStore

from .receipts import ReceiptsStore
from .search import SearchStore


import logging
Expand Down Expand Up @@ -69,6 +70,7 @@ class DataStore(RoomMemberStore, RoomStore,
EventsStore,
ReceiptsStore,
EndToEndKeyStore,
SearchStore,
):

def __init__(self, hs):
Expand Down
2 changes: 1 addition & 1 deletion synapse/storage/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ def _simple_select_one_onecol(self, table, keyvalues, retcol,
allow_none=False,
desc="_simple_select_one_onecol"):
"""Executes a SELECT query on the named table, which is expected to
return a single row, returning a single column from it."
return a single row, returning a single column from it.

Args:
table : string giving the table name
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,8 @@ def _persist_events_txn(self, txn, events_and_contexts, backfilled,
self._store_room_name_txn(txn, event)
elif event.type == EventTypes.Topic:
self._store_room_topic_txn(txn, event)
elif event.type == EventTypes.Message:
self._store_room_message_txn(txn, event)
elif event.type == EventTypes.Redaction:
self._store_redaction(txn, event)

Expand Down
29 changes: 29 additions & 0 deletions synapse/storage/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from ._base import SQLBaseStore
from synapse.util.caches.descriptors import cachedInlineCallbacks
from .engines import PostgresEngine

import collections
import logging
Expand Down Expand Up @@ -175,6 +176,10 @@ def _store_room_topic_txn(self, txn, event):
},
)

self._store_event_search_txn(
txn, event, "content.topic", event.content["topic"]
)

def _store_room_name_txn(self, txn, event):
if hasattr(event, "content") and "name" in event.content:
self._simple_insert_txn(
Expand All @@ -187,6 +192,30 @@ def _store_room_name_txn(self, txn, event):
}
)

self._store_event_search_txn(
txn, event, "content.name", event.content["name"]
)

def _store_room_message_txn(self, txn, event):
if hasattr(event, "content") and "body" in event.content:
self._store_event_search_txn(
txn, event, "content.body", event.content["body"]
)

def _store_event_search_txn(self, txn, event, key, value):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move this into a method on the db engine rather than doing "isinstance" checks?

if isinstance(self.database_engine, PostgresEngine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, vector)"
" VALUES (?,?,?,to_tsvector('english', ?))"
)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be an idea to do "elif isinstance(self.database_engine, Sqlite3Engine):" explicitly here

sql = (
"INSERT INTO event_search (event_id, room_id, key, value)"
" VALUES (?,?,?,?)"
)

txn.execute(sql, (event.event_id, event.room_id, key, value,))

@cachedInlineCallbacks()
def get_room_name_and_aliases(self, room_id):
def f(txn):
Expand Down
117 changes: 117 additions & 0 deletions synapse/storage/schema/delta/24/fts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from synapse.storage.prepare_database import get_statements
from synapse.storage.engines import PostgresEngine, Sqlite3Engine

import ujson

logger = logging.getLogger(__name__)


POSTGRES_SQL = """
CREATE TABLE event_search (
event_id TEXT,
room_id TEXT,
key TEXT,
vector tsvector
);

INSERT INTO event_search SELECT
event_id, room_id, 'content.body',
to_tsvector('english', json::json->'content'->>'body')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';

INSERT INTO event_search SELECT
event_id, room_id, 'content.name',
to_tsvector('english', json::json->'content'->>'name')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';

INSERT INTO event_search SELECT
event_id, room_id, 'content.topic',
to_tsvector('english', json::json->'content'->>'topic')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';


CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
CREATE INDEX event_search_ev_idx ON event_search(event_id);
CREATE INDEX event_search_ev_ridx ON event_search(room_id);
"""


SQLITE_TABLE = (
"CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)"
)
SQLITE_INDEX = "CREATE INDEX event_search_ev_idx ON event_search(event_id)"


def run_upgrade(cur, database_engine, *args, **kwargs):
if isinstance(database_engine, PostgresEngine):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might nicer to split this into a run_upgrade_postgres and a run_upgrade_sqlite3 so that it's easier see which logic is being applied to which database type.

for statement in get_statements(POSTGRES_SQL.splitlines()):
cur.execute(statement)
return

if isinstance(database_engine, Sqlite3Engine):
cur.execute(SQLITE_TABLE)

rowid = -1
while True:
cur.execute(
"SELECT rowid, json FROM event_json"
" WHERE rowid > ?"
" ORDER BY rowid ASC LIMIT 100",
(rowid,)
)

res = cur.fetchall()

if not res:
break

events = [
ujson.loads(js)
for _, js in res
]

rowid = max(rid for rid, _ in res)

rows = []
for ev in events:
if ev["type"] == "m.room.message":
rows.append((
ev["event_id"], ev["room_id"], "content.body",
ev["content"]["body"]
))
if ev["type"] == "m.room.name":
rows.append((
ev["event_id"], ev["room_id"], "content.name",
ev["content"]["name"]
))
if ev["type"] == "m.room.topic":
rows.append((
ev["event_id"], ev["room_id"], "content.topic",
ev["content"]["topic"]
))

if rows:
logger.info(rows)
cur.executemany(
"INSERT INTO event_search (event_id, room_id, key, value)"
" VALUES (?,?,?,?)",
rows
)

# cur.execute(SQLITE_INDEX)
Loading