Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #307 from matrix-org/erikj/search
Browse files Browse the repository at this point in the history
Add basic search API
  • Loading branch information
erikjohnston committed Oct 19, 2015
2 parents ae3082d + 46d3934 commit e0bf025
Show file tree
Hide file tree
Showing 10 changed files with 365 additions and 2 deletions.
2 changes: 2 additions & 0 deletions synapse/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from .auth import AuthHandler
from .identity import IdentityHandler
from .receipts import ReceiptsHandler
from .search import SearchHandler


class Handlers(object):
Expand Down Expand Up @@ -68,3 +69,4 @@ def __init__(self, hs):
self.sync_handler = SyncHandler(hs)
self.auth_handler = AuthHandler(hs)
self.identity_handler = IdentityHandler(hs)
self.search_handler = SearchHandler(hs)
93 changes: 93 additions & 0 deletions synapse/handlers/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from twisted.internet import defer

from ._base import BaseHandler

from synapse.api.constants import Membership
from synapse.api.errors import SynapseError
from synapse.events.utils import serialize_event

import logging


logger = logging.getLogger(__name__)


class SearchHandler(BaseHandler):

def __init__(self, hs):
super(SearchHandler, self).__init__(hs)

@defer.inlineCallbacks
def search(self, user, content):
"""Performs a full text search for a user.
Args:
user (UserID)
content (dict): Search parameters
Returns:
dict to be returned to the client with results of search
"""

try:
search_term = content["search_categories"]["room_events"]["search_term"]
keys = content["search_categories"]["room_events"].get("keys", [
"content.body", "content.name", "content.topic",
])
except KeyError:
raise SynapseError(400, "Invalid search query")

# TODO: Search through left rooms too
rooms = yield self.store.get_rooms_for_user_where_membership_is(
user.to_string(),
membership_list=[Membership.JOIN],
# membership_list=[Membership.JOIN, Membership.LEAVE, Membership.Ban],
)
room_ids = set(r.room_id for r in rooms)

# TODO: Apply room filter to rooms list

rank_map, event_map = yield self.store.search_msgs(room_ids, search_term, keys)

allowed_events = yield self._filter_events_for_client(
user.to_string(), event_map.values()
)

# TODO: Filter allowed_events
# TODO: Add a limit

time_now = self.clock.time_msec()

results = {
e.event_id: {
"rank": rank_map[e.event_id],
"result": serialize_event(e, time_now)
}
for e in allowed_events
}

logger.info("Found %d results", len(results))

defer.returnValue({
"search_categories": {
"room_events": {
"results": results,
"count": len(results)
}
}
})
17 changes: 17 additions & 0 deletions synapse/rest/client/v1/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,22 @@ def on_PUT(self, request, room_id, user_id):
defer.returnValue((200, {}))


class SearchRestServlet(ClientV1RestServlet):
PATTERN = client_path_pattern(
"/search$"
)

@defer.inlineCallbacks
def on_POST(self, request):
auth_user, _ = yield self.auth.get_user_by_req(request)

content = _parse_json(request)

results = yield self.handlers.search_handler.search(auth_user, content)

defer.returnValue((200, results))


def _parse_json(request):
try:
content = json.loads(request.content.read())
Expand Down Expand Up @@ -611,3 +627,4 @@ def register_servlets(hs, http_server):
RoomInitialSyncRestServlet(hs).register(http_server)
RoomRedactEventRestServlet(hs).register(http_server)
RoomTypingRestServlet(hs).register(http_server)
SearchRestServlet(hs).register(http_server)
2 changes: 2 additions & 0 deletions synapse/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .end_to_end_keys import EndToEndKeyStore

from .receipts import ReceiptsStore
from .search import SearchStore


import logging
Expand Down Expand Up @@ -69,6 +70,7 @@ class DataStore(RoomMemberStore, RoomStore,
EventsStore,
ReceiptsStore,
EndToEndKeyStore,
SearchStore,
):

def __init__(self, hs):
Expand Down
2 changes: 1 addition & 1 deletion synapse/storage/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ def _simple_select_one_onecol(self, table, keyvalues, retcol,
allow_none=False,
desc="_simple_select_one_onecol"):
"""Executes a SELECT query on the named table, which is expected to
return a single row, returning a single column from it."
return a single row, returning a single column from it.
Args:
table : string giving the table name
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,8 @@ def _persist_events_txn(self, txn, events_and_contexts, backfilled,
self._store_room_name_txn(txn, event)
elif event.type == EventTypes.Topic:
self._store_room_topic_txn(txn, event)
elif event.type == EventTypes.Message:
self._store_room_message_txn(txn, event)
elif event.type == EventTypes.Redaction:
self._store_redaction(txn, event)

Expand Down
32 changes: 32 additions & 0 deletions synapse/storage/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from ._base import SQLBaseStore
from synapse.util.caches.descriptors import cachedInlineCallbacks
from .engines import PostgresEngine, Sqlite3Engine

import collections
import logging
Expand Down Expand Up @@ -175,6 +176,10 @@ def _store_room_topic_txn(self, txn, event):
},
)

self._store_event_search_txn(
txn, event, "content.topic", event.content["topic"]
)

def _store_room_name_txn(self, txn, event):
if hasattr(event, "content") and "name" in event.content:
self._simple_insert_txn(
Expand All @@ -187,6 +192,33 @@ def _store_room_name_txn(self, txn, event):
}
)

self._store_event_search_txn(
txn, event, "content.name", event.content["name"]
)

def _store_room_message_txn(self, txn, event):
if hasattr(event, "content") and "body" in event.content:
self._store_event_search_txn(
txn, event, "content.body", event.content["body"]
)

def _store_event_search_txn(self, txn, event, key, value):
if isinstance(self.database_engine, PostgresEngine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, vector)"
" VALUES (?,?,?,to_tsvector('english', ?))"
)
elif isinstance(self.database_engine, Sqlite3Engine):
sql = (
"INSERT INTO event_search (event_id, room_id, key, value)"
" VALUES (?,?,?,?)"
)
else:
# This should be unreachable.
raise Exception("Unrecognized database engine")

txn.execute(sql, (event.event_id, event.room_id, key, value,))

@cachedInlineCallbacks()
def get_room_name_and_aliases(self, room_id):
def f(txn):
Expand Down
123 changes: 123 additions & 0 deletions synapse/storage/schema/delta/24/fts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright 2015 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from synapse.storage.prepare_database import get_statements
from synapse.storage.engines import PostgresEngine, Sqlite3Engine

import ujson

logger = logging.getLogger(__name__)


POSTGRES_SQL = """
CREATE TABLE event_search (
event_id TEXT,
room_id TEXT,
key TEXT,
vector tsvector
);
INSERT INTO event_search SELECT
event_id, room_id, 'content.body',
to_tsvector('english', json::json->'content'->>'body')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.message';
INSERT INTO event_search SELECT
event_id, room_id, 'content.name',
to_tsvector('english', json::json->'content'->>'name')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.name';
INSERT INTO event_search SELECT
event_id, room_id, 'content.topic',
to_tsvector('english', json::json->'content'->>'topic')
FROM events NATURAL JOIN event_json WHERE type = 'm.room.topic';
CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
CREATE INDEX event_search_ev_idx ON event_search(event_id);
CREATE INDEX event_search_ev_ridx ON event_search(room_id);
"""


SQLITE_TABLE = (
"CREATE VIRTUAL TABLE event_search USING fts3 ( event_id, room_id, key, value)"
)


def run_upgrade(cur, database_engine, *args, **kwargs):
if isinstance(database_engine, PostgresEngine):
run_postgres_upgrade(cur)
return

if isinstance(database_engine, Sqlite3Engine):
run_sqlite_upgrade(cur)
return


def run_postgres_upgrade(cur):
for statement in get_statements(POSTGRES_SQL.splitlines()):
cur.execute(statement)


def run_sqlite_upgrade(cur):
cur.execute(SQLITE_TABLE)

rowid = -1
while True:
cur.execute(
"SELECT rowid, json FROM event_json"
" WHERE rowid > ?"
" ORDER BY rowid ASC LIMIT 100",
(rowid,)
)

res = cur.fetchall()

if not res:
break

events = [
ujson.loads(js)
for _, js in res
]

rowid = max(rid for rid, _ in res)

rows = []
for ev in events:
if ev["type"] == "m.room.message":
rows.append((
ev["event_id"], ev["room_id"], "content.body",
ev["content"]["body"]
))
if ev["type"] == "m.room.name":
rows.append((
ev["event_id"], ev["room_id"], "content.name",
ev["content"]["name"]
))
if ev["type"] == "m.room.topic":
rows.append((
ev["event_id"], ev["room_id"], "content.topic",
ev["content"]["topic"]
))

if rows:
logger.info(rows)
cur.executemany(
"INSERT INTO event_search (event_id, room_id, key, value)"
" VALUES (?,?,?,?)",
rows
)
Loading

0 comments on commit e0bf025

Please sign in to comment.