Skip to content

Commit

Permalink
Port over Mailpile date search logic, improve search term magic
Browse files Browse the repository at this point in the history
  • Loading branch information
BjarniRunar committed Oct 11, 2021
1 parent 5bcbff7 commit 8072fa6
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 66 deletions.
100 changes: 80 additions & 20 deletions moggie/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,23 @@
from ..storage.records import RecordFile, RecordStore


def explain_ops(ops):
    """
    Render a parsed search expression as a human-readable string.

    Plain strings are returned as-is and IntSet.All becomes 'ALL'. Anything
    else is treated as a sequence whose first element is one of IntSet.Or,
    IntSet.And or IntSet.Sub and whose remaining elements are sub-terms.
    The sub-terms are explained recursively, joined with the operator's
    name (' OR ', ' AND ' or ' NOT ') and wrapped in parentheses.

    Raises ValueError if the first element is not a recognized operator.
    """
    if isinstance(ops, str):
        return ops
    if ops == IntSet.All:
        return 'ALL'

    # Checked in the same order as the operators are listed here.
    joiners = (
        (IntSet.Or, ' OR '),
        (IntSet.And, ' AND '),
        (IntSet.Sub, ' NOT '))
    for operator, joiner in joiners:
        if ops[0] == operator:
            explained = [explain_ops(term) for term in ops[1:]]
            return '(' + joiner.join(explained) + ')'

    raise ValueError('What op is %s' % ops[0])


class PostingListBucket:
"""
A PostingListBucket is an unsorted sequence of binary packed
Expand Down Expand Up @@ -123,9 +140,19 @@ def __init__(self, workdir,
self.maxint = maxint
self.deleted = IntSet()

# Someday, this might be configurable?
# Someday, these might be configurable/pluggable?

from .parse_greedy import greedy_parse_terms
self.parse_terms = greedy_parse_terms
self.magic_map = [
('@', self.magic_emails),
(':', self.magic_terms),
('*', self.magic_candidates)]

from .dates import date_term_magic
self.magic_term_map = {
'date': date_term_magic,
'dates': date_term_magic}

def delete_everything(self, *args):
self.records.delete_everything(*args)
Expand Down Expand Up @@ -195,17 +222,12 @@ def add_results(self, results):
self.records[idx] = plb.blob

def __getitem__(self, keyword):
if '*' in keyword:
matches = self.config.get('partial_matches', 10)
return IntSet.Or(*[
self[kw] for kw in self.candidates(keyword, matches)])
idx = self.keyword_index(keyword)
if idx < self.l2_begin:
raise KeyError('FIXME: Unimplemented')
else:
idx = self.keyword_index(keyword)
if idx < self.l2_begin:
raise KeyError('FIXME: Unimplemented')
else:
plb = PostingListBucket(self.records.get(idx) or b'')
return plb.get(keyword) or IntSet()
plb = PostingListBucket(self.records.get(idx) or b'')
return plb.get(keyword) or IntSet()

def _search(self, term):
if isinstance(term, tuple):
Expand All @@ -223,7 +245,10 @@ def _search(self, term):

raise ValueError('Unknown supported search type: %s' % type(term))

def search(self, terms, mask_deleted=True):
def explain(self, terms):
    """
    Parse a search string and return a human-readable rendering of it.

    The string is parsed with self.parse_terms(), using self.magic_map,
    and the resulting operations are formatted by explain_ops().
    """
    ops = self.parse_terms(terms, self.magic_map)
    return explain_ops(ops)

def search(self, terms, mask_deleted=True, cache=False, explain=False):
"""
Search for terms in the index, returning an IntSet.
Expand All @@ -236,13 +261,40 @@ def search(self, terms, mask_deleted=True):
tuples, allowing arbitrarily complex trees of AND/OR/SUB searches.
"""
if isinstance(terms, str):
ops = self.parse_terms(terms)
ops = self.parse_terms(terms, self.magic_map)
else:
ops = terms
if mask_deleted:
return IntSet.Sub(self._search(ops), self.deleted)
rv = IntSet.Sub(self._search(ops), self.deleted)
else:
rv = self._search(ops)
if explain or cache:
rv = (ops, rv)
if cache:
cache_id = self._cache_result(rv)
return (cache_id, rv)
else:
return self._search(ops)
return rv

def magic_terms(self, term):
    """
    Apply prefix-specific magic to a "prefix:value" search term.

    The lowercased text before the first colon is looked up in
    self.magic_term_map. If a handler is registered for it, the handler
    is called with the whole term and its result is returned; otherwise
    the term is returned unchanged.
    """
    prefix = term.partition(':')[0].lower()
    handler = self.magic_term_map.get(prefix)

    # FIXME: Convert to:me, from:me into e-mail searches

    return term if handler is None else handler(term)

def magic_emails(self, term):
    """
    Placeholder for e-mail address magic; returns the term unchanged.
    """
    # FIXME: A no-op
    return term

def magic_candidates(self, term):
    """
    Expand a wildcard search term into the keywords it may match.

    Calls self.candidates() with the term and config['partial_matches']
    (default 10). Several candidates are returned as an
    (IntSet.Or, kw1, kw2, ...) tuple and a single candidate is returned
    on its own. If there are no candidates at all, the term is returned
    unchanged rather than raising IndexError.
    """
    limit = self.config.get('partial_matches', 10)
    # Normalize to a list so any iterable of candidates can be handled.
    matches = list(self.candidates(term, limit))
    if not matches:
        # Nothing to expand to; leave the term for the caller to handle.
        return term
    if len(matches) == 1:
        return matches[0]
    return tuple([IntSet.Or] + matches)


if __name__ == '__main__':
Expand Down Expand Up @@ -272,8 +324,8 @@ def search(self, terms, mask_deleted=True):
assert(list(se.search(IntSet.All)) == [1, 2])

# Basic search correctness
assert(1 in se.search(['hello', 'world']))
assert(2 not in se.search(['hello', 'world']))
assert(1 in se.search('hello world'))
assert(2 not in se.search('hello world'))
assert([] == list(se.search('notfound')))

# Enable and test partial word searches
Expand All @@ -287,12 +339,20 @@ def search(self, terms, mask_deleted=True):
assert(len(se.candidates('*ell', 10)) == 2) # ell, hell
assert(len(se.candidates('*ell*', 10)) == 4) # ell, hell, hello, hellyeah
assert(len(se.candidates('he*ah', 10)) == 2) # hepe, hellyeah
assert(1 in se.search(['hell*', 'w*ld']))
assert(1 in se.search('hell* w*ld'))

# Test our and/or functionality
assert(list(se.search('hello')) == list(se.search((IntSet.Or, 'world', 'iceland'))))

# Test the explainer and parse_terms with candidate magic
assert(explain_ops(se.parse_terms('* - is:deleted he*o WORLD +Iceland', se.magic_map))
== '(((ALL NOT is:deleted) AND (heo OR hello) AND world) OR iceland)')

# Test the explainer and parse_terms with date range magic
assert(se.explain('dates:2012..2013 OR date:2015')
== '((year:2012 OR year:2013) OR year:2015)')

print('Tests pass OK')
import time
time.sleep(10)
#import time
#time.sleep(10)
se.delete_everything(True, False, True)
72 changes: 34 additions & 38 deletions Attic/mailpile/plugins/dates.py → moggie/search/dates.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,9 @@
import time
import datetime

from mailpile.plugins import PluginManager
from mailpile.i18n import gettext as _
from mailpile.i18n import ngettext as _n
from ..util.intset import IntSet


_plugins = PluginManager(builtin=__name__)


##[ Keywords ]################################################################

def meta_kw_extractor(index, msg_mid, msg, msg_size, msg_ts, **kwargs):
mdate = datetime.date.fromtimestamp(msg_ts)
keywords = [
'%s:year' % mdate.year,
'%s:month' % mdate.month,
'%s:day' % mdate.day,
'%s-%s:yearmonth' % (mdate.year, mdate.month),
'%s-%s-%s:date' % (mdate.year, mdate.month, mdate.day)
]
return keywords

_plugins.register_meta_kw_extractor('dates', meta_kw_extractor)


##[ Search terms ]############################################################

def _adjust(d):
if d[2] > 31:
d[1] += 1
Expand All @@ -47,11 +24,21 @@ def _mk_date(ts):
'd': 1,
'w': 7,
'm': 31,
'q': 91
}
'q': 91}


def search(config, idx, term, hits):

def ts_to_keywords(msg_ts):
    """
    Return the date-related search keywords for a message timestamp.

    The timestamp is converted using datetime.date.fromtimestamp(), and
    the result is a list of five keywords: year:Y, month:M, day:D,
    yearmonth:Y-M and date:Y-M-D. Numbers are not zero-padded.
    """
    year, month, day = datetime.date.fromtimestamp(msg_ts).timetuple()[:3]
    return [
        'year:%s' % year,
        'month:%s' % month,
        'day:%s' % day,
        'yearmonth:%s-%s' % (year, month),
        'date:%s-%s-%s' % (year, month, day)]


def date_term_magic(term):
try:
word = term.split(':', 1)[1].lower()
if '..' in term:
Expand Down Expand Up @@ -92,31 +79,40 @@ def search(config, idx, term, hits):
if start[1:] == [1, 1]:
ny = [start[0], 12, 31]
if ny <= end:
terms.append('%d:year' % start[0])
terms.append('year:%d' % start[0])
start[0] += 1
continue

# Move forward one month?
if start[2] == 1:
nm = [start[0], start[1], 31]
if nm <= end:
terms.append('%d-%d:yearmonth' % (start[0], start[1]))
terms.append('yearmonth:%d-%d' % (start[0], start[1]))
start[1] += 1
_adjust(start)
continue

# Move forward one day...
terms.append('%d-%d-%d:date' % tuple(start))
terms.append('date:%d-%d-%d' % tuple(start))
start[2] += 1
_adjust(start)

rt = []
for t in terms:
rt.extend(hits(t))
return rt
except:
raise ValueError('Invalid date range: %s' % term)
return tuple([IntSet.Or] + terms)
except (ValueError, KeyError, IndexError, TypeError, NameError):
return term


if __name__ == '__main__':
from . import explain_ops

assert(explain_ops(date_term_magic('dates:2012'))
== '(year:2012)')

assert(explain_ops(date_term_magic('dates:2012..2014'))
== '(year:2012 OR year:2013 OR year:2014)')

assert(explain_ops(date_term_magic('dates:2021-10-30..2021-12'))
== ('(date:2021-10-30 OR date:2021-10-31 OR '
'yearmonth:2021-11 OR yearmonth:2021-12)'))

_plugins.register_search_term('dates', search)
_plugins.register_search_term('date', search)
print('Tests pass OK')
23 changes: 15 additions & 8 deletions moggie/search/parse_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,16 @@ def _flat(search):
# operator last time, but the current isn't AND: fix it.
search_stack[-1] = [IntSet.And, _flat(search_stack[-1])]

magic = None
for char in magic_map:
for char, magic in magic_map:
if char in term:
magic = magic_map[char]
if magic is None:
term = magic(term)
if not isinstance(term, str):
break

if isinstance(term, str):
search_stack[-1].append(term.lower())
else:
search_stack[-1].append(_flat(magic(term)))
search_stack[-1].append(_flat(term))
changed = False

# Close all dangling parens
Expand Down Expand Up @@ -124,11 +126,16 @@ def _flat(search):
assert(greedy_parse_terms('ALL - iceland')
== (IntSet.Sub, IntSet.All, 'iceland'))

def swapper(kw):
def swapper_one(kw):
return ':'.join(reversed(kw.split(':')))

def swapper_many(kw):
return (IntSet.Or, kw, ':'.join(reversed(kw.split(':'))))

assert(greedy_parse_terms('yes hel:lo world', {':': swapper})
== (IntSet.And, 'yes', (IntSet.Or, 'hel:lo', 'lo:hel'), 'world'))
assert(greedy_parse_terms('yes hel:lo world', [
(':', swapper_one), # Maps to lo:hel
(':', swapper_many)]) # ORs with hel:lo
== (IntSet.And, 'yes', (IntSet.Or, 'lo:hel', 'hel:lo'), 'world'))

print('Tests passed OK')
import sys
Expand Down

0 comments on commit 8072fa6

Please sign in to comment.