diff --git a/moggie/search/__init__.py b/moggie/search/__init__.py
index 1a38babb2..37192445f 100644
--- a/moggie/search/__init__.py
+++ b/moggie/search/__init__.py
@@ -8,6 +8,32 @@
 from ..storage.records import RecordFile, RecordStore
 
 
+def explain_ops(ops):
+    """
+    Render parsed search terms as a human readable string.
+
+    Strings are returned as-is and IntSet.All is rendered as 'ALL'.
+    Anything else must be a sequence starting with IntSet.Or, IntSet.And
+    or IntSet.Sub; the remaining elements are explained recursively,
+    joined by ' OR ', ' AND ' or ' NOT ' and wrapped in parentheses.
+    Raises ValueError if the first element is any other value.
+    """
+    if isinstance(ops, str):
+        return ops
+    if ops == IntSet.All:
+        return 'ALL'
+
+    if ops[0] == IntSet.Or:
+        op = ' OR '
+    elif ops[0] == IntSet.And:
+        op = ' AND '
+    elif ops[0] == IntSet.Sub:
+        op = ' NOT '
+    else:
+        raise ValueError('What op is %s' % ops[0])
+    return '(' + op.join([explain_ops(term) for term in ops[1:]]) + ')'
+
+
 class PostingListBucket:
     """
     A PostingListBucket is an unsorted sequence of binary packed
@@ -123,9 +149,19 @@ def __init__(self, workdir,
         self.maxint = maxint
         self.deleted = IntSet()
 
-        # Someday, this might be configurable?
+        # Someday, these might be configurable/pluggable?
+        from .parse_greedy import greedy_parse_terms
         self.parse_terms = greedy_parse_terms
 
+        self.magic_map = [
+            ('@', self.magic_emails),
+            (':', self.magic_terms),
+            ('*', self.magic_candidates)]
+
+        from .dates import date_term_magic
+        self.magic_term_map = {
+            'date': date_term_magic,
+            'dates': date_term_magic}
 
     def delete_everything(self, *args):
         self.records.delete_everything(*args)
@@ -195,17 +231,12 @@ def add_results(self, results):
             self.records[idx] = plb.blob
 
     def __getitem__(self, keyword):
-        if '*' in keyword:
-            matches = self.config.get('partial_matches', 10)
-            return IntSet.Or(*[
-                self[kw] for kw in self.candidates(keyword, matches)])
+        idx = self.keyword_index(keyword)
+        if idx < self.l2_begin:
+            raise KeyError('FIXME: Unimplemented')
         else:
-            idx = self.keyword_index(keyword)
-            if idx < self.l2_begin:
-                raise KeyError('FIXME: Unimplemented')
-            else:
-                plb = PostingListBucket(self.records.get(idx) or b'')
-                return plb.get(keyword) or IntSet()
+            plb = PostingListBucket(self.records.get(idx) or b'')
+            return plb.get(keyword) or IntSet()
 
     def _search(self, term):
         if isinstance(term, tuple):
@@ -223,7 +254,14 @@ def _search(self, term):
 
         raise ValueError('Unknown supported search type: %s' % type(term))
 
-    def search(self, terms, mask_deleted=True):
+    def explain(self, terms):
+        """
+        Parse a search string and return a human readable explanation
+        of the resulting operations, without running the search.
+        """
+        return explain_ops(self.parse_terms(terms, self.magic_map))
+
+    def search(self, terms, mask_deleted=True, cache=False, explain=False):
         """
         Search for terms in the index, returning an IntSet.
 
@@ -236,13 +274,56 @@ def search(self, terms, mask_deleted=True):
         tuples, allowing arbitrarily complex trees of AND/OR/SUB searches.
         """
         if isinstance(terms, str):
-            ops = self.parse_terms(terms)
+            ops = self.parse_terms(terms, self.magic_map)
         else:
             ops = terms
         if mask_deleted:
-            return IntSet.Sub(self._search(ops), self.deleted)
+            rv = IntSet.Sub(self._search(ops), self.deleted)
+        else:
+            rv = self._search(ops)
+        if explain or cache:
+            rv = (ops, rv)
+        if cache:
+            cache_id = self._cache_result(rv)
+            return (cache_id, rv)
         else:
-            return self._search(ops)
+            return rv
+
+    def magic_terms(self, term):
+        """
+        Rewrite a prefix:value term using the handler registered for
+        its prefix in self.magic_term_map; other terms are returned
+        unchanged.
+        """
+        what = term.split(':')[0].lower()
+        magic = self.magic_term_map.get(what)
+        if magic is not None:
+            return magic(term)
+
+        # FIXME: Convert to:me, from:me into e-mail searches
+
+        return term
+
+    def magic_emails(self, term):
+        """Placeholder for e-mail address magic; returns term as-is."""
+        return term  # FIXME: A no-op
+
+    def magic_candidates(self, term):
+        """
+        Expand a wildcard term into an OR of its candidate keywords,
+        or a single keyword if only one candidate is found.
+
+        The term is returned unchanged if there are no candidates,
+        instead of raising an IndexError.
+        """
+        max_matches = self.config.get('partial_matches', 10)
+        matches = self.candidates(term, max_matches)
+        if len(matches) > 1:
+            return tuple([IntSet.Or] + matches)
+        elif matches:
+            return matches[0]
+        else:
+            return term
 
 
 if __name__ == '__main__':
@@ -272,8 +353,8 @@ def search(self, terms, mask_deleted=True):
     assert(list(se.search(IntSet.All)) == [1, 2])
 
-    # Basic search correctnesss
-    assert(1 in se.search(['hello', 'world']))
-    assert(2 not in se.search(['hello', 'world']))
+    # Basic search correctness
+    assert(1 in se.search('hello world'))
+    assert(2 not in se.search('hello world'))
     assert([] == list(se.search('notfound')))
 
     # Enable and test partial word searches
@@ -287,12 +368,18 @@ def search(self, terms, mask_deleted=True):
     assert(len(se.candidates('*ell', 10)) == 2)   # ell, hell
     assert(len(se.candidates('*ell*', 10)) == 4)  # ell, hell, hello, hellyeah
     assert(len(se.candidates('he*ah', 10)) == 2)  # hepe, hellyeah
-    assert(1 in se.search(['hell*', 'w*ld']))
+    assert(1 in se.search('hell* w*ld'))
 
     # Test our and/or functionality
     assert(list(se.search('hello')) == list(se.search((IntSet.Or, 'world', 'iceland'))))
 
+    # Test the explainer and parse_terms with candidate magic
+    assert(explain_ops(se.parse_terms('* - is:deleted he*o WORLD +Iceland', se.magic_map))
+        == '(((ALL NOT is:deleted) AND (heo OR hello) AND world) OR iceland)')
+
+    # Test the explainer and parse_terms with date range magic
+    assert(se.explain('dates:2012..2013 OR date:2015')
+        == '((year:2012 OR year:2013) OR year:2015)')
+
     print('Tests pass OK')
-    import time
-    time.sleep(10)
     se.delete_everything(True, False, True)
diff --git a/Attic/mailpile/plugins/dates.py b/moggie/search/dates.py
similarity index 65%
rename from Attic/mailpile/plugins/dates.py
rename to moggie/search/dates.py
index afd5d7975..5429ddaf7 100644
--- a/Attic/mailpile/plugins/dates.py
+++ b/moggie/search/dates.py
@@ -1,32 +1,9 @@
 import time
 import datetime
 
-from mailpile.plugins import PluginManager
-from mailpile.i18n import gettext as _
-from mailpile.i18n import ngettext as _n
+from ..util.intset import IntSet
 
 
-_plugins = PluginManager(builtin=__name__)
-
-
-##[ Keywords ]################################################################
-
-def meta_kw_extractor(index, msg_mid, msg, msg_size, msg_ts, **kwargs):
-    mdate = datetime.date.fromtimestamp(msg_ts)
-    keywords = [
-        '%s:year' % mdate.year,
-        '%s:month' % mdate.month,
-        '%s:day' % mdate.day,
-        '%s-%s:yearmonth' % (mdate.year, mdate.month),
-        '%s-%s-%s:date' % (mdate.year, mdate.month, mdate.day)
-    ]
-    return keywords
-
-_plugins.register_meta_kw_extractor('dates', meta_kw_extractor)
-
-
-##[ Search terms ]############################################################
-
 def _adjust(d):
     if d[2] > 31:
         d[1] += 1
@@ -47,11 +24,30 @@ def _mk_date(ts):
     'd': 1,
     'w': 7,
     'm': 31,
-    'q': 91
-}
+    'q': 91}
 
 
-def search(config, idx, term, hits):
+
+def ts_to_keywords(msg_ts):
+    """
+    Return the year, month, day, yearmonth and date keywords for a
+    Unix timestamp, using the local-time date of that timestamp.
+    """
+    mdate = datetime.date.fromtimestamp(msg_ts)
+    return [
+        'year:%s' % mdate.year,
+        'month:%s' % mdate.month,
+        'day:%s' % mdate.day,
+        'yearmonth:%s-%s' % (mdate.year, mdate.month),
+        'date:%s-%s-%s' % (mdate.year, mdate.month, mdate.day)]
+
+
+def date_term_magic(term):
+    """
+    Expand a search term such as dates:2012..2014 into a tuple of
+    IntSet.Or followed by year:, yearmonth: and date: keyword terms.
+    The term is returned unchanged if it cannot be parsed.
+    """
     try:
         word = term.split(':', 1)[1].lower()
         if '..' in term:
@@ -92,7 +88,7 @@ def search(config, idx, term, hits):
             if start[1:] == [1, 1]:
                 ny = [start[0], 12, 31]
                 if ny <= end:
-                    terms.append('%d:year' % start[0])
+                    terms.append('year:%d' % start[0])
                     start[0] += 1
                     continue
 
@@ -100,23 +96,32 @@ def search(config, idx, term, hits):
             if start[2] == 1:
                 nm = [start[0], start[1], 31]
                 if nm <= end:
-                    terms.append('%d-%d:yearmonth' % (start[0], start[1]))
+                    terms.append('yearmonth:%d-%d' % (start[0], start[1]))
                     start[1] += 1
                     _adjust(start)
                     continue
 
             # Move forward one day...
-            terms.append('%d-%d-%d:date' % tuple(start))
+            terms.append('date:%d-%d-%d' % tuple(start))
             start[2] += 1
             _adjust(start)
 
-        rt = []
-        for t in terms:
-            rt.extend(hits(t))
-        return rt
-    except:
-        raise ValueError('Invalid date range: %s' % term)
+        return tuple([IntSet.Or] + terms)
+    except (ValueError, KeyError, IndexError, TypeError, NameError):
+        return term
+
+
+if __name__ == '__main__':
+    from . import explain_ops
+
+    assert(explain_ops(date_term_magic('dates:2012'))
+        == '(year:2012)')
+
+    assert(explain_ops(date_term_magic('dates:2012..2014'))
+        == '(year:2012 OR year:2013 OR year:2014)')
 
+    assert(explain_ops(date_term_magic('dates:2021-10-30..2021-12'))
+        == ('(date:2021-10-30 OR date:2021-10-31 OR '
+            'yearmonth:2021-11 OR yearmonth:2021-12)'))
 
-_plugins.register_search_term('dates', search)
-_plugins.register_search_term('date', search)
+    print('Tests pass OK')
diff --git a/moggie/search/parse_greedy.py b/moggie/search/parse_greedy.py
index 3a9d465f9..900fd90d5 100644
--- a/moggie/search/parse_greedy.py
+++ b/moggie/search/parse_greedy.py
@@ -84,14 +84,18 @@ def _flat(search):
                 # operator last time, but the current isn't AND: fix it.
                 search_stack[-1] = [IntSet.And, _flat(search_stack[-1])]
 
-            magic = None
-            for char in magic_map:
+            # Apply each matching magic in order; stop as soon as one
+            # expands the term into something other than a string.
+            for char, magic in magic_map:
                 if char in term:
-                    magic = magic_map[char]
-            if magic is None:
+                    term = magic(term)
+                    if not isinstance(term, str):
+                        break
+
+            if isinstance(term, str):
                 search_stack[-1].append(term.lower())
             else:
-                search_stack[-1].append(_flat(magic(term)))
+                search_stack[-1].append(_flat(term))
             changed = False
 
     # Close all dangling parens
@@ -124,11 +128,16 @@ def _flat(search):
     assert(greedy_parse_terms('ALL - iceland')
         == (IntSet.Sub, IntSet.All, 'iceland'))
 
-    def swapper(kw):
+    def swapper_one(kw):
+        return ':'.join(reversed(kw.split(':')))
+
+    def swapper_many(kw):
         return (IntSet.Or, kw, ':'.join(reversed(kw.split(':'))))
 
-    assert(greedy_parse_terms('yes hel:lo world', {':': swapper})
-        == (IntSet.And, 'yes', (IntSet.Or, 'hel:lo', 'lo:hel'), 'world'))
+    assert(greedy_parse_terms('yes hel:lo world', [
+            (':', swapper_one),    # Maps to lo:hel
+            (':', swapper_many)])  # ORs with hel:lo
+        == (IntSet.And, 'yes', (IntSet.Or, 'lo:hel', 'hel:lo'), 'world'))
 
     print('Tests passed OK')
     import sys