From 7f48bda64807b36b2f90f3f0b180ca96ec9b7f82 Mon Sep 17 00:00:00 2001
From: Nolan Lawson
Date: Sat, 12 Oct 2013 17:19:33 -0700
Subject: [PATCH] begin work on fixing #32

---
Reviewer notes (text between the "---" line and the first "diff --git"
line is not part of any hunk):

  * examples/example_synonym_file.txt: adds the synonym group
    "e-commerce,electronic commerce,e commerce".
  * SynonymExpandingExtendedDismaxQParserPlugin.java: in
    COMPLEX_QUERY_OPERATORS_PATTERN the "-" alternative moves out of the
    \b(...)\b group and becomes \s-\b, i.e. a hyphen preceded by
    whitespace and followed by a word boundary.  A hyphen between two
    word characters ("e-commerce") no longer matches the pattern, while
    a whitespace-prefixed one ("e -commerce") still does.
  * test/007-test-query-operators.py: new test that talks to
    http://localhost:8983/solr.  setUp and tearDown both issue
    delete_query('*:*') against that URL.  The file uses the Python 2
    print statement.
  * NOTE(review): this patch was recovered from a copy in which line
    breaks and runs of whitespace had been collapsed.  Line structure is
    restored below; leading indentation inside the hunks is conventional
    (4 spaces per level) and should be confirmed against the target tree
    before applying.

 examples/example_synonym_file.txt                  |  1 +
 ...mExpandingExtendedDismaxQParserPlugin.java      |  2 +-
 test/007-test-query-operators.py                   | 50 +++++++++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 test/007-test-query-operators.py

diff --git a/examples/example_synonym_file.txt b/examples/example_synonym_file.txt
index 516bad3..b03d2f9 100644
--- a/examples/example_synonym_file.txt
+++ b/examples/example_synonym_file.txt
@@ -1,2 +1,3 @@
 dog,hound,pooch,canis familiaris,man's best friend
 back pack=>backpack
+e-commerce,electronic commerce,e commerce
diff --git a/src/main/java/org/apache/solr/search/SynonymExpandingExtendedDismaxQParserPlugin.java b/src/main/java/org/apache/solr/search/SynonymExpandingExtendedDismaxQParserPlugin.java
index 035b025..155212d 100644
--- a/src/main/java/org/apache/solr/search/SynonymExpandingExtendedDismaxQParserPlugin.java
+++ b/src/main/java/org/apache/solr/search/SynonymExpandingExtendedDismaxQParserPlugin.java
@@ -233,7 +233,7 @@ private static class Const {
          */
         static final String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
 
-        static final Pattern COMPLEX_QUERY_OPERATORS_PATTERN = Pattern.compile("(?:\\*|\\b(?:OR|AND|-|\\+)\\b)");
+        static final Pattern COMPLEX_QUERY_OPERATORS_PATTERN = Pattern.compile("(?:\\*|\\s-\\b|\\b(?:OR|AND|\\+)\\b)");
     }
 
     private Map synonymAnalyzers;
diff --git a/test/007-test-query-operators.py b/test/007-test-query-operators.py
new file mode 100644
index 0000000..d21019c
--- /dev/null
+++ b/test/007-test-query-operators.py
@@ -0,0 +1,50 @@
+#
+# Basic unit tests for HON-Lucene-Synonyms
+#
+# This one tests some of the problems found in issues #28 and #32
+#
+
+import unittest, solr, urllib
+
+class TestBasic(unittest.TestCase):
+
+    url = 'http://localhost:8983/solr'
+    test_data = [ \
+        {'id': '1', 'name': "e-commerce"}, \
+        {'id': '2', 'name': "electronic commerce"}, \
+    ]
+    solr_connection = None
+
+    def setUp(self):
+        self.solr_connection = solr.SolrConnection(self.url)
+        self.solr_connection.delete_query('*:*')
+        self.solr_connection.add_many(self.test_data)
+        self.solr_connection.commit()
+
+    def tearDown(self):
+        self.solr_connection.delete_query('*:*')
+        self.solr_connection.commit()
+
+    def test_queries(self):
+
+        self.tst_query({}, 'commerce', 2)
+        self.tst_query({}, 'electronic commerce', 2)
+        self.tst_query({}, 'e-commerce', 2)
+
+        # means "shouldn't contain the word commerce"
+        self.tst_query({}, 'e -commerce', 0)
+
+    def tst_query(self, extra_params, query, expected_num_docs):
+
+        params = {'q': query, 'qf' : 'name', 'mm' : '100%', 'defType' : 'synonym_edismax', 'synonyms' : 'true'}
+        params.update(extra_params)
+
+        response = self.solr_connection.query(**params)
+        results = response.results
+        print '\ntesting ',self.url + '/select?' + urllib.urlencode(params),\
+        '\n',map(lambda x: x['name'],results),'\nActual: %s, Expected: %s' % (len(results), expected_num_docs)
+
+        self.assertEqual(len(results), expected_num_docs)
+
+if __name__ == '__main__':
+    unittest.main()