Skip to content

Commit

Permalink
Add brute force regex query support
Browse files Browse the repository at this point in the history
  • Loading branch information
Christoph Büscher committed Jul 22, 2020
1 parent 397a7c0 commit 08989e7
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,20 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable;
Expand Down Expand Up @@ -207,6 +212,46 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
return wildcardQuery(value + "*", method, context);
}

/**
 * Builds a "brute force" regular-expression query for this version field.
 *
 * Indexed terms are stored in an encoded form, so the regular expression cannot be intersected with
 * the terms dictionary directly. Instead every term of the field is visited, decoded with
 * {@code VersionEncoder.decodeVersion} and the decoded text is matched against the compiled pattern.
 *
 * @param value                 the regular expression to match decoded version strings against
 * @param flags                 regular-expression syntax flags, passed through to {@link RegexpQuery}
 * @param maxDeterminizedStates limit passed through to {@link RegexpQuery}
 * @param method                optional rewrite method; applied to the query only when non-null
 * @param context               shard context, consulted for the "allow expensive queries" setting
 * @return the regexp query over the field {@code name()}
 * @throws ElasticsearchException if expensive queries are disabled in {@code context}
 */
@Override
public Query regexpQuery(
    String value,
    int flags,
    int maxDeterminizedStates,
    @Nullable MultiTermQuery.RewriteMethod method,
    QueryShardContext context
) {
    if (context.allowExpensiveQueries() == false) {
        throw new ElasticsearchException(
            "[regexp] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false."
        );
    }
    failIfNotIndexed();
    RegexpQuery query = new RegexpQuery(new Term(name(), new BytesRef(value)), flags, maxDeterminizedStates) {

        @Override
        protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
            // NOTE(review): Lucene can simplify the compiled automaton to "match all", "match none" or a
            // single literal term. In those cases runAutomaton is null; a literal is exposed via compiled.term.
            // Confirm against the CompiledAutomaton documentation of the Lucene version in use.
            final BytesRef literalTerm = compiled.term;
            if (compiled.runAutomaton == null && literalTerm == null) {
                // match-all / match-none do not depend on the term encoding, so the default enum is correct
                return super.getTermsEnum(terms, atts);
            }
            // startWithSeek == false: no seeking, accept() is asked about each term in turn
            return new FilteredTermsEnum(terms.iterator(), false) {

                @Override
                protected AcceptStatus accept(BytesRef term) throws IOException {
                    // The automaton operates on UTF-8 bytes, so encode explicitly instead of relying on
                    // the platform default charset.
                    byte[] decoded = VersionEncoder.decodeVersion(term).getBytes(java.nio.charset.StandardCharsets.UTF_8);
                    boolean accepted;
                    if (compiled.runAutomaton != null) {
                        accepted = compiled.runAutomaton.run(decoded, 0, decoded.length);
                    } else {
                        // single literal pattern: compare against the decoded version, not the encoded term
                        accepted = literalTerm.bytesEquals(new BytesRef(decoded));
                    }
                    return accepted ? AcceptStatus.YES : AcceptStatus.NO;
                }
            };
        }
    };

    if (method != null) {
        query.setRewriteMethod(method);
    }
    return query;
}

@Override
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
if (context.allowExpensiveQueries() == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,11 @@ public class VersionStringFieldMapperTests extends ESSingleNodeTestCase {

/**
 * Returns the plugin classes this test case asks for: the version field plugin and the local
 * composite x-pack plugin.
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    // TODO PainlessPlugin is left out of this list: loading it doesn't work when the test is run
    // through "gradle check"
    Collection<Class<? extends Plugin>> requiredPlugins = pluginList(
        VersionFieldPlugin.class,
        LocalStateCompositeXPackPlugin.class
    );
    return requiredPlugins;
}

public String setUpIndex(String indexName) throws IOException {
createIndex(
indexName,
Settings.builder().put("index.number_of_shards", 1).build(),
"_doc",
"version",
"type=version",
"foo",
"type=keyword"
);
createIndex(indexName, Settings.builder().put("index.number_of_shards", 1).build(), "_doc", "version", "type=version");
ensureGreen(indexName);

client().prepareIndex(indexName).setId("1").setSource(jsonBuilder().startObject().field("version", "11.1.0").endObject()).get();
Expand Down Expand Up @@ -186,6 +176,44 @@ public void testSort() throws IOException {
assertEquals("21.11.0", hits[5].getSortValues()[0]);
}

/**
 * Indexes five version strings into a single-shard index and checks that regexp queries on the
 * {@code version} field match against the original (decoded) version text.
 *
 * No explicit sort is requested; the expected hits are asserted in the order the documents were indexed.
 */
public void testRegexQuery() throws Exception {
    String indexName = "test_regex";
    createIndex(indexName, Settings.builder().put("index.number_of_shards", 1).build(), "_doc", "version", "type=version");
    ensureGreen(indexName);

    // documents get the ids "1".."5" in this order
    String[] versions = { "1.0.0-alpha.2.1.0-rc.1", "1.3.0+build.1234567", "2.1.0-alpha.beta", "2.1.0", "2.33.0" };
    for (int docIndex = 0; docIndex < versions.length; docIndex++) {
        client().prepareIndex(indexName)
            .setId(Integer.toString(docIndex + 1))
            .setSource(jsonBuilder().startObject().field("version", versions[docIndex]).endObject())
            .get();
    }
    client().admin().indices().prepareRefresh(indexName).get();

    // each row: { regular expression, expected first hit, expected second hit }
    String[][] expectations = {
        { "2.*0", "2.1.0", "2.33.0" },
        { "<0-10>.<0-10>.*al.*", "1.0.0-alpha.2.1.0-rc.1", "2.1.0-alpha.beta" },
        { "1.[0-9].[0-9].*", "1.0.0-alpha.2.1.0-rc.1", "1.3.0+build.1234567" } };

    for (String[] expectation : expectations) {
        SearchResponse response = client().prepareSearch(indexName)
            .setQuery(QueryBuilders.regexpQuery("version", expectation[0]))
            .get();
        assertEquals(2, response.getHits().getTotalHits().value);
        assertEquals(expectation[1], response.getHits().getHits()[0].getSourceAsMap().get("version"));
        assertEquals(expectation[2], response.getHits().getHits()[1].getSourceAsMap().get("version"));
    }
}

public void testWildcardQuery() throws Exception {
String indexName = "test_wildcard";
createIndex(
Expand Down

0 comments on commit 08989e7

Please sign in to comment.