Skip to content

Commit

Permalink
feat: n-gram analyzer added (#3896)
Browse files Browse the repository at this point in the history
* feat: n-gram analyzer added

* chore: code cleaning

* chore: code cleaning
  • Loading branch information
ymarcon authored Aug 23, 2024
1 parent 85003b6 commit 57cd2e7
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,11 @@
package org.obiba.opal.search.service.impl;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import java.util.List;
public class AnalyzerFactory {

public class VariablesAnalyzerFactory {

public static Analyzer newAnalyzer() {
return new StandardAnalyzer(new CharArraySet(List.of(" ", "_", ".", ";", ":", ",", "|", "&"), true));
public static Analyzer newVariablesAnalyzer() {
return new VariablesAnalyzer(3, 3);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2024 OBiBa. All rights reserved.
*
* This program and the accompanying materials
* are made available under the terms of the GNU Public License v3.0.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.obiba.opal.search.service.impl;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

/**
 * Lucene {@link Analyzer} that splits text with a {@link StandardTokenizer},
 * lower-cases every token and then expands each token into n-grams whose size
 * ranges from {@code minGram} to {@code maxGram}.
 *
 * <p>The n-gram filter is created with its {@code preserveOriginal} flag set to
 * {@code true}.
 */
public class VariablesAnalyzer extends Analyzer {

  private final int minGram;
  private final int maxGram;

  /**
   * Creates an analyzer producing n-grams in the given size range.
   *
   * @param minGram smallest n-gram size, must be at least 1
   * @param maxGram largest n-gram size, must not be smaller than {@code minGram}
   * @throws IllegalArgumentException if the size range is invalid
   */
  public VariablesAnalyzer(int minGram, int maxGram) {
    // Fail fast: otherwise an invalid range only surfaces later, when the
    // n-gram filter is instantiated inside createComponents().
    if (minGram < 1) {
      throw new IllegalArgumentException("minGram must be greater than zero, got " + minGram);
    }
    if (minGram > maxGram) {
      throw new IllegalArgumentException(
          "minGram (" + minGram + ") must not be greater than maxGram (" + maxGram + ")");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
  }

  /**
   * Builds the analysis chain: standard tokenizer, lower-casing, n-gram expansion.
   *
   * @param fieldName name of the field being analyzed (not used, every field gets the same chain)
   * @return the tokenizer and the final token stream of the chain
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final StandardTokenizer src = new StandardTokenizer();
    // The max token length is a constant here, so it is set once at creation
    // and does not need to be re-applied each time a new reader is attached.
    src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    // No stop-word filter: one configured with an empty set would let every token through.
    TokenStream tok = new LowerCaseFilter(src);
    tok = new NGramTokenFilter(tok, minGram, maxGram, true);
    return new TokenStreamComponents(src::setReader, tok);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ public String getIndexType() {
@Override
public void delete() {
File file = getIndexFile();
if (file.exists())
if (file.exists()) {
getIndexFile().delete();

}
}

@Override
Expand All @@ -59,7 +61,6 @@ public void create() {

public Document asDocument(Variable variable) {
Document doc = new Document();
String fullName = getValueTableReference();
doc.add(new TextField("project", table.getDatasource().getName(), Field.Store.YES));
doc.add(new TextField("datasource", table.getDatasource().getName(), Field.Store.YES));
doc.add(new TextField("table", table.getName(), Field.Store.YES));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public boolean hasIndex(ValueTable valueTable) {

IndexWriter newIndexWriter() {
try {
Analyzer analyzer = VariablesAnalyzerFactory.newAnalyzer();
Analyzer analyzer = AnalyzerFactory.newVariablesAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
return new IndexWriter(directory, config);
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public Search.QueryResultDto execute(QuerySettings querySettings) throws SearchE
IndexSearcher searcher = new IndexSearcher(reader);

// Build a QueryParser
Analyzer analyzer = VariablesAnalyzerFactory.newAnalyzer();
Analyzer analyzer = AnalyzerFactory.newVariablesAnalyzer();
QueryParser parser = new QueryParser("name", analyzer);

// Parse the search query
Expand Down Expand Up @@ -86,7 +86,11 @@ public Search.QueryResultDto execute(QuerySettings querySettings) throws SearchE
} catch (IOException e) {
throw new SearchException("Variables index access failure", e);
} catch (ParseException e) {
throw new SearchException("Wrong search query syntax", e);
if (log.isTraceEnabled())
log.warn("Wrong search query syntax", e);
else
log.warn("Wrong search query syntax: {}", e.getMessage());
return Search.QueryResultDto.newBuilder().setTotalHits(0).build();
}
}
}
5 changes: 2 additions & 3 deletions opal-ui/src/layouts/MainLayout.vue
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,11 @@ function onHelp() {
}
function onSearch() {
if (!query.value) {
if (!query.value || query.value.length < 3) {
showResults.value = false;
return;
}
const q = query.value.endsWith('*') ? query.value : `${query.value}*`;
searchStore.search(q, 10, ['label', 'label-en']).then((res) => {
searchStore.search(query.value, 10, ['label', 'label-en']).then((res) => {
showResults.value = res.totalHits > 0;
results.value = res;
});
Expand Down

0 comments on commit 57cd2e7

Please sign in to comment.