Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SO-5904: ECL search disable synonym token filter #1205

Merged
merged 1 commit into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,19 @@ public TextPredicate withIgnoreStopwords(boolean ignoreStopwords) {
if (ignoreStopwords) {
return withAnalyzer((analyzer == Analyzers.TOKENIZED_SYNONYMS || analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS) ? Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS : Analyzers.TOKENIZED_IGNORE_STOPWORDS);
} else {
return withAnalyzer(analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS ? Analyzers.TOKENIZED_SYNONYMS : null);
return withAnalyzer(analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS ? Analyzers.TOKENIZED_SYNONYMS : Analyzers.TOKENIZED);
}
}

public TextPredicate withSynonyms(boolean enableSynonyms) {
public TextPredicate withSynonyms(Boolean enableSynonyms) {
// if enableSynonyms is not a valid boolean value keep it unchanged, use the default set in the mapping
if (enableSynonyms == null) {
return this;
}
if (enableSynonyms) {
return withAnalyzer((analyzer == Analyzers.TOKENIZED_IGNORE_STOPWORDS || analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS) ? Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS : Analyzers.TOKENIZED_SYNONYMS);
} else {
return withAnalyzer(analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS ? Analyzers.TOKENIZED_IGNORE_STOPWORDS : null);
return withAnalyzer(analyzer == Analyzers.TOKENIZED_SYNONYMS_IGNORE_STOPWORDS ? Analyzers.TOKENIZED_IGNORE_STOPWORDS : Analyzers.TOKENIZED);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,11 @@ protected Expression toExpression(final TypedSearchTermClause clause) {

switch (lexicalSearchType) {
case MATCH:
return termMatchExpression(com.b2international.snowowl.core.request.search.TermFilter.match().term(term).build());
return termMatchExpression(com.b2international.snowowl.core.request.search.TermFilter.match().term(term)
// make sure we disable case sensitivity and synonyms
.caseSensitive(false)
.synonyms(false)
.build());
case WILD:
final String regex = term.replace("*", ".*");
return termRegexExpression(regex);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2022 B2i Healthcare Pte Ltd, http://b2i.sg
* Copyright 2022-2023 B2i Healthcare Pte Ltd, http://b2i.sg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,19 +39,21 @@ public final class MatchTermFilter extends TermFilter {

private final boolean ignoreStopwords;
private final boolean caseSensitive;
private final Boolean synonyms;

private final String fuzziness;
private final Integer prefixLength;
private final Integer maxExpansions;

MatchTermFilter(final String term, final Integer minShouldMatch, final boolean ignoreStopwords, final boolean caseSensitive, final String fuzziness, final Integer prefixLength, final Integer maxExpansions) {
MatchTermFilter(final String term, final Integer minShouldMatch, final boolean ignoreStopwords, final boolean caseSensitive, final Boolean synonyms, final String fuzziness, final Integer prefixLength, final Integer maxExpansions) {
if (term == null) {
throw new BadRequestException("'term' filter parameter was null.");
}
this.term = term.trim();
this.minShouldMatch = minShouldMatch;
this.ignoreStopwords = ignoreStopwords;
this.caseSensitive = caseSensitive;
this.synonyms = synonyms;
this.fuzziness = fuzziness;
this.prefixLength = prefixLength;
this.maxExpansions = maxExpansions;
Expand All @@ -73,6 +75,10 @@ public boolean isCaseSensitive() {
return caseSensitive;
}

public Boolean isSynonyms() {
cmark marked this conversation as resolved.
Show resolved Hide resolved
return synonyms;
}

/**
 * @return the fuzziness value this filter was constructed with (may be {@code null})
 */
public String getFuzziness() {
	return this.fuzziness;
}
Expand Down Expand Up @@ -121,6 +127,7 @@ public static final class Builder {

private boolean ignoreStopwords;
private boolean caseSensitive;
private Boolean synonyms;

private String fuzziness;
private Integer prefixLength;
Expand All @@ -134,6 +141,7 @@ public static final class Builder {
this.minShouldMatch = from.getMinShouldMatch();
this.ignoreStopwords = from.isIgnoreStopwords();
this.caseSensitive = from.isCaseSensitive();
this.synonyms = from.isSynonyms();
this.fuzziness = from.getFuzziness();
this.prefixLength = from.getPrefixLength();
this.maxExpansions = from.getMaxExpansions();
Expand All @@ -159,6 +167,11 @@ public Builder caseSensitive(boolean caseSensitive) {
return this;
}

/**
 * Sets the synonym flag that {@link #build()} passes on to the created filter.
 *
 * @param synonyms the synonym setting to store; {@code null} is accepted and stored as-is
 * @return this builder, for call chaining
 */
public Builder synonyms(Boolean synonyms) {
this.synonyms = synonyms;
return this;
}

/**
 * Shorthand for {@code fuzziness("AUTO")}.
 *
 * @return whatever {@code fuzziness("AUTO")} returns
 */
public Builder fuzzy() {
	return this.fuzziness("AUTO");
}
Expand All @@ -179,7 +192,7 @@ public Builder maxExpansions(Integer maxExpansions) {
}

public MatchTermFilter build() {
return new MatchTermFilter(term, minShouldMatch, ignoreStopwords, caseSensitive, fuzziness, prefixLength, maxExpansions);
return new MatchTermFilter(term, minShouldMatch, ignoreStopwords, caseSensitive, synonyms, fuzziness, prefixLength, maxExpansions);
}

}
Expand All @@ -188,18 +201,24 @@ public Expression termDisjunctionQuery(String field, String textFieldSuffix, Str
return dismaxWithScoreCategories(
TermFilter.exact().term(getTerm()).caseSensitive(isCaseSensitive()).build().toExpression(field, textFieldSuffix, exactFieldSuffix, prefixFieldSuffix),
matchTextAll(fieldAlias(field, textFieldSuffix), getTerm())
.withIgnoreStopwords(isIgnoreStopwords()),
.withIgnoreStopwords(isIgnoreStopwords())
.withSynonyms(isSynonyms()),
matchBooleanPrefix(fieldAlias(field, textFieldSuffix), getTerm())
.withIgnoreStopwords(isIgnoreStopwords()),
.withIgnoreStopwords(isIgnoreStopwords())
.withSynonyms(isSynonyms()),
matchTextAll(fieldAlias(field, prefixFieldSuffix), getTerm())
.withIgnoreStopwords(isIgnoreStopwords())
);
}

public Expression minShouldMatchTermDisjunctionQuery(String field, String textFieldSuffix, String exactFieldSuffix, String prefixFieldSuffix) {
return dismaxWithScoreCategories(
matchTextAny(fieldAlias(field, textFieldSuffix), getTerm(), getMinShouldMatch()).withIgnoreStopwords(isIgnoreStopwords()),
matchTextAny(fieldAlias(field, prefixFieldSuffix), getTerm(), getMinShouldMatch()).withIgnoreStopwords(isIgnoreStopwords())
matchTextAny(fieldAlias(field, textFieldSuffix), getTerm(), getMinShouldMatch())
.withIgnoreStopwords(isIgnoreStopwords())
.withSynonyms(isSynonyms()),
matchTextAny(fieldAlias(field, prefixFieldSuffix), getTerm(), getMinShouldMatch())
.withIgnoreStopwords(isIgnoreStopwords())
.withSynonyms(isSynonyms())
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public abstract class TermFilter implements Serializable {
*
* <li>All terms present match on a case insensitive, ASCII folded, possessive removed, split text type field (usually the <b>term</b> field) with fuzziness enabled with hardcoded 10 expansions of 1 character difference (Levenshtein distance).
*
* Additionally stopwords can be ignored and case sensitivity can be enabled/disabled.
* Additionally stopwords can be ignored, case sensitivity can be enabled/disabled and synonyms can be included if needed.
*
* @return {@link MatchTermFilter.Builder}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,16 @@
package com.b2international.snowowl.snomed.core.ecl;

import static com.b2international.snowowl.test.commons.snomed.RandomSnomedIdentiferGenerator.generateDescriptionId;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;

import java.util.*;

import org.junit.ClassRule;
import org.junit.Test;

import com.b2international.commons.exceptions.BadRequestException;
import com.b2international.commons.exceptions.SyntaxException;
import com.b2international.index.SynonymsRule;
import com.b2international.index.query.Expression;
import com.b2international.index.query.Expressions;
import com.b2international.snowowl.core.date.DateFormats;
Expand All @@ -41,6 +43,11 @@
*/
public class SnomedEclEvaluationRequestPropertyFilterTest extends BaseSnomedEclEvaluationRequestTest {

@ClassRule
public static SynonymsRule synonyms = new SynonymsRule(
"history,previous"
);

@Test
public void concept_activeOnly() throws Exception {
final Expression actual = eval("* {{ c active = true }}");
Expand Down Expand Up @@ -204,6 +211,31 @@ public void termDisjunction() throws Exception {
assertEquals(expected, actual);
}

/**
 * With the "history,previous" synonym rule registered on this test class, an ECL term filter for "history"
 * is expected to resolve only to the concept whose description literally contains "history", not to the one
 * whose description contains "previous".
 */
@Test
public void termMatchSynonymsDisabled() throws Exception {
	// description that contains the searched word itself, attached to the root concept
	final SnomedDescriptionIndexEntry historyDescription = SnomedDescriptionIndexEntry.builder()
			.id(generateDescriptionId())
			.active(true)
			.moduleId(Concepts.MODULE_SCT_CORE)
			.term("History related concept")
			.conceptId(Concepts.ROOT_CONCEPT)
			.typeId(Concepts.SYNONYM)
			.build();
	indexRevision(MAIN, historyDescription);

	// description that only contains the configured synonym ("previous"), attached to a different concept
	final SnomedDescriptionIndexEntry previousDescription = SnomedDescriptionIndexEntry.builder()
			.id(generateDescriptionId())
			.active(true)
			.moduleId(Concepts.MODULE_SCT_CORE)
			.term("Concept with previous word in it")
			.conceptId(Concepts.MODULE_SCT_CORE)
			.typeId(Concepts.SYNONYM)
			.build();
	indexRevision(MAIN, previousDescription);

	final Expression actual = eval("* {{ term = \"history\" }}");

	// only the concept of the literal "history" description may be returned
	final Expression expected = SnomedDocument.Expressions.ids(List.of(Concepts.ROOT_CONCEPT));
	assertEquals(expected, actual);
}

@Test
public void disjunctionActiveAndModuleId() throws Exception {
final Expression actual = eval("* {{ c active = true OR moduleId = " + Concepts.MODULE_SCT_CORE + " }}");
Expand Down