From fa1f343d7887b53311967f6168099e4979e1eeca Mon Sep 17 00:00:00 2001 From: vanjaftn <92813097+vanjaftn@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:55:00 +0100 Subject: [PATCH] (dsl): Support `Disjunction max` query (#360) --- .../queries/elastic_query_disjunction_max.md | 29 +++++++ .../zio/elasticsearch/HttpExecutorSpec.scala | 38 +++++++++ .../zio/elasticsearch/ElasticPrimitive.scala | 4 + .../zio/elasticsearch/ElasticQuery.scala | 30 ++++++- .../zio/elasticsearch/query/Queries.scala | 33 ++++++++ .../zio/elasticsearch/ElasticQuerySpec.scala | 80 +++++++++++++++++++ website/sidebars.js | 2 + 7 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 docs/overview/queries/elastic_query_disjunction_max.md diff --git a/docs/overview/queries/elastic_query_disjunction_max.md b/docs/overview/queries/elastic_query_disjunction_max.md new file mode 100644 index 000000000..938a07e68 --- /dev/null +++ b/docs/overview/queries/elastic_query_disjunction_max.md @@ -0,0 +1,29 @@ +--- +id: elastic_query_disjunction_max +title: "Disjunction max Query" +--- + +The `Disjunction max` query returns documents that match one or more query clauses. For documents that match multiple query clauses, the relevance score is set to the highest relevance score from all matching query clauses. When the relevance scores of the returned documents are identical, the `tieBreaker` parameter can be used to give more weight to documents that match multiple query clauses.
+ +In order to use the `Disjunction max` query, import the following: +```scala +import zio.elasticsearch.query.DisjunctionMaxQuery +import zio.elasticsearch.ElasticQuery.disjunctionMax +``` + +You can create a `Disjunction max` query using the `disjunctionMax` method this way: +```scala +val query: DisjunctionMaxQuery[Any] = disjunctionMax(query = term(field = "stringField", value = "test"), queries = exists(field = "intField"), exists(field = "existsField")) +``` + +You can create a [type-safe](https://lambdaworks.github.io/zio-elasticsearch/overview/overview_zio_prelude_schema) `Disjunction max` query using the `disjunctionMax` method this way: +```scala +val query: DisjunctionMaxQuery[Document] = disjunctionMax(query = term(field = Document.stringField, value = "test"), queries = exists(field = Document.intField), term(field = Document.termField, value = "test")) +``` + +If you want to change the `tieBreaker`, you can use the `tieBreaker` method: +```scala +val queryWithTieBreaker: DisjunctionMaxQuery[Any] = disjunctionMax(query = exists(field = "existsField"), queries = ids(values = "1", "2", "3"), term(field = "termField", value = "test")).tieBreaker(0.5f) +``` + +You can find more information about the `Disjunction max` query [here](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-dis-max-query.html).
diff --git a/modules/integration/src/test/scala/zio/elasticsearch/HttpExecutorSpec.scala b/modules/integration/src/test/scala/zio/elasticsearch/HttpExecutorSpec.scala index 16931dd7c..2be5c7c75 100644 --- a/modules/integration/src/test/scala/zio/elasticsearch/HttpExecutorSpec.scala +++ b/modules/integration/src/test/scala/zio/elasticsearch/HttpExecutorSpec.scala @@ -1161,6 +1161,44 @@ object HttpExecutorSpec extends IntegrationSpec { Executor.execute(ElasticRequest.createIndex(firstSearchIndex)), Executor.execute(ElasticRequest.deleteIndex(firstSearchIndex)).orDie ), + test("search for a document using a disjunction max query") { + checkOnce(genDocumentId, genTestDocument, genDocumentId, genTestDocument) { + (firstDocumentId, firstDocument, secondDocumentId, secondDocument) => + for { + _ <- Executor.execute(ElasticRequest.deleteByQuery(firstSearchIndex, matchAll)) + firstDocumentUpdated = + firstDocument.copy(stringField = s"This is a ${firstDocument.stringField} test.") + secondDocumentUpdated = + secondDocument.copy(stringField = + s"This is a ${secondDocument.stringField} test. 
It should be in the list before ${firstDocument.stringField}, because it has higher relevance score than ${firstDocument.stringField}" + ) + _ <- Executor.execute( + ElasticRequest + .upsert[TestDocument](firstSearchIndex, firstDocumentId, firstDocumentUpdated) + ) + _ <- + Executor.execute( + ElasticRequest + .upsert[TestDocument](firstSearchIndex, secondDocumentId, secondDocumentUpdated) + .refreshTrue + ) + query = disjunctionMax( + term( + field = TestDocument.stringField, + value = firstDocument.stringField.toLowerCase + ), + matchPhrase( + field = TestDocument.stringField, + value = secondDocument.stringField + ) + ) + res <- Executor.execute(ElasticRequest.search(firstSearchIndex, query)).documentAs[TestDocument] + } yield assert(res)(equalTo(Chunk(secondDocumentUpdated, firstDocumentUpdated))) + } + } @@ around( + Executor.execute(ElasticRequest.createIndex(firstSearchIndex)), + Executor.execute(ElasticRequest.deleteIndex(firstSearchIndex)).orDie + ), test("search for a document using a fuzzy query") { checkOnce(genDocumentId, genTestDocument) { (firstDocumentId, firstDocument) => for { diff --git a/modules/library/src/main/scala/zio/elasticsearch/ElasticPrimitive.scala b/modules/library/src/main/scala/zio/elasticsearch/ElasticPrimitive.scala index 65a73b197..5b1c22e4d 100644 --- a/modules/library/src/main/scala/zio/elasticsearch/ElasticPrimitive.scala +++ b/modules/library/src/main/scala/zio/elasticsearch/ElasticPrimitive.scala @@ -39,6 +39,10 @@ object ElasticPrimitive { def toJson(value: Double): Json = Num(value) } + implicit object ElasticFloat extends ElasticPrimitive[Float] { + def toJson(value: Float): Json = Num(value) + } + implicit object ElasticInt extends ElasticPrimitive[Int] { def toJson(value: Int): Json = Num(value) } diff --git a/modules/library/src/main/scala/zio/elasticsearch/ElasticQuery.scala b/modules/library/src/main/scala/zio/elasticsearch/ElasticQuery.scala index a5c022b65..08003f58e 100644 --- 
a/modules/library/src/main/scala/zio/elasticsearch/ElasticQuery.scala +++ b/modules/library/src/main/scala/zio/elasticsearch/ElasticQuery.scala @@ -87,6 +87,32 @@ object ElasticQuery { final def contains(field: String, value: String): WildcardQuery[Any] = Wildcard(field = field, value = s"*$value*", boost = None, caseInsensitive = None) + /** + * Constructs a type-safe instance of [[zio.elasticsearch.query.DisjunctionMaxQuery]] using the specified parameters. + * + * @param query the first query clause; it is placed at the head of the wrapped queries + * @param queries the remaining query clauses, appended after `query` in the order given + * @tparam S + * document for which field query is executed. An implicit `Schema` instance must be in scope + * @return + * an instance of [[zio.elasticsearch.query.DisjunctionMaxQuery]] that represents the `disjunction max` query to be + * performed. + */ + final def disjunctionMax[S: Schema](query: ElasticQuery[S], queries: ElasticQuery[S]*): DisjunctionMaxQuery[S] = + DisjunctionMax[S](queries = query +: Chunk.fromIterable(queries), tieBreaker = None) + + /** + * Constructs an instance of [[zio.elasticsearch.query.DisjunctionMaxQuery]] using the specified parameters. + * + * @param query the first query clause; it is placed at the head of the wrapped queries + * @param queries the remaining query clauses, appended after `query` in the order given + * @return + * an instance of [[zio.elasticsearch.query.DisjunctionMaxQuery]] that represents the `disjunction max` query to be + * performed. + */ + final def disjunctionMax(query: ElasticQuery[Any], queries: ElasticQuery[Any]*): DisjunctionMaxQuery[Any] = + DisjunctionMax[Any](queries = query +: Chunk.fromIterable(queries), tieBreaker = None) + /** * Constructs a type-safe instance of [[zio.elasticsearch.query.ExistsQuery]], that checks existence of the field, * using the specified parameters. @@ -215,7 +241,7 @@ object ElasticQuery { * @tparam S * document for which field query is executed * @return - * an instance of [[zio.elasticsearch.query.FuzzyQuery]] that represents the fuzzy query to be performed.
+ * an instance of [[zio.elasticsearch.query.FuzzyQuery]] that represents the `fuzzy` query to be performed. */ final def fuzzy[S](field: Field[S, String], value: String): FuzzyQuery[S] = Fuzzy(field = field.toString, value = value, fuzziness = None, maxExpansions = None, prefixLength = None) @@ -230,7 +256,7 @@ object ElasticQuery { * @param value * text value that will be used for the query * @return - * an instance of [[zio.elasticsearch.query.FuzzyQuery]] that represents the fuzzy query to be performed. + * an instance of [[zio.elasticsearch.query.FuzzyQuery]] that represents the `fuzzy` query to be performed. */ final def fuzzy(field: String, value: String): FuzzyQuery[Any] = Fuzzy(field = field, value = value, fuzziness = None, maxExpansions = None, prefixLength = None) diff --git a/modules/library/src/main/scala/zio/elasticsearch/query/Queries.scala b/modules/library/src/main/scala/zio/elasticsearch/query/Queries.scala index 52f8cf94a..6b30adeb2 100644 --- a/modules/library/src/main/scala/zio/elasticsearch/query/Queries.scala +++ b/modules/library/src/main/scala/zio/elasticsearch/query/Queries.scala @@ -200,6 +200,39 @@ private[elasticsearch] final case class ConstantScore[S](query: ElasticQuery[S], ) } +sealed trait DisjunctionMaxQuery[S] extends ElasticQuery[S] { + + /** + * Sets the `tieBreaker` parameter for the [[zio.elasticsearch.query.DisjunctionMaxQuery]]. The `tieBreaker` value is + * a floating-point factor between 0 and 1.0 that is used to give more weight to documents that match multiple query + * clauses. Default is 0 (which means only the highest score counts). + * + * @param value + * the `tieBreaker` factor to set; it is stored as given, with no range check performed here + * @return + * an instance of the [[zio.elasticsearch.query.DisjunctionMaxQuery]] enriched with the `tieBreaker` parameter.
+ */ + def tieBreaker(value: Float): DisjunctionMaxQuery[S] +} + +private[elasticsearch] final case class DisjunctionMax[S]( + queries: Chunk[ElasticQuery[S]], + tieBreaker: Option[Float] +) extends DisjunctionMaxQuery[S] { self => + + def tieBreaker(value: Float): DisjunctionMaxQuery[S] = + self.copy(tieBreaker = Some(value)) + + private[elasticsearch] def toJson(fieldPath: Option[String]): Json = { + val disMaxFields = + Chunk( + Some("queries" -> Arr(queries.map(_.toJson(fieldPath)))), + tieBreaker.map("tie_breaker" -> _.toJson) + ).collect { case Some(obj) => obj } + Obj("dis_max" -> Obj(disMaxFields)) + } +} + sealed trait ExistsQuery[S] extends ElasticQuery[S] with HasBoost[ExistsQuery[S]] private[elasticsearch] final case class Exists[S](field: String, boost: Option[Double]) extends ExistsQuery[S] { self => diff --git a/modules/library/src/test/scala/zio/elasticsearch/ElasticQuerySpec.scala b/modules/library/src/test/scala/zio/elasticsearch/ElasticQuerySpec.scala index 74682864a..62188c22a 100644 --- a/modules/library/src/test/scala/zio/elasticsearch/ElasticQuerySpec.scala +++ b/modules/library/src/test/scala/zio/elasticsearch/ElasticQuerySpec.scala @@ -416,6 +416,39 @@ object ElasticQuerySpec extends ZIOSpecDefault { ) ) }, + test("disjunctionMax") { + val query = disjunctionMax(exists("existsField"), ids("1", "2", "3")) + val queryTs = disjunctionMax(exists(TestDocument.stringField), ids("1", "2", "3")) + val queryWithTieBreaker = disjunctionMax(exists("existsField"), ids("1", "2", "3")).tieBreaker(0.5f) + + assert(query)( + equalTo( + DisjunctionMax[Any]( + queries = + Chunk(Exists[Any](field = "existsField", boost = None), Ids[Any](values = Chunk("1", "2", "3"))), + tieBreaker = None + ) + ) + ) && + assert(queryTs)( + equalTo( + DisjunctionMax[TestDocument]( + queries = + Chunk(Exists[Any](field = "stringField", boost = None), Ids[Any](values = Chunk("1", "2", "3"))), + tieBreaker = None + ) + ) + ) && + assert(queryWithTieBreaker)( + equalTo(
DisjunctionMax[Any]( + queries = + Chunk(Exists[Any](field = "existsField", boost = None), Ids[Any](values = Chunk("1", "2", "3"))), + tieBreaker = Some(0.5f) + ) + ) + ) + }, test("exists") { val query = exists("testField") val queryTs = exists(TestDocument.intField) @@ -2507,6 +2540,53 @@ object ElasticQuerySpec extends ZIOSpecDefault { assert(queryWithCaseInsensitive.toJson(fieldPath = None))(equalTo(expectedWithCaseInsensitive.toJson)) && assert(queryWithAllParams.toJson(fieldPath = None))(equalTo(expectedWithAllParams.toJson)) }, + test("disjunctionMax") { + val query = disjunctionMax(exists("existsField"), ids("1", "2", "3")) + val queryTs = disjunctionMax(exists(TestDocument.stringField), ids("1", "2", "3")) + val queryWithTieBreaker = + disjunctionMax(exists("existsField"), ids("1", "2", "3")).tieBreaker(0.5f) + + val expected = + """ + |{ + | "dis_max": { + | "queries": [ + | { "exists": { "field": "existsField" } }, + | { "ids": { "values": ["1", "2", "3"] } } + | ] + | } + |} + |""".stripMargin + + val expectedTs = + """ + |{ + | "dis_max": { + | "queries": [ + | { "exists": { "field": "stringField" } }, + | { "ids": { "values": ["1", "2", "3"] } } + | ] + | } + |} + |""".stripMargin + + val expectedWithTieBreaker = + """ + |{ + | "dis_max": { + | "queries": [ + | { "exists": { "field": "existsField" } }, + | { "ids": { "values": ["1", "2", "3"] } } + | ], + | "tie_breaker": 0.5 + | } + |} + |""".stripMargin + + assert(query.toJson(fieldPath = None))(equalTo(expected.toJson)) && + assert(queryTs.toJson(fieldPath = None))(equalTo(expectedTs.toJson)) && + assert(queryWithTieBreaker.toJson(fieldPath = None))(equalTo(expectedWithTieBreaker.toJson)) + }, test("exists") { val query = exists("testField") val queryTs = exists(TestDocument.dateField) diff --git a/website/sidebars.js b/website/sidebars.js index 318505b5b..53c8cf2d8 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -16,6 +16,7 @@ module.exports = { 'overview/elastic_query', 
'overview/queries/elastic_query_bool', 'overview/queries/elastic_query_constant_score', + 'overview/queries/elastic_query_disjunction_max', 'overview/queries/elastic_query_exists', 'overview/queries/elastic_query_function_score', 'overview/queries/elastic_query_fuzzy', @@ -57,6 +58,7 @@ module.exports = { 'overview/aggregations/elastic_aggregation_sum', 'overview/aggregations/elastic_aggregation_terms', 'overview/aggregations/elastic_aggregation_value_count', + 'overview/aggregations/elastic_aggregation_weighted_avg', ], }, {