From e6d2130599ad5f8ddff040ae6264a0e3b16e1946 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Sat, 7 Oct 2023 09:49:29 -0700 Subject: [PATCH 01/11] add z-score and logging for tests Signed-off-by: Samuel Herman --- build.gradle | 6 + .../NormalizationProcessorWorkflow.java | 1 + .../ZScoreNormalizationTechnique.java | 168 +++++++++++++++++ .../ZScoreNormalizationTechniqueTests.java | 172 ++++++++++++++++++ src/test/resources/log4j2-test.xml | 13 ++ 5 files changed, 360 insertions(+) create mode 100644 src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java create mode 100644 src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java create mode 100644 src/test/resources/log4j2-test.xml diff --git a/build.gradle b/build.gradle index 853aa85e7..90731feb5 100644 --- a/build.gradle +++ b/build.gradle @@ -218,6 +218,12 @@ integTest { if (System.getProperty("test.debug") != null) { jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' } + + systemProperty 'log4j2.configurationFile', "${projectDir}/src/test/resources/log4j2-test.xml" + + // Set this to true this if you want to see the logs in the terminal test output. 
+ // note: if left false the log output will still show in your IDE + testLogging.showStandardStreams = true } testClusters.integTest { diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java index 9e0069b21..d5e898185 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java @@ -52,6 +52,7 @@ public void execute( final ScoreNormalizationTechnique normalizationTechnique, final ScoreCombinationTechnique combinationTechnique ) { + log.info("Entering normalization processor workflow"); // save original state List unprocessedDocIds = unprocessedDocIds(querySearchResults); diff --git a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java new file mode 100644 index 000000000..6d6fadf2b --- /dev/null +++ b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java @@ -0,0 +1,168 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.neuralsearch.processor.normalization; + +import com.google.common.primitives.Floats; +import lombok.ToString; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.opensearch.neuralsearch.processor.CompoundTopDocs; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * Implementation of z-score normalization technique for hybrid query + * This is currently modeled based on the existing normalization techniques {@link L2ScoreNormalizationTechnique} and {@link MinMaxScoreNormalizationTechnique} + * However, this class as well as the original ones require a 
significant work to improve style and ease of use, see TODO items below + */ +/* +TODO: Some todo items that apply here but also on the original normalization techniques on which it is modeled {@link L2ScoreNormalizationTechnique} and {@link MinMaxScoreNormalizationTechnique} +1. Random access to abstract list object is a bad practice both stylistically and from performance perspective and should be removed +2. Identical sub queries and their distribution between shards is currently completely implicit based on ordering and should be explicit based on identifier +3. Weird calculation of numOfSubQueries instead of having a more explicit indicator + */ +@ToString(onlyExplicitlyIncluded = true) +public class ZScoreNormalizationTechnique implements ScoreNormalizationTechnique { + @ToString.Include + public static final String TECHNIQUE_NAME = "z_score"; + private static final float MIN_SCORE = 0.001f; + private static final float SINGLE_RESULT_SCORE = 1.0f; + @Override + public void normalize(List queryTopDocs) { + // why are we doing that? is List the list of subqueries for a single shard? or a global list of all subqueries across shards? + // If a subquery comes from each shard then when is it combined? 
that seems weird that combination will do combination of normalized results that each is normalized just based on shard level result + int numOfSubQueries = queryTopDocs.stream() + .filter(Objects::nonNull) + .filter(topDocs -> topDocs.getTopDocs().size() > 0) + .findAny() + .get() + .getTopDocs() + .size(); + + // to be done for each subquery + float[] sumPerSubquery = findScoreSumPerSubQuery(queryTopDocs, numOfSubQueries); + long[] elementsPerSubquery = findNumberOfElementsPerSubQuery(queryTopDocs, numOfSubQueries); + float[] meanPerSubQuery = findMeanPerSubquery(sumPerSubquery, elementsPerSubquery); + float[] stdPerSubquery = findStdPerSubquery(queryTopDocs, meanPerSubQuery, elementsPerSubquery, numOfSubQueries); + + // do normalization using actual score and z-scores for corresponding sub query + for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { + if (Objects.isNull(compoundQueryTopDocs)) { + continue; + } + List topDocsPerSubQuery = compoundQueryTopDocs.getTopDocs(); + for (int j = 0; j < topDocsPerSubQuery.size(); j++) { + TopDocs subQueryTopDoc = topDocsPerSubQuery.get(j); + for (ScoreDoc scoreDoc : subQueryTopDoc.scoreDocs) { + scoreDoc.score = normalizeSingleScore(scoreDoc.score, stdPerSubquery[j], meanPerSubQuery[j]); + } + } + } + } + + static private float[] findScoreSumPerSubQuery(final List queryTopDocs, final int numOfScores) { + final float[] sumOfScorePerSubQuery = new float[numOfScores]; + Arrays.fill(sumOfScorePerSubQuery, 0); + //TODO: make this better, currently + // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) + // which does a random search on an abstract list type. 
+ for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { + if (Objects.isNull(compoundQueryTopDocs)) { + continue; + } + List topDocsPerSubQuery = compoundQueryTopDocs.getTopDocs(); + for (int j = 0; j < topDocsPerSubQuery.size(); j++) { + sumOfScorePerSubQuery[j] += sumScoreDocsArray(topDocsPerSubQuery.get(j).scoreDocs); + } + } + + return sumOfScorePerSubQuery; + } + + static private long[] findNumberOfElementsPerSubQuery(final List queryTopDocs, final int numOfScores) { + final long[] numberOfElementsPerSubQuery = new long[numOfScores]; + Arrays.fill(numberOfElementsPerSubQuery, 0); + //TODO: make this better, currently + // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) + // which does a random search on an abstract list type. + for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { + if (Objects.isNull(compoundQueryTopDocs)) { + continue; + } + List topDocsPerSubQuery = compoundQueryTopDocs.getTopDocs(); + for (int j = 0; j < topDocsPerSubQuery.size(); j++) { + numberOfElementsPerSubQuery[j] += topDocsPerSubQuery.get(j).totalHits.value; + } + } + + return numberOfElementsPerSubQuery; + } + + static private float[] findMeanPerSubquery(final float[] sumPerSubquery, final long[] elementsPerSubquery) { + final float[] meanPerSubQuery = new float[elementsPerSubquery.length]; + for (int i = 0; i < elementsPerSubquery.length; i++) { + if (elementsPerSubquery[i] == 0) { + meanPerSubQuery[i] = 0; + } else { + meanPerSubQuery[i] = sumPerSubquery[i]/elementsPerSubquery[i]; + } + } + + return meanPerSubQuery; + } + + static private float[] findStdPerSubquery(final List queryTopDocs, final float[] meanPerSubQuery, final long[] elementsPerSubquery, final int numOfScores) { + final double[] deltaSumPerSubquery = new double[numOfScores]; + Arrays.fill(deltaSumPerSubquery, 0); + + + //TODO: make this better, currently + // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) + 
// which does a random search on an abstract list type. + for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { + if (Objects.isNull(compoundQueryTopDocs)) { + continue; + } + List topDocsPerSubQuery = compoundQueryTopDocs.getTopDocs(); + for (int j = 0; j < topDocsPerSubQuery.size(); j++) { + for (ScoreDoc scoreDoc : topDocsPerSubQuery.get(j).scoreDocs) { + deltaSumPerSubquery[j] += Math.pow(scoreDoc.score - meanPerSubQuery[j], 2); + } + } + } + + final float[] stdPerSubQuery = new float[numOfScores]; + for (int i = 0; i < deltaSumPerSubquery.length; i++) { + if (elementsPerSubquery[i] == 0) { + stdPerSubQuery[i] = 0; + } else { + stdPerSubQuery[i] = (float) Math.sqrt(deltaSumPerSubquery[i] / elementsPerSubquery[i]); + } + } + + return stdPerSubQuery; + } + + static private float sumScoreDocsArray(ScoreDoc[] scoreDocs) { + float sum = 0; + for (ScoreDoc scoreDoc : scoreDocs) { + sum += scoreDoc.score; + } + + return sum; + } + + private static float normalizeSingleScore(final float score, final float standardDeviation, final float mean) { + // edge case when there is only one score and min and max scores are same + if (Floats.compare(mean, score) == 0) { + return SINGLE_RESULT_SCORE; + } + float normalizedScore = (score - mean) / standardDeviation; + return normalizedScore == 0.0f ? 
MIN_SCORE : normalizedScore; + } +} diff --git a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java new file mode 100644 index 000000000..1d0c61373 --- /dev/null +++ b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java @@ -0,0 +1,172 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.neuralsearch.processor.normalization; + +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.opensearch.neuralsearch.processor.CompoundTopDocs; +import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; + +import java.util.List; + +public class ZScoreNormalizationTechniqueTests extends OpenSearchQueryTestCase { + private static final float DELTA_FOR_ASSERTION = 0.0001f; + + public void testNormalization_whenResultFromOneShardOneSubQuery_thenSuccessful() { + ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); + List compoundTopDocs = List.of( + new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ) + ) + ) + ); + normalizationTechnique.normalize(compoundTopDocs); + + CompoundTopDocs expectedCompoundDocs = new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } + ) + ) + ); + assertNotNull(compoundTopDocs); + assertEquals(1, compoundTopDocs.size()); + assertNotNull(compoundTopDocs.get(0).getTopDocs()); + assertCompoundTopDocs( + new 
TopDocs(expectedCompoundDocs.getTotalHits(), expectedCompoundDocs.getScoreDocs().toArray(new ScoreDoc[0])), + compoundTopDocs.get(0).getTopDocs().get(0) + ); + } + + public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSuccessful() { + ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); + List compoundTopDocs = List.of( + new CompoundTopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } + ) + ) + ) + ); + normalizationTechnique.normalize(compoundTopDocs); + + CompoundTopDocs expectedCompoundDocs = new CompoundTopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } + ) + ) + ); + assertNotNull(compoundTopDocs); + assertEquals(1, compoundTopDocs.size()); + assertNotNull(compoundTopDocs.get(0).getTopDocs()); + for (int i = 0; i < expectedCompoundDocs.getTopDocs().size(); i++) { + assertCompoundTopDocs(expectedCompoundDocs.getTopDocs().get(i), compoundTopDocs.get(0).getTopDocs().get(i)); + } + } + + public void testNormalization_whenResultFromMultipleShardsMultipleSubQueries_thenSuccessful() { + ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); + List compoundTopDocs = List.of( + new CompoundTopDocs( 
+ new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } + ) + ) + ), + new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(7, 2.9f), new ScoreDoc(9, 0.7f) } + ) + ) + ) + ); + normalizationTechnique.normalize(compoundTopDocs); + + CompoundTopDocs expectedCompoundDocsShard1 = new CompoundTopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } + ) + ) + ); + + CompoundTopDocs expectedCompoundDocsShard2 = new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(7, 1.0f), new ScoreDoc(9, -1.0f) } + ) + ) + ); + + assertNotNull(compoundTopDocs); + assertEquals(2, compoundTopDocs.size()); + assertNotNull(compoundTopDocs.get(0).getTopDocs()); + for (int i = 0; i < expectedCompoundDocsShard1.getTopDocs().size(); i++) { + assertCompoundTopDocs(expectedCompoundDocsShard1.getTopDocs().get(i), 
compoundTopDocs.get(0).getTopDocs().get(i)); + } + assertNotNull(compoundTopDocs.get(1).getTopDocs()); + for (int i = 0; i < expectedCompoundDocsShard2.getTopDocs().size(); i++) { + assertCompoundTopDocs(expectedCompoundDocsShard2.getTopDocs().get(i), compoundTopDocs.get(1).getTopDocs().get(i)); + } + } + + private void assertCompoundTopDocs(TopDocs expected, TopDocs actual) { + assertEquals(expected.totalHits.value, actual.totalHits.value); + assertEquals(expected.totalHits.relation, actual.totalHits.relation); + assertEquals(expected.scoreDocs.length, actual.scoreDocs.length); + for (int i = 0; i < expected.scoreDocs.length; i++) { + assertEquals(expected.scoreDocs[i].score, actual.scoreDocs[i].score, DELTA_FOR_ASSERTION); + assertEquals(expected.scoreDocs[i].doc, actual.scoreDocs[i].doc); + assertEquals(expected.scoreDocs[i].shardIndex, actual.scoreDocs[i].shardIndex); + } + } +} diff --git a/src/test/resources/log4j2-test.xml b/src/test/resources/log4j2-test.xml new file mode 100644 index 000000000..32c8f6bc7 --- /dev/null +++ b/src/test/resources/log4j2-test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file From 9cce739cfc9c345746aaf17b31852d34a471c0ca Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Tue, 17 Oct 2023 16:13:57 -0700 Subject: [PATCH 02/11] wire z-score normalization Signed-off-by: Samuel Herman --- .../processor/normalization/ScoreNormalizationFactory.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationFactory.java index 667c237c7..c42df96fe 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationFactory.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationFactory.java @@ -19,7 +19,9 @@ public class ScoreNormalizationFactory { 
MinMaxScoreNormalizationTechnique.TECHNIQUE_NAME, new MinMaxScoreNormalizationTechnique(), L2ScoreNormalizationTechnique.TECHNIQUE_NAME, - new L2ScoreNormalizationTechnique() + new L2ScoreNormalizationTechnique(), + ZScoreNormalizationTechnique.TECHNIQUE_NAME, + new ZScoreNormalizationTechnique() ); /** From 10b56465ba1b8b43d2c2006f2d287c44b5f75c0b Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Wed, 18 Oct 2023 11:57:49 -0700 Subject: [PATCH 03/11] add IT test Signed-off-by: Samuel Herman --- ...ithmeticMeanScoreCombinationTechnique.java | 2 +- .../processor/combination/ScoreCombiner.java | 2 - .../ZScoreNormalizationTechnique.java | 4 +- .../query/HybridQueryZScoreIT.java | 211 ++++++++++++++++++ 4 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java diff --git a/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java index e656beca3..6a629b056 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java @@ -43,7 +43,7 @@ public float combine(final float[] scores) { float sumOfWeights = 0; for (int indexOfSubQuery = 0; indexOfSubQuery < scores.length; indexOfSubQuery++) { float score = scores[indexOfSubQuery]; - if (score >= 0.0) { + if (score != 0.0) { float weight = scoreCombinationUtil.getWeightForSubQuery(weights, indexOfSubQuery); score = score * weight; combinedScore += score; diff --git a/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombiner.java b/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombiner.java index 0293efae6..5b9cd4378 100644 --- 
a/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombiner.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombiner.java @@ -26,8 +26,6 @@ @Log4j2 public class ScoreCombiner { - private static final Float ZERO_SCORE = 0.0f; - /** * Performs score combination based on input combination technique. Mutates input object by updating combined scores * Main steps we're doing for combination: diff --git a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java index 6d6fadf2b..bc2ae9a7b 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java @@ -30,7 +30,6 @@ public class ZScoreNormalizationTechnique implements ScoreNormalizationTechnique { @ToString.Include public static final String TECHNIQUE_NAME = "z_score"; - private static final float MIN_SCORE = 0.001f; private static final float SINGLE_RESULT_SCORE = 1.0f; @Override public void normalize(List queryTopDocs) { @@ -162,7 +161,6 @@ private static float normalizeSingleScore(final float score, final float standar if (Floats.compare(mean, score) == 0) { return SINGLE_RESULT_SCORE; } - float normalizedScore = (score - mean) / standardDeviation; - return normalizedScore == 0.0f ? 
MIN_SCORE : normalizedScore; + return (score - mean) / standardDeviation; } } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java new file mode 100644 index 000000000..ea14244d7 --- /dev/null +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java @@ -0,0 +1,211 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.neuralsearch.query; + +import com.google.common.primitives.Floats; +import lombok.SneakyThrows; +import org.junit.After; +import org.junit.Before; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.knn.index.SpaceType; +import org.opensearch.neuralsearch.common.BaseNeuralSearchIT; +import org.opensearch.neuralsearch.processor.normalization.ZScoreNormalizationTechnique; + +import java.io.IOException; +import java.util.*; +import java.util.stream.IntStream; + +import static org.opensearch.neuralsearch.TestUtils.DELTA_FOR_SCORE_ASSERTION; +import static org.opensearch.neuralsearch.TestUtils.createRandomVector; + +public class HybridQueryZScoreIT extends BaseNeuralSearchIT { + private static final String TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME = "test-neural-vector-doc-field-index"; + private static final String TEST_QUERY_TEXT = "greetings"; + private static final String TEST_QUERY_TEXT4 = "place"; + private static final String TEST_QUERY_TEXT5 = "welcome"; + private static final String TEST_DOC_TEXT1 = "Hello world"; + private static final String TEST_DOC_TEXT2 = "Hi to this place"; + private static final String TEST_KNN_VECTOR_FIELD_NAME_1 = "test-knn-vector-1"; + private static final String TEST_KNN_VECTOR_FIELD_NAME_2 = "test-knn-vector-2"; + private static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field-1"; + + private static 
final int TEST_DIMENSION = 768; + private static final SpaceType TEST_SPACE_TYPE = SpaceType.L2; + private final float[] testVector1 = createRandomVector(TEST_DIMENSION); + private final float[] testVector2 = createRandomVector(TEST_DIMENSION); + private final static String RELATION_EQUAL_TO = "eq"; + private static final String SEARCH_PIPELINE = "phase-results-pipeline"; + + @Before + public void setUp() throws Exception { + super.setUp(); + updateClusterSettings(); + prepareModel(); + createSearchPipeline(SEARCH_PIPELINE, ZScoreNormalizationTechnique.TECHNIQUE_NAME, DEFAULT_COMBINATION_METHOD, Map.of(PARAM_NAME_WEIGHTS, "[0.5,0.5]")); + } + + @After + @SneakyThrows + public void tearDown() { + super.tearDown(); + deleteSearchPipeline(SEARCH_PIPELINE); + /* this is required to minimize chance of model not being deployed due to open memory CB, + * this happens in case we leave model from previous test case. We use new model for every test, and old model + * can be undeployed and deleted to free resources after each test case execution. 
+ */ + findDeployedModels().forEach(this::deleteModel); + } + + @Override + public boolean isUpdateClusterSettings() { + return false; + } + + @Override + protected boolean preserveClusterUponCompletion() { + return true; + } + + /** + * Tests complex query with multiple nested sub-queries: + * { + * "query": { + * "hybrid": { + * "queries": [ + * { + * "bool": { + * "should": [ + * { + * "term": { + * "text": "word1" + * } + * }, + * { + * "term": { + * "text": "word2" + * } + * } + * ] + * } + * }, + * { + * "term": { + * "text": "word3" + * } + * } + * ] + * } + * } + * } + */ + @SneakyThrows + public void testComplexQuery_withZScoreNormalization() { + initializeIndexIfNotExist(); + + TermQueryBuilder termQueryBuilder2 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4); + TermQueryBuilder termQueryBuilder3 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT5); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); + boolQueryBuilder.should(termQueryBuilder2).should(termQueryBuilder3); + + String modelId = getDeployedModelId(); + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId, + 5, + null, + null + ); + + HybridQueryBuilder hybridQueryBuilderNeuralThenTerm = new HybridQueryBuilder(); + hybridQueryBuilderNeuralThenTerm.add(neuralQueryBuilder); + hybridQueryBuilderNeuralThenTerm.add(boolQueryBuilder); + + + final Map searchResponseAsMap = search( + TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, + hybridQueryBuilderNeuralThenTerm, + null, + 5, + Map.of("search_pipeline", SEARCH_PIPELINE) + ); + + assertEquals(2, getHitCount(searchResponseAsMap)); + + List> hits1NestedList = getNestedHits(searchResponseAsMap); + List ids = new ArrayList<>(); + List scores = new ArrayList<>(); + for (Map oneHit : hits1NestedList) { + ids.add((String) oneHit.get("_id")); + scores.add((Double) oneHit.get("_score")); + } + + assertEquals(2, scores.size()); + // since it's z-score 
normalized we would expect 1 , -1 to be the corresponding score, by design when there are only two results with z score + // furthermore the combination logic with weights should make it doc1Score: (1 * w1 + 0.98 * w2)/(w1 + w2), -1 * w2/w2 + assertEquals(0.9999, scores.get(0).floatValue(), DELTA_FOR_SCORE_ASSERTION); + assertEquals(-1 , scores.get(1).floatValue(), DELTA_FOR_SCORE_ASSERTION); + + // verify that scores are in desc order + assertTrue(IntStream.range(0, scores.size() - 1).noneMatch(idx -> scores.get(idx) < scores.get(idx + 1))); + // verify that all ids are unique + assertEquals(Set.copyOf(ids).size(), ids.size()); + + Map total = getTotalHits(searchResponseAsMap); + assertNotNull(total.get("value")); + assertEquals(2, total.get("value")); + assertNotNull(total.get("relation")); + assertEquals(RELATION_EQUAL_TO, total.get("relation")); + } + + private void initializeIndexIfNotExist() throws IOException { + if (!indexExists(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME)) { + prepareKnnIndex( + TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, + List.of( + new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE), + new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_2, TEST_DIMENSION, TEST_SPACE_TYPE) + ), + 1 + ); + + addKnnDoc( + TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, + "1", + List.of(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_KNN_VECTOR_FIELD_NAME_2), + List.of(Floats.asList(testVector1).toArray(), Floats.asList(testVector1).toArray()), + Collections.singletonList(TEST_TEXT_FIELD_NAME_1), + Collections.singletonList(TEST_DOC_TEXT1) + ); + addKnnDoc( + TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, + "2", + List.of(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_KNN_VECTOR_FIELD_NAME_2), + List.of(Floats.asList(testVector2).toArray(), Floats.asList(testVector2).toArray()), + Collections.singletonList(TEST_TEXT_FIELD_NAME_1), + Collections.singletonList(TEST_DOC_TEXT2) + ); + assertEquals(2, getDocCount(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME)); + } + } + + private List> 
getNestedHits(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return (List>) hitsMap.get("hits"); + } + + private Map getTotalHits(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return (Map) hitsMap.get("total"); + } + + private Optional getMaxScore(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); + } +} From 266a34bff39e7e4769d5be1d1a725c0f94f6aba4 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Wed, 18 Oct 2023 20:48:37 -0700 Subject: [PATCH 04/11] fix IT test Signed-off-by: Samuel Herman --- .../ArithmeticMeanScoreCombinationTechnique.java | 2 +- .../opensearch/neuralsearch/query/HybridQueryZScoreIT.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java index 6a629b056..e656beca3 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationTechnique.java @@ -43,7 +43,7 @@ public float combine(final float[] scores) { float sumOfWeights = 0; for (int indexOfSubQuery = 0; indexOfSubQuery < scores.length; indexOfSubQuery++) { float score = scores[indexOfSubQuery]; - if (score != 0.0) { + if (score >= 0.0) { float weight = scoreCombinationUtil.getWeightForSubQuery(weights, indexOfSubQuery); score = score * weight; combinedScore += score; diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java index ea14244d7..d197fc710 100644 --- 
a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java @@ -146,10 +146,10 @@ public void testComplexQuery_withZScoreNormalization() { } assertEquals(2, scores.size()); - // since it's z-score normalized we would expect 1 , -1 to be the corresponding score, by design when there are only two results with z score - // furthermore the combination logic with weights should make it doc1Score: (1 * w1 + 0.98 * w2)/(w1 + w2), -1 * w2/w2 + // by design when there are only two results with z score since it's z-score normalized we would expect 1 , -1 to be the corresponding score, + // furthermore the combination logic with weights should make it doc1Score: (1 * w1 + 0.98 * w2)/(w1 + w2), doc2Score: -1 ~ 0 assertEquals(0.9999, scores.get(0).floatValue(), DELTA_FOR_SCORE_ASSERTION); - assertEquals(-1 , scores.get(1).floatValue(), DELTA_FOR_SCORE_ASSERTION); + assertEquals(0 , scores.get(1).floatValue(), DELTA_FOR_SCORE_ASSERTION); // verify that scores are in desc order assertTrue(IntStream.range(0, scores.size() - 1).noneMatch(idx -> scores.get(idx) < scores.get(idx + 1))); From 0effd0741818683ba8ad36f158729ac246b0a639 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Sat, 21 Oct 2023 11:35:39 -0700 Subject: [PATCH 05/11] review feedback Signed-off-by: Samuel Herman --- .../NormalizationProcessorWorkflow.java | 1 - .../ZScoreNormalizationTechnique.java | 62 +++---- .../common/BaseNeuralSearchIT.java | 15 ++ .../HybridQueryZScoreIT.java | 70 ++++---- .../processor/NormalizationProcessorIT.java | 16 -- .../ZScoreNormalizationTechniqueTests.java | 164 +++++++++--------- .../neuralsearch/query/HybridQueryIT.java | 16 -- 7 files changed, 162 insertions(+), 182 deletions(-) rename src/test/java/org/opensearch/neuralsearch/{query => processor}/HybridQueryZScoreIT.java (83%) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java 
b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java index d5e898185..9e0069b21 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflow.java @@ -52,7 +52,6 @@ public void execute( final ScoreNormalizationTechnique normalizationTechnique, final ScoreCombinationTechnique combinationTechnique ) { - log.info("Entering normalization processor workflow"); // save original state List unprocessedDocIds = unprocessedDocIds(querySearchResults); diff --git a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java index bc2ae9a7b..fc97e8a4b 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechnique.java @@ -5,15 +5,18 @@ package org.opensearch.neuralsearch.processor.normalization; -import com.google.common.primitives.Floats; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + import lombok.ToString; + import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.opensearch.neuralsearch.processor.CompoundTopDocs; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; +import com.google.common.primitives.Floats; /** * Implementation of z-score normalization technique for hybrid query @@ -24,24 +27,26 @@ TODO: Some todo items that apply here but also on the original normalization techniques on which it is modeled {@link L2ScoreNormalizationTechnique} and {@link MinMaxScoreNormalizationTechnique} 1. 
Random access to abstract list object is a bad practice both stylistically and from performance perspective and should be removed 2. Identical sub queries and their distribution between shards is currently completely implicit based on ordering and should be explicit based on identifier -3. Weird calculation of numOfSubQueries instead of having a more explicit indicator +3. Implicit calculation of numOfSubQueries instead of having a more explicit upstream indicator/metadata regarding it */ @ToString(onlyExplicitlyIncluded = true) public class ZScoreNormalizationTechnique implements ScoreNormalizationTechnique { @ToString.Include public static final String TECHNIQUE_NAME = "z_score"; private static final float SINGLE_RESULT_SCORE = 1.0f; + @Override - public void normalize(List queryTopDocs) { - // why are we doing that? is List the list of subqueries for a single shard? or a global list of all subqueries across shards? - // If a subquery comes from each shard then when is it combined? that seems weird that combination will do combination of normalized results that each is normalized just based on shard level result - int numOfSubQueries = queryTopDocs.stream() - .filter(Objects::nonNull) - .filter(topDocs -> topDocs.getTopDocs().size() > 0) - .findAny() - .get() - .getTopDocs() - .size(); + public void normalize(final List queryTopDocs) { + /* + TODO: There is an implicit assumption in this calculation that probably need to be made clearer by passing some metadata with the results. + Currently assuming that finding a single non empty shard result will contain all sub query results with 0 hits. 
+ */ + final Optional maybeCompoundTopDocs = queryTopDocs.stream() + .filter(Objects::nonNull) + .filter(topDocs -> topDocs.getTopDocs().size() > 0) + .findAny(); + + final int numOfSubQueries = maybeCompoundTopDocs.map(compoundTopDocs -> compoundTopDocs.getTopDocs().size()).orElse(0); // to be done for each subquery float[] sumPerSubquery = findScoreSumPerSubQuery(queryTopDocs, numOfSubQueries); @@ -67,9 +72,7 @@ public void normalize(List queryTopDocs) { static private float[] findScoreSumPerSubQuery(final List queryTopDocs, final int numOfScores) { final float[] sumOfScorePerSubQuery = new float[numOfScores]; Arrays.fill(sumOfScorePerSubQuery, 0); - //TODO: make this better, currently - // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) - // which does a random search on an abstract list type. + // TODO: make this syntactically clearer regarding performance by avoiding List.get(j) with an abstract List type for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { if (Objects.isNull(compoundQueryTopDocs)) { continue; @@ -86,9 +89,7 @@ static private float[] findScoreSumPerSubQuery(final List query static private long[] findNumberOfElementsPerSubQuery(final List queryTopDocs, final int numOfScores) { final long[] numberOfElementsPerSubQuery = new long[numOfScores]; Arrays.fill(numberOfElementsPerSubQuery, 0); - //TODO: make this better, currently - // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) - // which does a random search on an abstract list type. 
+ // TODO: make this syntactically clearer regarding performance by avoiding List.get(j) with an abstract List type for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { if (Objects.isNull(compoundQueryTopDocs)) { continue; @@ -108,21 +109,22 @@ static private float[] findMeanPerSubquery(final float[] sumPerSubquery, final l if (elementsPerSubquery[i] == 0) { meanPerSubQuery[i] = 0; } else { - meanPerSubQuery[i] = sumPerSubquery[i]/elementsPerSubquery[i]; + meanPerSubQuery[i] = sumPerSubquery[i] / elementsPerSubquery[i]; } } return meanPerSubQuery; } - static private float[] findStdPerSubquery(final List queryTopDocs, final float[] meanPerSubQuery, final long[] elementsPerSubquery, final int numOfScores) { + static private float[] findStdPerSubquery( + final List queryTopDocs, + final float[] meanPerSubQuery, + final long[] elementsPerSubquery, + final int numOfScores + ) { final double[] deltaSumPerSubquery = new double[numOfScores]; Arrays.fill(deltaSumPerSubquery, 0); - - - //TODO: make this better, currently - // this is a horrible implementation in particular when it comes to the topDocsPerSubQuery.get(j) - // which does a random search on an abstract list type. 
+ // TODO: make this syntactically clearer regarding performance by avoiding List.get(j) with an abstract List type for (CompoundTopDocs compoundQueryTopDocs : queryTopDocs) { if (Objects.isNull(compoundQueryTopDocs)) { continue; @@ -147,7 +149,7 @@ static private float[] findStdPerSubquery(final List queryTopDo return stdPerSubQuery; } - static private float sumScoreDocsArray(ScoreDoc[] scoreDocs) { + static private float sumScoreDocsArray(final ScoreDoc[] scoreDocs) { float sum = 0; for (ScoreDoc scoreDoc : scoreDocs) { sum += scoreDoc.score; @@ -161,6 +163,6 @@ private static float normalizeSingleScore(final float score, final float standar if (Floats.compare(mean, score) == 0) { return SINGLE_RESULT_SCORE; } - return (score - mean) / standardDeviation; + return (score - mean) / standardDeviation; } } diff --git a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java index 9c24e81fd..e6265724f 100644 --- a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java +++ b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java @@ -760,4 +760,19 @@ private String registerModelGroup() { assertNotNull(modelGroupId); return modelGroupId; } + + protected List> getNestedHits(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return (List>) hitsMap.get("hits"); + } + + protected Map getTotalHits(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return (Map) hitsMap.get("total"); + } + + protected Optional getMaxScore(Map searchResponseAsMap) { + Map hitsMap = (Map) searchResponseAsMap.get("hits"); + return hitsMap.get("max_score") == null ? 
Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); + } } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java b/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java similarity index 83% rename from src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java rename to src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java index d197fc710..23db97fe2 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryZScoreIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java @@ -3,10 +3,17 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.neuralsearch.query; +package org.opensearch.neuralsearch.processor; + +import static org.opensearch.neuralsearch.TestUtils.DELTA_FOR_SCORE_ASSERTION; +import static org.opensearch.neuralsearch.TestUtils.createRandomVector; + +import java.io.IOException; +import java.util.*; +import java.util.stream.IntStream; -import com.google.common.primitives.Floats; import lombok.SneakyThrows; + import org.junit.After; import org.junit.Before; import org.opensearch.index.query.BoolQueryBuilder; @@ -15,13 +22,10 @@ import org.opensearch.knn.index.SpaceType; import org.opensearch.neuralsearch.common.BaseNeuralSearchIT; import org.opensearch.neuralsearch.processor.normalization.ZScoreNormalizationTechnique; +import org.opensearch.neuralsearch.query.HybridQueryBuilder; +import org.opensearch.neuralsearch.query.NeuralQueryBuilder; -import java.io.IOException; -import java.util.*; -import java.util.stream.IntStream; - -import static org.opensearch.neuralsearch.TestUtils.DELTA_FOR_SCORE_ASSERTION; -import static org.opensearch.neuralsearch.TestUtils.createRandomVector; +import com.google.common.primitives.Floats; public class HybridQueryZScoreIT extends BaseNeuralSearchIT { private static final String TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME = 
"test-neural-vector-doc-field-index"; @@ -46,7 +50,12 @@ public void setUp() throws Exception { super.setUp(); updateClusterSettings(); prepareModel(); - createSearchPipeline(SEARCH_PIPELINE, ZScoreNormalizationTechnique.TECHNIQUE_NAME, DEFAULT_COMBINATION_METHOD, Map.of(PARAM_NAME_WEIGHTS, "[0.5,0.5]")); + createSearchPipeline( + SEARCH_PIPELINE, + ZScoreNormalizationTechnique.TECHNIQUE_NAME, + DEFAULT_COMBINATION_METHOD, + Map.of(PARAM_NAME_WEIGHTS, "[0.5,0.5]") + ); } @After @@ -114,25 +123,24 @@ public void testComplexQuery_withZScoreNormalization() { String modelId = getDeployedModelId(); NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( - TEST_KNN_VECTOR_FIELD_NAME_1, - TEST_QUERY_TEXT, - modelId, - 5, - null, - null + TEST_KNN_VECTOR_FIELD_NAME_1, + TEST_QUERY_TEXT, + modelId, + 5, + null, + null ); HybridQueryBuilder hybridQueryBuilderNeuralThenTerm = new HybridQueryBuilder(); hybridQueryBuilderNeuralThenTerm.add(neuralQueryBuilder); hybridQueryBuilderNeuralThenTerm.add(boolQueryBuilder); - final Map searchResponseAsMap = search( - TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, - hybridQueryBuilderNeuralThenTerm, - null, - 5, - Map.of("search_pipeline", SEARCH_PIPELINE) + TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, + hybridQueryBuilderNeuralThenTerm, + null, + 5, + Map.of("search_pipeline", SEARCH_PIPELINE) ); assertEquals(2, getHitCount(searchResponseAsMap)); @@ -146,10 +154,11 @@ public void testComplexQuery_withZScoreNormalization() { } assertEquals(2, scores.size()); - // by design when there are only two results with z score since it's z-score normalized we would expect 1 , -1 to be the corresponding score, + // by design when there are only two results with z score since it's z-score normalized we would expect 1 , -1 to be the + // corresponding score, // furthermore the combination logic with weights should make it doc1Score: (1 * w1 + 0.98 * w2)/(w1 + w2), doc2Score: -1 ~ 0 assertEquals(0.9999, scores.get(0).floatValue(), 
DELTA_FOR_SCORE_ASSERTION); - assertEquals(0 , scores.get(1).floatValue(), DELTA_FOR_SCORE_ASSERTION); + assertEquals(0, scores.get(1).floatValue(), DELTA_FOR_SCORE_ASSERTION); // verify that scores are in desc order assertTrue(IntStream.range(0, scores.size() - 1).noneMatch(idx -> scores.get(idx) < scores.get(idx + 1))); @@ -193,19 +202,4 @@ private void initializeIndexIfNotExist() throws IOException { assertEquals(2, getDocCount(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME)); } } - - private List> getNestedHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (List>) hitsMap.get("hits"); - } - - private Map getTotalHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (Map) hitsMap.get("total"); - } - - private Optional getMaxScore(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); - } } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java index 3cd71e5a1..79db226e1 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java @@ -12,7 +12,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.IntStream; @@ -341,21 +340,6 @@ private void initializeIndexIfNotExist(String indexName) throws IOException { } } - private List> getNestedHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (List>) hitsMap.get("hits"); - } - - private Map getTotalHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (Map) 
hitsMap.get("total"); - } - - private Optional getMaxScore(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); - } - private void assertQueryResults(Map searchResponseAsMap, int totalExpectedDocQty, boolean assertMinScore) { assertQueryResults(searchResponseAsMap, totalExpectedDocQty, assertMinScore, Range.between(0.5f, 1.0f)); } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java index 1d0c61373..45e350dbb 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java @@ -5,83 +5,88 @@ package org.opensearch.neuralsearch.processor.normalization; +import java.util.List; + import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; import org.opensearch.neuralsearch.processor.CompoundTopDocs; import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; -import java.util.List; - public class ZScoreNormalizationTechniqueTests extends OpenSearchQueryTestCase { private static final float DELTA_FOR_ASSERTION = 0.0001f; + /** + * Z score will check the relative distance from the center of distribution and hence can also be negative. + * When only two values are available their z-score numbers will be 1 and -1 correspondingly. 
+ * For more information regarding z-score you can check this link + * https://www.z-table.com/ + * + */ public void testNormalization_whenResultFromOneShardOneSubQuery_thenSuccessful() { ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); List compoundTopDocs = List.of( - new CompoundTopDocs( + new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( new TotalHits(2, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } - ) - ) + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ) ) + ) ); normalizationTechnique.normalize(compoundTopDocs); + // since we only have two scores of 0.5 and 0.2 their z-score numbers will be 1 and -1 CompoundTopDocs expectedCompoundDocs = new CompoundTopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } - ) - ) + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs(new TotalHits(2, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) }) + ) ); assertNotNull(compoundTopDocs); assertEquals(1, compoundTopDocs.size()); assertNotNull(compoundTopDocs.get(0).getTopDocs()); assertCompoundTopDocs( - new TopDocs(expectedCompoundDocs.getTotalHits(), expectedCompoundDocs.getScoreDocs().toArray(new ScoreDoc[0])), - compoundTopDocs.get(0).getTopDocs().get(0) + new TopDocs(expectedCompoundDocs.getTotalHits(), expectedCompoundDocs.getScoreDocs().toArray(new ScoreDoc[0])), + compoundTopDocs.get(0).getTopDocs().get(0) ); } public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSuccessful() { ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); List compoundTopDocs = List.of( - new 
CompoundTopDocs( + new CompoundTopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( new TotalHits(3, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } - ), - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } - ) - ) + new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } + ) ) + ) ); normalizationTechnique.normalize(compoundTopDocs); CompoundTopDocs expectedCompoundDocs = new CompoundTopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } - ), - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } - ) + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } ) + ) ); assertNotNull(compoundTopDocs); assertEquals(1, compoundTopDocs.size()); @@ -94,57 +99,54 @@ public void 
testNormalization_whenResultFromOneShardMultipleSubQueries_thenSucce public void testNormalization_whenResultFromMultipleShardsMultipleSubQueries_thenSuccessful() { ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); List compoundTopDocs = List.of( - new CompoundTopDocs( + new CompoundTopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( new TotalHits(3, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 0.5f), new ScoreDoc(4, 0.2f) } - ), - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } - ) - ) - ), - new CompoundTopDocs( + new ScoreDoc[] { new ScoreDoc(3, 0.9f), new ScoreDoc(4, 0.7f), new ScoreDoc(2, 0.1f) } + ) + ) + ), + new CompoundTopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( new TotalHits(2, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(7, 2.9f), new ScoreDoc(9, 0.7f) } - ) - ) + new ScoreDoc[] { new ScoreDoc(7, 2.9f), new ScoreDoc(9, 0.7f) } + ) ) + ) ); normalizationTechnique.normalize(compoundTopDocs); CompoundTopDocs expectedCompoundDocsShard1 = new CompoundTopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(2, 1.0f), 
new ScoreDoc(4, -1.0f) } - ), - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(3, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } - ) + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } + ), + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs( + new TotalHits(3, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } ) + ) ); CompoundTopDocs expectedCompoundDocsShard2 = new CompoundTopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - List.of( - new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), - new TopDocs( - new TotalHits(2, TotalHits.Relation.EQUAL_TO), - new ScoreDoc[] { new ScoreDoc(7, 1.0f), new ScoreDoc(9, -1.0f) } - ) - ) + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + List.of( + new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), + new TopDocs(new TotalHits(2, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] { new ScoreDoc(7, 1.0f), new ScoreDoc(9, -1.0f) }) + ) ); assertNotNull(compoundTopDocs); diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java index eec6955ff..229374730 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java @@ -13,7 +13,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.IntStream; @@ -267,19 +266,4 @@ private void initializeIndexIfNotExist(String indexName) throws IOException 
{ assertEquals(3, getDocCount(TEST_MULTI_DOC_INDEX_NAME)); } } - - private List> getNestedHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (List>) hitsMap.get("hits"); - } - - private Map getTotalHits(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return (Map) hitsMap.get("total"); - } - - private Optional getMaxScore(Map searchResponseAsMap) { - Map hitsMap = (Map) searchResponseAsMap.get("hits"); - return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); - } } From 21954c91a86e0bba98048ed4b8aeae8868b8a7a5 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Fri, 17 Nov 2023 12:41:54 -0800 Subject: [PATCH 06/11] minor typo fixes and checkstyle Signed-off-by: Samuel Herman --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 90731feb5..c7070b446 100644 --- a/build.gradle +++ b/build.gradle @@ -19,7 +19,7 @@ apply plugin: "com.diffplug.spotless" apply plugin: 'io.freefair.lombok' def pluginName = 'opensearch-neural-search' -def pluginDescription = 'A plugin that adds dense neural retrieval into the OpenSearch ecosytem' +def pluginDescription = 'A plugin that adds dense neural retrieval into the OpenSearch ecosystem' def projectPath = 'org.opensearch' def pathToPlugin = 'neuralsearch.plugin' def pluginClassName = 'NeuralSearch' From 8d7c3d959f841d00b22bbac1ad24629f812b4c43 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Fri, 17 Nov 2023 13:04:59 -0800 Subject: [PATCH 07/11] add explanation on test score calculation Signed-off-by: Samuel Herman --- .../ZScoreNormalizationTechniqueTests.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java 
b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java index 45e350dbb..24af40172 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java @@ -17,7 +17,8 @@ public class ZScoreNormalizationTechniqueTests extends OpenSearchQueryTestCase { private static final float DELTA_FOR_ASSERTION = 0.0001f; /** - * Z score will check the relative distance from the center of distribution and hence can also be negative. + * Z score will check the relative distance from the center of distribution in units of standard deviation + * and hence can also be negative. It is using the formula of (score - mean_score)/std * When only two values are available their z-score numbers will be 1 and -1 correspondingly. * For more information regarding z-score you can check this link * https://www.z-table.com/ @@ -54,6 +55,15 @@ public void testNormalization_whenResultFromOneShardOneSubQuery_thenSuccessful() ); } + /** + * Z score will check the relative distance from the center of distribution in units of standard deviation + * and hence can also be negative. It is using the formula of (score - mean_score)/std + * When only two values are available their z-score numbers will be 1 and -1 correspondingly as we see in the first query that returns only two document scores. + * When we have more than two documents scores as in the second query the distribution will not be binary and will have different results based on where the center of gravity of the distribution is. 
+ * For more information regarding z-score you can check this link + * https://www.z-table.com/ + * + */ public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSuccessful() { ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique(); List compoundTopDocs = List.of( @@ -79,11 +89,13 @@ public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSucce List.of( new TopDocs( new TotalHits(2, TotalHits.Relation.EQUAL_TO), + // Calculated based on the formula (score - mean_score)/std new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) } ), new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), new TopDocs( new TotalHits(3, TotalHits.Relation.EQUAL_TO), + // Calculated based on the formula (score - mean_score)/std for the values of mean_score = (0.9 + 0.7 + 0.1)/3 ~ 0.56, std = sqrt(((0.9 - 0.56)^2 + (0.7 - 0.56)^2 + (0.1 - 0.56)^2)/3) new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } ) ) From b2543364ba7793a58d8038314409f186825f0582 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Fri, 17 Nov 2023 14:42:16 -0800 Subject: [PATCH 08/11] fix issues due to merge --- .../org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java | 3 +-- .../org/opensearch/neuralsearch/plugin/NeuralSearchTests.java | 1 + .../opensearch/neuralsearch/processor/HybridQueryZScoreIT.java | 1 + .../normalization/ZScoreNormalizationTechniqueTests.java | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java index efe6e5573..6b213a9fd 100644 --- a/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java +++ b/src/test/java/org/opensearch/neuralsearch/common/BaseNeuralSearchIT.java @@ -772,7 +772,6 @@ private String registerModelGroup() { return modelGroupId; } - 
protected List> getNestedHits(Map searchResponseAsMap) { Map hitsMap = (Map) searchResponseAsMap.get("hits"); return (List>) hitsMap.get("hits"); @@ -787,7 +786,7 @@ protected Optional getMaxScore(Map searchResponseAsMap) { Map hitsMap = (Map) searchResponseAsMap.get("hits"); return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); } - + /** * Enumeration for types of pipeline processors, used to lookup resources like create * processor request as those are type specific diff --git a/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java b/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java index 69791681e..fe84559c6 100644 --- a/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java +++ b/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java @@ -66,6 +66,7 @@ public void testProcessors() { null, mock(IngestService.class), null, + null, null ); Map processors = plugin.getProcessors(processorParams); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java b/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java index 23db97fe2..6c939e1e9 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/HybridQueryZScoreIT.java @@ -125,6 +125,7 @@ public void testComplexQuery_withZScoreNormalization() { NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder( TEST_KNN_VECTOR_FIELD_NAME_1, TEST_QUERY_TEXT, + "", modelId, 5, null, diff --git a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java index 24af40172..702aead59 100644 --- 
a/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/normalization/ZScoreNormalizationTechniqueTests.java @@ -95,7 +95,8 @@ public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSucce new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]), new TopDocs( new TotalHits(3, TotalHits.Relation.EQUAL_TO), - // Calculated based on the formula (score - mean_score)/std for the values of mean_score = (0.9 + 0.7 + 0.1)/3 ~ 0.56, std = sqrt(((0.9 - 0.56)^2 + (0.7 - 0.56)^2 + (0.1 - 0.56)^2)/3) + // Calculated based on the formula (score - mean_score)/std for the values of mean_score = (0.9 + 0.7 + 0.1)/3 ~ 0.56, + // std = sqrt(((0.9 - 0.56)^2 + (0.7 - 0.56)^2 + (0.1 - 0.56)^2)/3) new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) } ) ) From 9a19fe72f3bf5c82b406788a75f63f36e16bd184 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Sat, 18 Nov 2023 23:07:33 -0800 Subject: [PATCH 09/11] add changelog Signed-off-by: Samuel Herman --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b92edd850..4e54cee1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.11...2.x) ### Features +- Adding z-score normalization for hybrid query [#470](https://github.com/opensearch-project/neural-search/pull/470) ### Enhancements ### Bug Fixes ### Infrastructure From 2b84c48ad4279d57f98006a3a6211807f9566218 Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Fri, 8 Dec 2023 19:41:37 -0800 Subject: [PATCH 10/11] add combiner with negative score support Signed-off-by: Samuel Herman --- ...mbinationWithNegativeSupportTechnique.java | 56 +++++++++++++++++++
.../combination/ScoreCombinationFactory.java | 4 +- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationWithNegativeSupportTechnique.java diff --git a/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationWithNegativeSupportTechnique.java b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationWithNegativeSupportTechnique.java new file mode 100644 index 000000000..07ab5151c --- /dev/null +++ b/src/main/java/org/opensearch/neuralsearch/processor/combination/ArithmeticMeanScoreCombinationWithNegativeSupportTechnique.java @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.neuralsearch.processor.combination; + +import lombok.ToString; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Abstracts combination of scores based on arithmetic mean method, with support for negative scores + */ +@ToString(onlyExplicitlyIncluded = true) +public class ArithmeticMeanScoreCombinationWithNegativeSupportTechnique implements ScoreCombinationTechnique { + @ToString.Include + public static final String TECHNIQUE_NAME = "arithmetic_mean_with_negatives_support"; + public static final String PARAM_NAME_WEIGHTS = "weights"; + private static final Set SUPPORTED_PARAMS = Set.of(PARAM_NAME_WEIGHTS); + private static final Float ZERO_SCORE = 0.0f; + private final List weights; + private final ScoreCombinationUtil scoreCombinationUtil; + + public ArithmeticMeanScoreCombinationWithNegativeSupportTechnique(final Map params, final ScoreCombinationUtil combinationUtil) { + scoreCombinationUtil = combinationUtil; + scoreCombinationUtil.validateParams(params, SUPPORTED_PARAMS); + weights = scoreCombinationUtil.getWeights(params); + } + + /** + * Arithmetic mean method for combining scores.
+ * score = (weight1*score1 + weight2*score2 +...+ weightN*scoreN)/(weight1 + weight2 + ... + weightN) + * + * Unlike the plain arithmetic mean technique, zero (0.0) and negative scores are NOT excluded; every score and its weight contribute to the combined result + */ + @Override + public float combine(final float[] scores) { + scoreCombinationUtil.validateIfWeightsMatchScores(scores, weights); + float combinedScore = 0.0f; + float sumOfWeights = 0; + for (int indexOfSubQuery = 0; indexOfSubQuery < scores.length; indexOfSubQuery++) { + float score = scores[indexOfSubQuery]; + float weight = scoreCombinationUtil.getWeightForSubQuery(weights, indexOfSubQuery); + score = score * weight; + combinedScore += score; + sumOfWeights += weight; + } + if (sumOfWeights == 0.0f) { + return ZERO_SCORE; + } + return combinedScore / sumOfWeights; + } +} diff --git a/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationFactory.java index f05d24823..217d69573 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationFactory.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationFactory.java @@ -26,7 +26,9 @@ public class ScoreCombinationFactory { HarmonicMeanScoreCombinationTechnique.TECHNIQUE_NAME, params -> new HarmonicMeanScoreCombinationTechnique(params, scoreCombinationUtil), GeometricMeanScoreCombinationTechnique.TECHNIQUE_NAME, - params -> new GeometricMeanScoreCombinationTechnique(params, scoreCombinationUtil) + params -> new GeometricMeanScoreCombinationTechnique(params, scoreCombinationUtil), + ArithmeticMeanScoreCombinationWithNegativeSupportTechnique.TECHNIQUE_NAME, + params -> new ArithmeticMeanScoreCombinationWithNegativeSupportTechnique(params, scoreCombinationUtil) ); /** From 4843b7b419adcdabd0a273c5172ea0031adedfec Mon Sep 17 00:00:00 2001 From: Samuel Herman Date: Fri, 8 Dec 2023 19:52:39 -0800 Subject: [PATCH 11/11] adding scripts for BEIR testing Signed-off-by: Samuel
Herman --- scripts/Dockerfile | 9 + scripts/cleanup-previous-docker.sh | 16 + scripts/docker-compose.yml | 48 +++ scripts/model_setup_beir.md | 462 +++++++++++++++++++++++++++++ 4 files changed, 535 insertions(+) create mode 100644 scripts/Dockerfile create mode 100755 scripts/cleanup-previous-docker.sh create mode 100644 scripts/docker-compose.yml create mode 100644 scripts/model_setup_beir.md diff --git a/scripts/Dockerfile b/scripts/Dockerfile new file mode 100644 index 000000000..985796015 --- /dev/null +++ b/scripts/Dockerfile @@ -0,0 +1,9 @@ +# Build with the following command
# docker build --tag="opensearch-zscore-test:2.11.0" . +FROM opensearchproject/opensearch:2.11.0 +# Remove previous neural search plugin and install new one +RUN /usr/share/opensearch/bin/opensearch-plugin remove opensearch-neural-search +# Make sure the build is present, to create it `./gradlew clean assemble -Dopensearch.version="2.11.0"` +# Then copy it locally `cp ../build/distributions/opensearch-neural-search-2.11.0.0-SNAPSHOT.zip .` +COPY opensearch-neural-search-2.11.0.0-SNAPSHOT.zip /usr/share/opensearch/ +RUN /usr/share/opensearch/bin/opensearch-plugin install -b file:/usr/share/opensearch/opensearch-neural-search-2.11.0.0-SNAPSHOT.zip diff --git a/scripts/cleanup-previous-docker.sh b/scripts/cleanup-previous-docker.sh new file mode 100755 index 000000000..48d797f31 --- /dev/null +++ b/scripts/cleanup-previous-docker.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +#### Procedure to cleanup all previous experiments with docker compose #### + +# Stop the container(s) using the following command: +docker-compose down + +# Delete all containers using the following command: +docker rm -f $(docker ps -a -q) + +# Delete all volumes using the following command: +docker volume rm $(docker volume ls -q) + +# Restart the containers using the following command: +docker-compose up + diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml new file mode 100644 index 000000000..2b0d0c14c ---
/dev/null +++ b/scripts/docker-compose.yml @@ -0,0 +1,48 @@ +version: '3' +services: + opensearch-node1: # This is also the hostname of the container within the Docker network (i.e. https://opensearch-node1/) + #image: opensearchproject/opensearch:2.11.0 + image: opensearch-zscore-test:2.11.0 + container_name: opensearch-node1 + environment: + - cluster.name=opensearch-cluster # Name the cluster + - node.name=opensearch-node1 # Name the node that will run in this container + - discovery.seed_hosts=opensearch-node1 # Nodes to look for when discovering the cluster + - cluster.initial_cluster_manager_nodes=opensearch-node1 # Nodes eligible to serve as cluster manager + - bootstrap.memory_lock=true # Disable JVM heap memory swapping + - plugins.security.disabled=true # Disable security plugin so it's easy to test + - "DISABLE_INSTALL_DEMO_CONFIG=true" # Disable security plugin so it's easy to test + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" # Set min and max JVM heap sizes to at least 50% of system RAM + ulimits: + memlock: + soft: -1 # Set memlock to unlimited (no soft or hard limit) + hard: -1 + nofile: + soft: 65536 # Maximum number of open files for the opensearch user - set to at least 65536 + hard: 65536 + volumes: + - opensearch-data1:/usr/share/opensearch/data # Creates volume called opensearch-data1 and mounts it to the container + ports: + - 9200:9200 # REST API + - 9600:9600 # Performance Analyzer + networks: + - opensearch-net # All of the containers will join the same Docker bridge network + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.11.0 # Make sure the version of opensearch-dashboards matches the version of opensearch installed on other nodes + container_name: opensearch-dashboards + ports: + - 5601:5601 # Map host port 5601 to container port 5601 + expose: + - "5601" # Expose port 5601 for web access to OpenSearch Dashboards + environment: + - 'OPENSEARCH_HOSTS=["http://opensearch-node1:9200"]' # Define the OpenSearch
nodes that OpenSearch Dashboards will query (Plain HTTP, security disabled) + - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security dashboards plugin in OpenSearch Dashboards + networks: + - opensearch-net + +volumes: + opensearch-data1: + +networks: + opensearch-net: + diff --git a/scripts/model_setup_beir.md b/scripts/model_setup_beir.md new file mode 100644 index 000000000..2e26890a8 --- /dev/null +++ b/scripts/model_setup_beir.md @@ -0,0 +1,462 @@ +``` +# First create the model group to use +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.model_access_control_enabled" : true + } +} + +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.only_run_on_ml_node" : false + } +} + +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.allow_registering_model_via_url" : true + } +} + +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.allow_registering_model_via_local_file" : true + } +} + +# Or everything at the same time +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.model_access_control_enabled" : true, + "plugins.ml_commons.only_run_on_ml_node" : false, + "plugins.ml_commons.allow_registering_model_via_url" : true, + "plugins.ml_commons.allow_registering_model_via_local_file" : true + } +} + +POST /_plugins/_ml/model_groups/_register +{ + "name": "test_model_group_public", + "description": "This is a public model group", + "model_access_mode": "public" +} + +# returned model_group_id = eRS4y4sBRT_C2Oekj20- + +# register model +POST /_plugins/_ml/models/_register +{ + "name": "huggingface/sentence-transformers/all-MiniLM-L12-v2", + "version": "1.0.1", + "model_format": "TORCH_SCRIPT", + "model_group_id": "eRS4y4sBRT_C2Oekj20-" +} +# returned task_id=exS6y4sBRT_C2OekYW2T + +GET /_plugins/_ml/tasks/exS6y4sBRT_C2OekYW2T +# returned model_id=fBS6y4sBRT_C2Oeka22E + +POST /_plugins/_ml/models/fBS6y4sBRT_C2Oeka22E/_load +# returned task_id=fRS7y4sBRT_C2Oekp20I + +GET
/_plugins/_ml/tasks/fRS7y4sBRT_C2Oekp20I + +PUT _ingest/pipeline/nlp-pipeline +{ + "description": "An example neural search pipeline", + "processors" : [ + { + "text_embedding": { + "model_id": "fBS6y4sBRT_C2Oeka22E", + "field_map": { + "passage_text": "passage_embedding" + } + } + } + ] +} + +DELETE /scifact + +PUT /scifact +{ + "settings": { + "index.knn": true, + "default_pipeline": "nlp-pipeline" + }, + "mappings": { + "properties": { + "passage_embedding": { + "type": "knn_vector", + "dimension": 384, + "method": { + "name":"hnsw", + "engine":"lucene", + "space_type": "l2", + "parameters":{ + "m":16, + "ef_construction": 512 + } + } + }, + "passage_text": { + "type": "text" + }, + "passage_key": { + "type": "text" + }, + "passage_title": { + "type": "text" + } + } + } +} + +PUT /_search/pipeline/norm-minmax-pipeline +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "min_max" + }, + "combination": { + "technique": "arithmetic_mean", + "parameters": { + "weights": [ + 1.0 + ] + } + } + } + } + ] +} + +PUT /_search/pipeline/norm-minmax-pipeline-hybrid +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "min_max" + }, + "combination": { + "technique": "arithmetic_mean", + "parameters": { + "weights": [ + 0.4, + 0.3, + 0.3 + ] + } + } + } + } + ] +} + +GET scifact/_search?search_pipeline=norm-minmax-pipeline +``` + +Can also be created outside the console via curl +```bash +PORT=50365 +HOST=localhost +URL="$HOST:$PORT" + +curl -XPUT -H "Content-Type: application/json" $URL/_ingest/pipeline/nlp-pipeline -d ' +{ + "description": "An example neural search pipeline", + "processors" : [ + { + "text_embedding": { + "model_id": "AXA30IsByAqY8FkWHdIF", + "field_map": { + "passage_text": "passage_embedding" + } + } + } + ] +}' + +curl -XDELETE $URL/$INDEX + 
+curl -XPUT -H "Content-Type: application/json" $URL/scifact -d ' +{ + "settings": { + "index.knn": true, + "default_pipeline": "nlp-pipeline" + }, + "mappings": { + "properties": { + "passage_embedding": { + "type": "knn_vector", + "dimension": 384, + "method": { + "name":"hnsw", + "engine":"lucene", + "space_type": "l2", + "parameters":{ + "m":16, + "ef_construction": 512 + } + } + }, + "passage_text": { + "type": "text" + }, + "passage_key": { + "type": "text" + }, + "passage_title": { + "type": "text" + } + } + } +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-minmax-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "min_max" + }, + "combination": { + "technique": "arithmetic_mean", + "parameters": { + "weights": [ + 0.4, + 0.3, + 0.3 + ] + } + } + } + } + ] +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-zscore-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "z_score" + }, + "combination": { + "technique": "arithmetic_mean", + "parameters": { + "weights": [ + 0.4, + 0.3, + 0.3 + ] + } + } + } + } + ] +}' + +``` + +To use later with +```bash +PORT=50365 +MODEL_ID="AXA30IsByAqY8FkWHdIF" +pipenv run python test_opensearch.py --dataset=scifact --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index="scifact" --operation=ingest +pipenv run python test_opensearch.py --dataset=scifact --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index="scifact" --operation=evaluate --method=bm25 +pipenv run python test_opensearch.py --dataset=scifact 
--dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index="scifact" --operation=evaluate --method=neural --pipelines=norm-minmax-pipeline --os_model_id=$MODEL_ID +pipenv run python test_opensearch.py --dataset=scifact --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index="scifact" --operation=evaluate --method=hybrid --pipelines=norm-minmax-pipeline-hybrid --os_model_id=$MODEL_ID +pipenv run python test_opensearch.py --dataset=scifact --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index="scifact" --operation=evaluate --method=hybrid --pipelines=norm-zscore-pipeline-hybrid --os_model_id=$MODEL_ID +``` + +To follow the Amazon approach +# Via the opensearch dashboard console +``` +PUT /_cluster/settings +{ + "persistent" : { + "plugins.ml_commons.model_access_control_enabled" : true, + "plugins.ml_commons.only_run_on_ml_node" : false, + "plugins.ml_commons.allow_registering_model_via_url" : true, + "plugins.ml_commons.allow_registering_model_via_local_file" : true + } +} + +POST /_plugins/_ml/model_groups/_register +{ + "name": "test_model_group_public", + "description": "This is a public model group", + "model_access_mode": "public" +} + +# returned model_group_id = eRS4y4sBRT_C2Oekj20- +# register model +POST /_plugins/_ml/models/_register +{ + "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b", + "version": "1.0.1", + "model_format": "TORCH_SCRIPT", + "model_group_id": "eRS4y4sBRT_C2Oekj20-" +} +# returned task_id=exS6y4sBRT_C2OekYW2T + +GET /_plugins/_ml/tasks/exS6y4sBRT_C2OekYW2T +# returned model_id=fBS6y4sBRT_C2Oeka22E + +POST /_plugins/_ml/models/fBS6y4sBRT_C2Oeka22E/_load +# returned task_id=fRS7y4sBRT_C2Oekp20I + +GET /_plugins/_ml/tasks/fRS7y4sBRT_C2Oekp20I +# Wait until successful +``` + +# 
via shell +```bash +PORT=9200 +HOST=localhost +URL="$HOST:$PORT" +INDEX="quora" +DATASET="quora" +MODEL_ID="dLrx6IsB4n5WT8oPiuAq" + +curl -XPUT -H "Content-Type: application/json" $URL/_ingest/pipeline/nlp-pipeline -d ' +{ + "description": "An example neural search pipeline", + "processors" : [ + { + "text_embedding": { + "model_id": "dLrx6IsB4n5WT8oPiuAq", + "field_map": { + "passage_text": "passage_embedding" + } + } + } + ] +}' + +curl -XDELETE $URL/$INDEX + +curl -XPUT -H "Content-Type: application/json" $URL/$INDEX -d ' +{ + "settings": { + "index.knn": true, + "default_pipeline": "nlp-pipeline", + "number_of_shards": 4 + }, + "mappings": { + "properties": { + "passage_embedding": { + "type": "knn_vector", + "dimension": 768, + "method": { + "name": "hnsw", + "engine": "nmslib", + "space_type": "innerproduct", + "parameters":{} + } + }, + "passage_text": { + "type": "text" + }, + "title_key": { + "type": "text", "analyzer" : "english" + }, + "text_key": { + "type": "text", "analyzer" : "english" + } + } + } +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-minmax-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "min_max" + }, + "combination": { + "technique": "arithmetic_mean" + } + } + } + ] +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-ltwo-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "l2" + }, + "combination": { + "technique": "arithmetic_mean" + } + } + } + ] +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-zscore-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": 
"z_score" + }, + "combination": { + "technique": "arithmetic_mean" + } + } + } + ] +}' + +curl -XPUT -H "Content-Type: application/json" $URL/_search/pipeline/norm-zscore--with-negatives-pipeline-hybrid -d ' +{ + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "z_score" + }, + "combination": { + "technique": "arithmetic_mean_with_negatives_support" + } + } + } + ] +}' + + +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=ingest +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=bm25 > /tmp/bm25-results.log +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=neural --pipelines=norm-minmax-pipeline-hybrid --os_model_id=$MODEL_ID > /tmp/neural-results.log +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=hybrid --pipelines=norm-minmax-pipeline-hybrid --os_model_id=$MODEL_ID > /tmp/min-max-results.log +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=hybrid --pipelines=norm-ltwo-pipeline-hybrid --os_model_id=$MODEL_ID > /tmp/l2-results.log +pipenv run python test_opensearch.py --dataset=$DATASET 
--dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=hybrid --pipelines=norm-zscore-pipeline-hybrid --os_model_id=$MODEL_ID > /tmp/zscore-results.log +pipenv run python test_opensearch.py --dataset=$DATASET --dataset_url="https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip" --os_host=localhost --os_port=$PORT --os_index=$INDEX --operation=evaluate --method=hybrid --pipelines=norm-zscore--with-negatives-pipeline-hybrid --os_model_id=$MODEL_ID > /tmp/zscore-with-negatives-results.log +``` \ No newline at end of file