Adds Javadocs,renames methods, adds UTs for the remove_target_field f…

…unctionality In this commit I added more JavaDocs on methods and the initial class to help developers understand more in depth how the ByField Processor works. I added four more UTs that interact with the sourceMap modifcation feature I also implemented the feedback given regarding naming and the uneeded singular parameter in the RerankProcessorFactory Signed-off-by: Brian Flores <[email protected]>
opensearch-project · Oct 15, 2024 · abb0101 · abb0101
1 parent 7cbcec9
commit abb0101
Show file tree

Hide file tree

Showing 4 changed files with 385 additions and 58 deletions.
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/factory/RerankProcessorFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/factory/RerankProcessorFactory.java
@@ -42,11 +42,6 @@ public class RerankProcessorFactory implements Processor.Factory<SearchResponseP
     private final MLCommonsClientAccessor clientAccessor;
     private final ClusterService clusterService;
 
-    public RerankProcessorFactory(ClusterService clusterService) {
-        this.clusterService = clusterService;
-        this.clientAccessor = null;
-    }
-
     @Override
     public SearchResponseProcessor create(
         final Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories,
@@ -59,6 +54,8 @@ public SearchResponseProcessor create(
         RerankType type = findRerankType(config);
         boolean includeQueryContextFetcher = ContextFetcherFactory.shouldIncludeQueryContextFetcher(type);
 
+        // Currently the createFetchers method requires that you provide a context map, this branch makes sure we can ignore this on
+        // processors that don't need the context map
         List<ContextSourceFetcher> contextFetchers = processorRequiresContext(type)
             ? ContextFetcherFactory.createFetchers(config, includeQueryContextFetcher, tag, clusterService)
             : Collections.emptyList();
@@ -133,9 +130,8 @@ public static boolean shouldIncludeQueryContextFetcher(RerankType type) {
 
         /**
          * Create necessary queryContextFetchers for this processor
-         *
-         * @param config                     processor config object. Look for "context" field to find fetchers
-         * @param includeQueryContextFetcher should I include the queryContextFetcher?
+         * @param config Processor config object. Look for "context" field to find fetchers
+         * @param includeQueryContextFetcher Should I include the queryContextFetcher?
          * @return list of contextFetchers for the processor to use
          */
         public static List<ContextSourceFetcher> createFetchers(

diff --git a/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/rerank/ByFieldRerankProcessor.java
@@ -20,6 +20,42 @@
 import java.util.Map;
 import java.util.Optional;
 
+/**
+ * A reranking processor that reorders search results based on the content of a specified field.
+ * <p>
+ * The ByFieldRerankProcessor extends the RescoringRerankProcessor to provide field-based reranking
+ * capabilities. It allows for reordering of search results by considering the content of a
+ * designated target field within each document.
+ * <p>
+ * Key features:
+ * <ul>
+ *   <li>Reranks search results based on a specified target field</li>
+ *   <li>Optionally removes the target field from the final search results</li>
+ *   <li>Supports nested field structures using dot notation</li>
+ * </ul>
+ * <p>
+ * The processor uses the following configuration parameters:
+ * <ul>
+ *   <li>{@code target_field}: The field to be used for reranking (required)</li>
+ *   <li>{@code remove_target_field}: Whether to remove the target field from the final results (optional, default: false)</li>
+ * </ul>
+ * <p>
+ * Usage example:
+ * <pre>
+ * {
+ *   "rerank": {
+ *     "by_field": {
+ *       "target_field": "document.relevance_score",
+ *       "remove_target_field": true
+ *     }
+ *   }
+ * }
+ * </pre>
+ * <p>
+ * This processor is particularly useful in scenarios where additional, document-specific
+ * information stored in a field can be used to improve the relevance of search results
+ * beyond the initial scoring.
+ */
 public class ByFieldRerankProcessor extends RescoringRerankProcessor {
 
     public static final String TARGET_FIELD = "target_field";
@@ -29,14 +65,16 @@ public class ByFieldRerankProcessor extends RescoringRerankProcessor {
     protected final boolean removeTargetField;
 
     /**
-     * Constructor. pass through to RerankProcessor constructor.
+     * Constructor to pass values to the RerankProcessor constructor.
+     *
+     * @param description           The description of the processor
+     * @param tag                   The processor's identifier
+     * @param ignoreFailure         If true, OpenSearch ignores any failure of this processor and
+     *                              continues to run the remaining processors in the search pipeline.
      *
-     * @param description
-     * @param tag
-     * @param ignoreFailure
-     * @param targetField           the field you want to replace your score with
-     * @param removeTargetField
-     * @param contextSourceFetchers
+     * @param targetField           The field you want to replace your <code>_score</code> with
+     * @param removeTargetField     A flag to let you delete the target_field for better visualization (i.e. removes a duplicate value)
+     * @param contextSourceFetchers  Context from some source and puts it in a map for a reranking processor to use <b> (Unused in ByFieldRerankProcessor)</b>
      */
     public ByFieldRerankProcessor(
         String description,
@@ -55,30 +93,31 @@ public ByFieldRerankProcessor(
     public void rescoreSearchResponse(SearchResponse response, Map<String, Object> rerankingContext, ActionListener<List<Float>> listener) {
         SearchHit[] searchHits = response.getHits().getHits();
 
-        if (!searchHitsHaveValidForm(searchHits, listener)) {
+        if (!validateSearchHits(searchHits, listener)) {
             return;
         }
 
         List<Float> scores = new ArrayList<>(searchHits.length);
 
         for (SearchHit hit : searchHits) {
-            Tuple<? extends MediaType, Map<String, Object>> typeAndSourceMap = getMapTuple(hit);
-            Map<String, Object> sourceAsMap = typeAndSourceMap.v2();
+            Tuple<? extends MediaType, Map<String, Object>> mediaTypeAndSourceMapTuple = getMediaTypeAndSourceMapTuple(hit);
+            Map<String, Object> sourceAsMap = mediaTypeAndSourceMapTuple.v2();
 
-            Object val = getValueFromMap(sourceAsMap, targetField).get();
+            Object val = getValueFromSource(sourceAsMap, targetField).get();
             scores.add(((Number) val).floatValue());
 
             sourceAsMap.put("previous_score", hit.getScore());
             if (removeTargetField) {
-                removeTargetFieldFromMap(sourceAsMap);
+                removeTargetFieldFromSource(sourceAsMap);
             }
 
             try {
-                XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent());
+                XContentBuilder builder = XContentBuilder.builder(mediaTypeAndSourceMapTuple.v1().xContent());
                 builder.map(sourceAsMap);
                 hit.sourceRef(BytesReference.bytes(builder));
             } catch (IOException e) {
-                throw new RuntimeException(e);
+                listener.onFailure(new RuntimeException(e));
+                return;
             }
         }
 
@@ -90,11 +129,11 @@ public void rescoreSearchResponse(SearchResponse response, Map<String, Object> r
      * to remove. It is implemented recursively to delete empty maps as a result of removing the
      * targetField
      * <hr>
-     * <b>This method assumes that the path to the mapping exists as checked by {@link #searchHitsHaveValidForm(SearchHit[], ActionListener)}</b>
+     * <b>This method assumes that the path to the mapping exists as checked by {@link #validateSearchHits(SearchHit[], ActionListener)}</b>
      * As such no error cehcking is done in the methods implementing this functionality
      * @param sourceAsMap the map of maps that contains the <code>targetField</code>
      */
-    private void removeTargetFieldFromMap(Map<String, Object> sourceAsMap) {
+    private void removeTargetFieldFromSource(Map<String, Object> sourceAsMap) {
         String[] keys = targetField.split("\\.");
         exploreMapAndRemove(sourceAsMap, keys, 0);
     }
@@ -105,7 +144,7 @@ private void removeTargetFieldFromMap(Map<String, Object> sourceAsMap) {
      * be deleted. The consequence of this, is having to delete all subsequent empty maps , this is
      * accounted for by the last check to see that the mapping should be removed.
      * <hr>
-     * <b>This method assumes that the path to the mapping exists as checked by {@link #searchHitsHaveValidForm(SearchHit[], ActionListener)}</b>
+     * <b>This method assumes that the path to the mapping exists as checked by {@link #validateSearchHits(SearchHit[], ActionListener)}</b>
      * As such no error cehcking is done in the methods implementing this functionality
      * @param sourceAsMap the map of maps that contains the <code>targetField</code>
      * @param keys The keys used to traverse the nested map
@@ -129,7 +168,21 @@ private void exploreMapAndRemove(Map<String, Object> sourceAsMap, String[] keys,
         }
     }
 
-    private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<List<Float>> listener) {
+    /**
+     * This is the preflight check for the ByField ReRank Processor. It checks that
+     * every Search Hit in the array from a given search Response has all the following
+     * for each SearchHit
+     * <ul>
+     *     <li>Has a <code>_source</code> mapping</li>
+     *     <li>Has a valid mapping for <code>target_field</code></li>
+     *     <li>That value for the mapping is a valid number</li>
+     * </ul>
+     * When just one of the conditions fail the exception will be thrown to the listener.
+     * @param searchHits from the ByField ReRank Processor
+     * @param listener returns an error to the listener in case on of the conditions fail
+     * @return The status indicating that the SearchHits are in correct form to perform the Rerank
+     */
+    private boolean validateSearchHits(SearchHit[] searchHits, ActionListener<List<Float>> listener) {
         for (int i = 0; i < searchHits.length; i++) {
             SearchHit hit = searchHits[i];
 
@@ -140,15 +193,15 @@ private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<L
                 return false;
             }
 
-            Map<String, Object> sourceMap = getMapTuple(hit).v2();
-            if (!containsMapping(sourceMap, targetField)) {
+            Map<String, Object> sourceMap = getMediaTypeAndSourceMapTuple(hit).v2();
+            if (!mappingExistsInSource(sourceMap, targetField)) {
                 listener.onFailure(
                     new IllegalArgumentException("The field to rerank [" + targetField + "] is not found at hit [" + i + "]")
                 );
                 return false;
             }
 
-            Optional<Object> val = getValueFromMap(sourceMap, targetField);
+            Optional<Object> val = getValueFromSource(sourceMap, targetField);
             if (val.isEmpty()) {
                 listener.onFailure(
                     new IllegalArgumentException("The field to rerank [" + targetField + "] is found to be null at hit [" + i + "]")
@@ -172,13 +225,13 @@ private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<L
      * When the targetField has the form (key[.key]) it will iterate through
      * the map to see if a mapping exists.
      *
-     * @param map         the map you want to iterate through
-     * @param pathToValue the path to take to get the desired mapping
+     * @param sourceAsMap The Source map (a map of maps) to iterate through
+     * @param pathToValue The path to take to get the desired mapping
      * @return A possible result within an optional
      */
-    private Optional<Object> getValueFromMap(Map<String, Object> map, String pathToValue) {
+    private Optional<Object> getValueFromSource(Map<String, Object> sourceAsMap, String pathToValue) {
         String[] keys = pathToValue.split("\\.");
-        Optional<Object> currentValue = Optional.of(map);
+        Optional<Object> currentValue = Optional.of(sourceAsMap);
 
         for (String key : keys) {
             currentValue = currentValue.flatMap(value -> {
@@ -194,18 +247,25 @@ private Optional<Object> getValueFromMap(Map<String, Object> map, String pathToV
         return currentValue;
     }
 
-    private boolean containsMapping(Map<String, Object> map, String pathToValue) {
-        return getValueFromMap(map, pathToValue).isPresent();
+    /**
+     * Determines whether there exists a value that has a mapping according to the pathToValue. This is particularly
+     * useful when the source map is a map of maps and when the pathToValue is of the form key[.key]
+     * @param sourceAsMap the source field converted to a map
+     * @param pathToValue A string of the form key[.key] indicating what keys to apply to the sourceMap
+     * @return Whether the mapping using the pathToValue exists
+     */
+    private boolean mappingExistsInSource(Map<String, Object> sourceAsMap, String pathToValue) {
+        return getValueFromSource(sourceAsMap, pathToValue).isPresent();
     }
 
     /**
      * This helper method is used to retrieve the <code>_source</code> mapping (via v2()) and
-     * any metadata associated in this mapping (via v2()).
+     * any metadata associated in this mapping (via v1()).
      *
      * @param hit The searchHit that is expected to have a <code>_source</code> mapping
-     * @return Object that contains metadata on the mapping v1() and the actual contents v2()
+     * @return Object that contains metadata (MediaType) on the mapping v1() and the actual contents (sourceMap) v2()
      */
-    private static Tuple<? extends MediaType, Map<String, Object>> getMapTuple(SearchHit hit) {
+    private static Tuple<? extends MediaType, Map<String, Object>> getMediaTypeAndSourceMapTuple(SearchHit hit) {
         BytesReference sourceRef = hit.getSourceRef();
         return XContentHelper.convertToMap(sourceRef, false, (MediaType) null);
     }

diff --git a/src/main/java/org/opensearch/neuralsearch/processor/rerank/RerankProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/rerank/RerankProcessor.java
@@ -109,6 +109,17 @@ public void processResponseAsync(
         }
     }
 
+    /**
+     * There are scenarios where ranking occurs without needing context. Currently, these are the processors don't require
+     * the context mapping
+     * <ul>
+     *     <li>
+     *         ByFieldRerankProcessor - Uses the search response to get value to rescore by
+     *     </li>
+     * </ul>
+     * @param subType The kind of rerank processor
+     * @return Whether a rerank subtype needs context to perform the rescore search response action.
+     */
     public static boolean processorRequiresContext(RerankType subType) {
         return !processorsWithNoContext.contains(subType);
     }