Skip to content

Commit

Permalink
Adds Javadocs,renames methods, adds UTs for the remove_target_field f…
Browse files Browse the repository at this point in the history
…unctionality

In this commit I added more JavaDocs on methods and the initial class to help developers understand more in depth how the ByField Processor works. I added four more UTs that interact with the sourceMap modifcation feature I also implemented the feedback given regarding naming and the uneeded singular parameter in the RerankProcessorFactory

Signed-off-by: Brian Flores <[email protected]>
  • Loading branch information
brianf-aws committed Oct 15, 2024
1 parent 7cbcec9 commit abb0101
Show file tree
Hide file tree
Showing 4 changed files with 385 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ public class RerankProcessorFactory implements Processor.Factory<SearchResponseP
private final MLCommonsClientAccessor clientAccessor;
private final ClusterService clusterService;

public RerankProcessorFactory(ClusterService clusterService) {
this.clusterService = clusterService;
this.clientAccessor = null;
}

@Override
public SearchResponseProcessor create(
final Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories,
Expand All @@ -59,6 +54,8 @@ public SearchResponseProcessor create(
RerankType type = findRerankType(config);
boolean includeQueryContextFetcher = ContextFetcherFactory.shouldIncludeQueryContextFetcher(type);

// Currently the createFetchers method requires that you provide a context map, this branch makes sure we can ignore this on
// processors that don't need the context map
List<ContextSourceFetcher> contextFetchers = processorRequiresContext(type)
? ContextFetcherFactory.createFetchers(config, includeQueryContextFetcher, tag, clusterService)
: Collections.emptyList();
Expand Down Expand Up @@ -133,9 +130,8 @@ public static boolean shouldIncludeQueryContextFetcher(RerankType type) {

/**
* Create necessary queryContextFetchers for this processor
*
* @param config processor config object. Look for "context" field to find fetchers
* @param includeQueryContextFetcher should I include the queryContextFetcher?
* @param config Processor config object. Look for "context" field to find fetchers
* @param includeQueryContextFetcher Should I include the queryContextFetcher?
* @return list of contextFetchers for the processor to use
*/
public static List<ContextSourceFetcher> createFetchers(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@
import java.util.Map;
import java.util.Optional;

/**
* A reranking processor that reorders search results based on the content of a specified field.
* <p>
* The ByFieldRerankProcessor extends the RescoringRerankProcessor to provide field-based reranking
* capabilities. It allows for reordering of search results by considering the content of a
* designated target field within each document.
* <p>
* Key features:
* <ul>
* <li>Reranks search results based on a specified target field</li>
* <li>Optionally removes the target field from the final search results</li>
* <li>Supports nested field structures using dot notation</li>
* </ul>
* <p>
* The processor uses the following configuration parameters:
* <ul>
* <li>{@code target_field}: The field to be used for reranking (required)</li>
* <li>{@code remove_target_field}: Whether to remove the target field from the final results (optional, default: false)</li>
* </ul>
* <p>
* Usage example:
* <pre>
* {
* "rerank": {
* "by_field": {
* "target_field": "document.relevance_score",
* "remove_target_field": true
* }
* }
* }
* </pre>
* <p>
* This processor is particularly useful in scenarios where additional, document-specific
* information stored in a field can be used to improve the relevance of search results
* beyond the initial scoring.
*/
public class ByFieldRerankProcessor extends RescoringRerankProcessor {

public static final String TARGET_FIELD = "target_field";
Expand All @@ -29,14 +65,16 @@ public class ByFieldRerankProcessor extends RescoringRerankProcessor {
protected final boolean removeTargetField;

/**
* Constructor. pass through to RerankProcessor constructor.
* Constructor to pass values to the RerankProcessor constructor.
*
* @param description The description of the processor
* @param tag The processor's identifier
* @param ignoreFailure If true, OpenSearch ignores any failure of this processor and
* continues to run the remaining processors in the search pipeline.
*
* @param description
* @param tag
* @param ignoreFailure
* @param targetField the field you want to replace your score with
* @param removeTargetField
* @param contextSourceFetchers
* @param targetField The field you want to replace your <code>_score</code> with
* @param removeTargetField A flag to let you delete the target_field for better visualization (i.e. removes a duplicate value)
* @param contextSourceFetchers Context from some source and puts it in a map for a reranking processor to use <b> (Unused in ByFieldRerankProcessor)</b>
*/
public ByFieldRerankProcessor(
String description,
Expand All @@ -55,30 +93,31 @@ public ByFieldRerankProcessor(
public void rescoreSearchResponse(SearchResponse response, Map<String, Object> rerankingContext, ActionListener<List<Float>> listener) {
SearchHit[] searchHits = response.getHits().getHits();

if (!searchHitsHaveValidForm(searchHits, listener)) {
if (!validateSearchHits(searchHits, listener)) {
return;
}

List<Float> scores = new ArrayList<>(searchHits.length);

for (SearchHit hit : searchHits) {
Tuple<? extends MediaType, Map<String, Object>> typeAndSourceMap = getMapTuple(hit);
Map<String, Object> sourceAsMap = typeAndSourceMap.v2();
Tuple<? extends MediaType, Map<String, Object>> mediaTypeAndSourceMapTuple = getMediaTypeAndSourceMapTuple(hit);
Map<String, Object> sourceAsMap = mediaTypeAndSourceMapTuple.v2();

Object val = getValueFromMap(sourceAsMap, targetField).get();
Object val = getValueFromSource(sourceAsMap, targetField).get();
scores.add(((Number) val).floatValue());

sourceAsMap.put("previous_score", hit.getScore());
if (removeTargetField) {
removeTargetFieldFromMap(sourceAsMap);
removeTargetFieldFromSource(sourceAsMap);
}

try {
XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent());
XContentBuilder builder = XContentBuilder.builder(mediaTypeAndSourceMapTuple.v1().xContent());
builder.map(sourceAsMap);
hit.sourceRef(BytesReference.bytes(builder));
} catch (IOException e) {
throw new RuntimeException(e);
listener.onFailure(new RuntimeException(e));
return;
}
}

Expand All @@ -90,11 +129,11 @@ public void rescoreSearchResponse(SearchResponse response, Map<String, Object> r
* to remove. It is implemented recursively to delete empty maps as a result of removing the
* targetField
* <hr>
* <b>This method assumes that the path to the mapping exists as checked by {@link #searchHitsHaveValidForm(SearchHit[], ActionListener)}</b>
* <b>This method assumes that the path to the mapping exists as checked by {@link #validateSearchHits(SearchHit[], ActionListener)}</b>
* As such no error cehcking is done in the methods implementing this functionality
* @param sourceAsMap the map of maps that contains the <code>targetField</code>
*/
private void removeTargetFieldFromMap(Map<String, Object> sourceAsMap) {
private void removeTargetFieldFromSource(Map<String, Object> sourceAsMap) {
String[] keys = targetField.split("\\.");
exploreMapAndRemove(sourceAsMap, keys, 0);
}
Expand All @@ -105,7 +144,7 @@ private void removeTargetFieldFromMap(Map<String, Object> sourceAsMap) {
* be deleted. The consequence of this, is having to delete all subsequent empty maps , this is
* accounted for by the last check to see that the mapping should be removed.
* <hr>
* <b>This method assumes that the path to the mapping exists as checked by {@link #searchHitsHaveValidForm(SearchHit[], ActionListener)}</b>
* <b>This method assumes that the path to the mapping exists as checked by {@link #validateSearchHits(SearchHit[], ActionListener)}</b>
* As such no error cehcking is done in the methods implementing this functionality
* @param sourceAsMap the map of maps that contains the <code>targetField</code>
* @param keys The keys used to traverse the nested map
Expand All @@ -129,7 +168,21 @@ private void exploreMapAndRemove(Map<String, Object> sourceAsMap, String[] keys,
}
}

private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<List<Float>> listener) {
/**
* This is the preflight check for the ByField ReRank Processor. It checks that
* every Search Hit in the array from a given search Response has all the following
* for each SearchHit
* <ul>
* <li>Has a <code>_source</code> mapping</li>
* <li>Has a valid mapping for <code>target_field</code></li>
* <li>That value for the mapping is a valid number</li>
* </ul>
* When just one of the conditions fail the exception will be thrown to the listener.
* @param searchHits from the ByField ReRank Processor
* @param listener returns an error to the listener in case on of the conditions fail
* @return The status indicating that the SearchHits are in correct form to perform the Rerank
*/
private boolean validateSearchHits(SearchHit[] searchHits, ActionListener<List<Float>> listener) {
for (int i = 0; i < searchHits.length; i++) {
SearchHit hit = searchHits[i];

Expand All @@ -140,15 +193,15 @@ private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<L
return false;
}

Map<String, Object> sourceMap = getMapTuple(hit).v2();
if (!containsMapping(sourceMap, targetField)) {
Map<String, Object> sourceMap = getMediaTypeAndSourceMapTuple(hit).v2();
if (!mappingExistsInSource(sourceMap, targetField)) {
listener.onFailure(
new IllegalArgumentException("The field to rerank [" + targetField + "] is not found at hit [" + i + "]")
);
return false;
}

Optional<Object> val = getValueFromMap(sourceMap, targetField);
Optional<Object> val = getValueFromSource(sourceMap, targetField);
if (val.isEmpty()) {
listener.onFailure(
new IllegalArgumentException("The field to rerank [" + targetField + "] is found to be null at hit [" + i + "]")
Expand All @@ -172,13 +225,13 @@ private boolean searchHitsHaveValidForm(SearchHit[] searchHits, ActionListener<L
* When the targetField has the form (key[.key]) it will iterate through
* the map to see if a mapping exists.
*
* @param map the map you want to iterate through
* @param pathToValue the path to take to get the desired mapping
* @param sourceAsMap The Source map (a map of maps) to iterate through
* @param pathToValue The path to take to get the desired mapping
* @return A possible result within an optional
*/
private Optional<Object> getValueFromMap(Map<String, Object> map, String pathToValue) {
private Optional<Object> getValueFromSource(Map<String, Object> sourceAsMap, String pathToValue) {
String[] keys = pathToValue.split("\\.");
Optional<Object> currentValue = Optional.of(map);
Optional<Object> currentValue = Optional.of(sourceAsMap);

for (String key : keys) {
currentValue = currentValue.flatMap(value -> {
Expand All @@ -194,18 +247,25 @@ private Optional<Object> getValueFromMap(Map<String, Object> map, String pathToV
return currentValue;
}

private boolean containsMapping(Map<String, Object> map, String pathToValue) {
return getValueFromMap(map, pathToValue).isPresent();
/**
* Determines whether there exists a value that has a mapping according to the pathToValue. This is particularly
* useful when the source map is a map of maps and when the pathToValue is of the form key[.key]
* @param sourceAsMap the source field converted to a map
* @param pathToValue A string of the form key[.key] indicating what keys to apply to the sourceMap
* @return Whether the mapping using the pathToValue exists
*/
private boolean mappingExistsInSource(Map<String, Object> sourceAsMap, String pathToValue) {
return getValueFromSource(sourceAsMap, pathToValue).isPresent();
}

/**
* This helper method is used to retrieve the <code>_source</code> mapping (via v2()) and
* any metadata associated in this mapping (via v2()).
* any metadata associated in this mapping (via v1()).
*
* @param hit The searchHit that is expected to have a <code>_source</code> mapping
* @return Object that contains metadata on the mapping v1() and the actual contents v2()
* @return Object that contains metadata (MediaType) on the mapping v1() and the actual contents (sourceMap) v2()
*/
private static Tuple<? extends MediaType, Map<String, Object>> getMapTuple(SearchHit hit) {
private static Tuple<? extends MediaType, Map<String, Object>> getMediaTypeAndSourceMapTuple(SearchHit hit) {
BytesReference sourceRef = hit.getSourceRef();
return XContentHelper.convertToMap(sourceRef, false, (MediaType) null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ public void processResponseAsync(
}
}

/**
* There are scenarios where ranking occurs without needing context. Currently, these are the processors don't require
* the context mapping
* <ul>
* <li>
* ByFieldRerankProcessor - Uses the search response to get value to rescore by
* </li>
* </ul>
* @param subType The kind of rerank processor
* @return Whether a rerank subtype needs context to perform the rescore search response action.
*/
public static boolean processorRequiresContext(RerankType subType) {
return !processorsWithNoContext.contains(subType);
}
Expand Down
Loading

0 comments on commit abb0101

Please sign in to comment.