-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Correct service time parameter in ARS formula #70283
Changes from all commits
71e01c6
15e952c
fd5be9b
70c00bc
1b245e7
5f3d827
5dafd98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.search.routing; | ||
|
||
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; | ||
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse; | ||
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; | ||
import org.elasticsearch.action.search.SearchResponse; | ||
import org.elasticsearch.client.Client; | ||
import org.elasticsearch.cluster.node.DiscoveryNode; | ||
import org.elasticsearch.cluster.routing.OperationRouting; | ||
import org.elasticsearch.common.collect.ImmutableOpenMap; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.node.ResponseCollectorService.ComputedNodeStats; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
|
||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; | ||
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; | ||
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; | ||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
@ESIntegTestCase.ClusterScope(numClientNodes = 1, numDataNodes = 3) | ||
public class SearchReplicaSelectionIT extends ESIntegTestCase { | ||
|
||
@Override | ||
public Settings nodeSettings(int nodeOrdinal) { | ||
return Settings.builder().put(super.nodeSettings(nodeOrdinal)) | ||
.put(OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING.getKey(), true).build(); | ||
} | ||
|
||
public void testNodeSelection() { | ||
// We grab a client directly to avoid using a randomizing client that might set a search preference. | ||
Client client = internalCluster().coordOnlyNodeClient(); | ||
|
||
client.admin().indices().prepareCreate("test") | ||
.setSettings(Settings.builder() | ||
.put(SETTING_NUMBER_OF_SHARDS, 1) | ||
.put(SETTING_NUMBER_OF_REPLICAS, 2)) | ||
.get(); | ||
ensureGreen(); | ||
|
||
client.prepareIndex("test").setSource("field", "value").get(); | ||
refresh(); | ||
|
||
// Before we've gathered stats for all nodes, we should try each node once. | ||
Set<String> nodeIds = new HashSet<>(); | ||
SearchResponse searchResponse = client.prepareSearch().setQuery(matchAllQuery()).get(); | ||
assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); | ||
nodeIds.add(searchResponse.getHits().getAt(0).getShard().getNodeId()); | ||
|
||
searchResponse = client.prepareSearch().setQuery(matchAllQuery()).get(); | ||
assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); | ||
nodeIds.add(searchResponse.getHits().getAt(0).getShard().getNodeId()); | ||
|
||
searchResponse = client.prepareSearch().setQuery(matchAllQuery()).get(); | ||
assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); | ||
nodeIds.add(searchResponse.getHits().getAt(0).getShard().getNodeId()); | ||
|
||
assertEquals(3, nodeIds.size()); | ||
|
||
// Now after more searches, we should select the node with the best ARS rank. | ||
for (int i = 0; i < 5; i++) { | ||
client.prepareSearch().setQuery(matchAllQuery()).get(); | ||
} | ||
|
||
ClusterStateResponse clusterStateResponse = client.admin().cluster().prepareState().get(); | ||
ImmutableOpenMap<String, DiscoveryNode> coordinatingNodes = clusterStateResponse.getState().nodes().getCoordinatingOnlyNodes(); | ||
assertEquals(1, coordinatingNodes.size()); | ||
|
||
String coordinatingNodeId = coordinatingNodes.valuesIt().next().getId(); | ||
NodesStatsResponse statsResponse = client.admin().cluster().prepareNodesStats() | ||
.setAdaptiveSelection(true) | ||
.get(); | ||
NodeStats nodeStats = statsResponse.getNodesMap().get(coordinatingNodeId); | ||
assertNotNull(nodeStats); | ||
assertEquals(3, nodeStats.getAdaptiveSelectionStats().getComputedStats().size()); | ||
|
||
String bestNodeId = null; | ||
double bestRank = Double.POSITIVE_INFINITY; | ||
for (Map.Entry<String, ComputedNodeStats> entry : nodeStats.getAdaptiveSelectionStats().getComputedStats().entrySet()) { | ||
double rank = entry.getValue().rank(1L); | ||
if (rank < bestRank) { | ||
bestNodeId = entry.getKey(); | ||
bestRank = rank; | ||
} | ||
} | ||
|
||
searchResponse = client.prepareSearch().setQuery(matchAllQuery()).get(); | ||
assertEquals(bestNodeId, searchResponse.getHits().getAt(0).getShard().getNodeId()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,10 @@ | |
*/ | ||
public final class ResponseCollectorService implements ClusterStateListener { | ||
|
||
private static final double ALPHA = 0.3; | ||
/** | ||
* The weight parameter used for all moving averages of parameters. | ||
*/ | ||
public static final double ALPHA = 0.3; | ||
|
||
private final ConcurrentMap<String, NodeStatistics> nodeIdToStats = ConcurrentCollections.newConcurrentMap(); | ||
|
||
|
@@ -161,12 +164,12 @@ private double innerRank(long outstandingRequests) { | |
|
||
// EWMA of response time | ||
double rS = responseTime / FACTOR; | ||
// EWMA of service time | ||
double muBarS = serviceTime / FACTOR; | ||
// EWMA of service time. We match the paper's notation, which | ||
// defines service time as the inverse of service rate (muBarS). | ||
double muBarSInverse = serviceTime / FACTOR; | ||
|
||
// The final formula | ||
double rank = rS - (1.0 / muBarS) + (Math.pow(qHatS, queueAdjustmentFactor) / muBarS); | ||
return rank; | ||
return rS - muBarSInverse + Math.pow(qHatS, queueAdjustmentFactor) * muBarSInverse; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change looks good. But I wonder if the I wonder if you know the reasoning behind how the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also found this surprising and don't understand the reasoning. I am guessing it is meant as a loose approximation to the number of clients, since by default every node can serve as a coordinating node. Since we were dividing by service time before, this I can see a couple options. We could avoid making changes to In any case, I will make sure we track this through an issue or other means. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My worry is primarily that if we split changes here, we end up with some customers seeing multiple changes to the behavior over releases. On the other hand, my intuition on this matter is not really strong enough that I think it is a show-stopper to merge this first and then deal with the other part later. It seems likely enough that The dedicated role for coordinating nodes might come in handy here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change identified a few aspects of ARS that could use improvement (including the 'stats adjustment' @jimczi mentioned above). So there will likely be more changes even apart from |
||
} | ||
|
||
public double rank(long outstandingRequests) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.action.admin.cluster.node.stats; | ||
|
||
import org.elasticsearch.node.ResponseCollectorService.ComputedNodeStats; | ||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
/** | ||
* Tests the core calculation used for ranking nodes in adaptive replica selection. | ||
*/ | ||
public class ComputedNodeStatsTests extends ESTestCase { | ||
|
||
public void testBasicInvariants() { | ||
// When queue size estimate is 0, the rank should equal response time. | ||
ComputedNodeStats stats = createStats(0, 150, 100); | ||
assertThat(stats.rank(0), equalTo(150.0)); | ||
|
||
stats = createStats(0, 20, 19); | ||
assertThat(stats.rank(0), equalTo(20.0)); | ||
} | ||
|
||
public void testParameterScaling() { | ||
// With non-zero queue size estimate, a larger service time should result in a larger rank. | ||
ComputedNodeStats first = createStats(0, 150, 100); | ||
ComputedNodeStats second = createStats(0, 150, 120); | ||
assertTrue(first.rank(1) < second.rank(1)); | ||
|
||
first = createStats(1, 200, 199); | ||
second = createStats(1, 200, 200); | ||
assertTrue(first.rank(3) < second.rank(3)); | ||
|
||
// A larger response time should always result in a larger rank. | ||
first = createStats(2, 150, 100); | ||
second = createStats(2, 200, 100); | ||
assertTrue(first.rank(1) < second.rank(1)); | ||
|
||
first = createStats(2, 150, 150); | ||
second = createStats(2, 200, 150); | ||
assertTrue(first.rank(1) < second.rank(1)); | ||
|
||
// More queued requests should always result in a larger rank. | ||
first = createStats(2, 150, 100); | ||
second = createStats(3, 150, 100); | ||
assertTrue(first.rank(1) < second.rank(1)); | ||
|
||
// More pending requests should always result in a larger rank. | ||
first = createStats(2, 150, 100); | ||
second = createStats(2, 150, 100); | ||
assertTrue(first.rank(0) < second.rank(1)); | ||
} | ||
|
||
private ComputedNodeStats createStats(int queueSize, int responseTimeMillis, int serviceTimeMillis) { | ||
return new ComputedNodeStats("node0", 5, queueSize, 1_000_000 * responseTimeMillis, 1_000_000 * serviceTimeMillis); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed offline, the "adjustment" seems odd. We should think about updating the node statistics explicitly. That's not in the scope of this PR but that would be a good follow up as discussed offline.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jtibshirani
I'm curious, in
NodeStatistics
:why not
serviceTime
also use typeExponentiallyWeightedMovingAverage
, but adjust it here? It seems the effect is the same.