From 2827393d64a099302bdca62c7ef000618ae65c0a Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 20 Aug 2019 14:45:33 +0100 Subject: [PATCH 1/3] Primary AAE and extended tick Tick extended due to overlap with core vnode_inactivity_timeout. Future overlap is possible, but is flagged in configuration comment. AAE only acts between primary vnodes, but this can be reverted to acting between fallbacks by switching the flag to off. --- priv/riak_kv.schema | 18 ++++++++++++++++-- src/riak_kv_vnode.erl | 17 ++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/priv/riak_kv.schema b/priv/riak_kv.schema index 05c0a87604..de5b7f1c08 100644 --- a/priv/riak_kv.schema +++ b/priv/riak_kv.schema @@ -79,10 +79,14 @@ %% @doc Frequency to prompt exchange per vnode %% The number of milliseconds which the vnode must wait between self-pokes to -%% maybe prompt the next exchange. Default is 2 minutes. +%% maybe prompt the next exchange. Default is 2 minutes 30 seconds. +%% Note if this is to be reduced below this value the riak_core +%% vnode_inactivity_timeout should also be reduced or handoffs may be +%% blocked. To be safe the vnode_inactivity_timeout must be < 0.5 * the +%% tictacaae_exchangetick. {mapping, "tictacaae_exchangetick", "riak_kv.tictacaae_exchangetick", [ {datatype, integer}, - {default, 120000}, + {default, 150000}, hidden ]}. @@ -95,6 +99,16 @@ hidden ]}. +%% @doc Exchange only between primary vnodes +%% Setting this to off allows Tictac AAE exchanges between both primary and +%% fallback vnodes. +{mapping, "tictacaae_primaryonly", "riak_kv.tictacaae_primaryonly", [ + {datatype, flag}, + {default, on}, + hidden +]}. 
+ + %% @doc Pool Strategy - should a single node_worker_pool or multiple pools be %% used for queueing potentially longer-running "background" queries {mapping, "worker_pool_strategy", "riak_kv.worker_pool_strategy", [ diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 52011b953d..27c420a132 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -1115,7 +1115,22 @@ handle_command(tictacaae_exchangepoke, _Sender, State) -> tictac_deltacount = 0, tictac_startqueue = Now}}; [{Local, Remote, {DocIdx, N}}|Rest] -> - PL = riak_core_apl:get_apl(<<(DocIdx-1):160/integer>>, N, riak_kv), + PrimaryOnly = + app_helper:get_env(riak_kv, tictacaae_primaryonly, true), + % By default TictacAAE exchanges are run only between primary + % vnodes, and not between fallback and vnodes. Changing this + % to false will allow fallback vnodes to be populated via AAE, + % increasing the workload during failure scenarios, but also + % reducing the potential for entropy in long-term failures + PlLup = <<(DocIdx-1):160/integer>>, + PL = + case PrimaryOnly of + true -> + PL0 = riak_core_apl:get_primary_apl(PlLup, N, riak_kv), + [{PIdx, PN} || {{PIdx, PN}, primary} <- PL0]; + false -> + riak_core_apl:get_apl(PlLup, N, riak_kv) + end, case {lists:keyfind(Local, 1, PL), lists:keyfind(Remote, 1, PL)} of {{Local, LN}, {Remote, RN}} -> IndexN = {DocIdx, N}, From 8610ac6d27633bbcfe4e508c5d47958a76db23d2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Wed, 21 Aug 2019 10:29:34 +0100 Subject: [PATCH 2/3] Make log more helpful to operator --- src/riak_kv_vnode.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 27c420a132..c74d40dc25 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -1149,7 +1149,8 @@ handle_command(tictacaae_exchangepoke, _Sender, State) -> _ -> lager:warning("Proposed exchange between ~w and ~w " ++ "not currently supported within " ++ - "preflist for IndexN=~w", + 
"preflist for IndexN=~w possibly due to " ++ + "node failure", [Local, Remote, {DocIdx, N}]) end, {noreply, State#state{tictac_exchangequeue = Rest}} From 2edf0c2a59bc8dce10615d5c241ea52bff5a5ed3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 23 Aug 2019 11:17:32 +0100 Subject: [PATCH 3/3] Clarify comments --- src/riak_kv_vnode.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index c74d40dc25..800e084e49 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -291,8 +291,10 @@ maybe_create_hashtrees(true, State=#state{idx=Index, upgrade_hashtree=Upgrade, -spec maybe_start_aaecontroller(active|passive, state()) -> state(). %% @doc -%% Start an AAE controller if riak_kv has been consfigured to use cached -%% tictac tree based AAE +%% Start an AAE controller if riak_kv has been configured to use cached +%% tictac tree based AAE. Note that a controller will always start, and +%% receive updates, even if the vnode is not a primary (and will not be +%% involved in exchanges). maybe_start_aaecontroller(passive, State) -> State#state{tictac_aae=false, aae_controller=undefined}; maybe_start_aaecontroller(active, State=#state{mod=Mod, @@ -1118,10 +1120,10 @@ handle_command(tictacaae_exchangepoke, _Sender, State) -> PrimaryOnly = app_helper:get_env(riak_kv, tictacaae_primaryonly, true), % By default TictacAAE exchanges are run only between primary - % vnodes, and not between fallback and vnodes. Changing this + % vnodes, and not between fallback vnodes. Changing this % to false will allow fallback vnodes to be populated via AAE, % increasing the workload during failure scenarios, but also - % reducing the potential for entropy in long-term failures + % reducing the potential for entropy in long-term failures. PlLup = <<(DocIdx-1):160/integer>>, PL = case PrimaryOnly of