From 4013d18e0af787da8a0fc4ac8521908358b5db48 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 18 Nov 2021 16:25:39 +0000 Subject: [PATCH 1/5] Update tictac_aae test for prompted repairs --- tests/verify_tictac_aae.erl | 159 ++++++++++++++++++++++++++++++------ 1 file changed, 132 insertions(+), 27 deletions(-) diff --git a/tests/verify_tictac_aae.erl b/tests/verify_tictac_aae.erl index 86dcbc3e2..3f6e868a8 100644 --- a/tests/verify_tictac_aae.erl +++ b/tests/verify_tictac_aae.erl @@ -43,9 +43,9 @@ % I would hope this would come from the testing framework some day % to use the test in small and large scenarios. --define(DEFAULT_RING_SIZE, 8). +-define(DEFAULT_RING_SIZE, 16). -define(AAE_THROTTLE_LIMITS, [{-1, 0}, {100, 10}]). --define(CFG_NOREBUILD(PrimaryOnly, InitialSkip), +-define(CFG_NOREBUILD(PrimaryOnly, InitialSkip, MaxResults, ExTick, KR), [{riak_kv, [ % Speedy AAE configuration @@ -56,10 +56,13 @@ % store {tictacaae_rebuildwait, 4}, {tictacaae_rebuilddelay, 3600}, - {tictacaae_exchangetick, 5 * 1000}, % 5 seconds + {tictacaae_exchangetick, ExTick * 1000}, {tictacaae_rebuildtick, 3600000}, % don't tick for an hour! {tictacaae_primaryonly, PrimaryOnly}, - {tictacaae_stepinitialtick, InitialSkip} + {tictacaae_stepinitialtick, InitialSkip}, + {tictacaae_maxresults, MaxResults}, + {tictacaae_repairloops, 4}, + {tictacaae_enablekeyrange, KR} ]}, {riak_core, [ @@ -81,28 +84,34 @@ {tictacaae_rebuildtick, 15 * 1000}, % Check for rebuilds! {max_aae_queue_time, 0}, {tictacaae_stepinitialtick, false}, - {log_readrepair, true} + {log_readrepair, true}, + {tictacaae_enablekeyrange, true} ]}, {riak_core, [ {ring_creation_size, ?DEFAULT_RING_SIZE} ]}] ). --define(NUM_NODES, 3). --define(NUM_KEYS, 1000). +-define(NUM_NODES, 4). +-define(NUM_KEYS, 2000). -define(BUCKET, <<"test_bucket">>). +-define(ALT_BUCKET1, <<"alt_bucket1">>). +-define(ALT_BUCKET2, <<"alt_bucket2">>). +-define(ALT_BUCKET3, <<"alt_bucket3">>). +-define(ALT_BUCKET4, <<"alt_bucket4">>). 
-define(N_VAL, 3). +-define(STATS_DELAY, 1000). confirm() -> - lager:info("Test with no rebuilds - and no startup skip"), - Nodes1 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(true, false)), - ok = verify_aae_norebuild(Nodes1), + lager:info("Test with no rebuilds - and no startup skip and no key ranges"), + Nodes1 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(true, false, 64, 15, false)), + ok = verify_aae_norebuild(Nodes1, true), rt:clean_cluster(Nodes1), - lager:info("Test with no rebuilds - but with startup skip"), - Nodes2 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(true, true)), - ok = verify_aae_norebuild(Nodes2), + lager:info("Test with no rebuilds - but with startup skip and key ranges"), + Nodes2 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(true, true, 64, 10, true)), + ok = verify_aae_norebuild(Nodes2, false), rt:clean_cluster(Nodes2), lager:info("Test with rebuilds"), @@ -111,24 +120,114 @@ confirm() -> rt:clean_cluster(Nodes3), lager:info("Test with no rebuilds - and AAE on fallbacks"), - Nodes4 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(false, false)), - ok = verify_aae_norebuild(Nodes4), - pass. 
- + Nodes4 = rt:build_cluster(?NUM_NODES, ?CFG_NOREBUILD(false, false, 128, 10, false)), + ok = verify_aae_norebuild(Nodes4, false), + rt:clean_cluster(Nodes4), + + OldVsn = previous, + lager:info("Building previous version cluster ~p", [OldVsn]), + [Nodes5] = + rt:build_clusters([{?NUM_NODES, OldVsn, ?CFG_NOREBUILD(true, false, 64, 15, false)}]), + + [NodeToUpgrade|_Rest] = Nodes5, + + {riak_kv, _, RiakVer} = + lists:keyfind(riak_kv, + 1, + rpc:call(NodeToUpgrade, application, loaded_applications, [])), + + case RiakVer of + RiakVer when RiakVer >= "riak_kv-3.0.9" -> + lager:info("Skipping upgrade test - previous ~s > 3.0.8", [RiakVer]), + pass; + RiakVer -> + lager:info("Running upgrade test with previous version ~s", [RiakVer]), + rt:upgrade(NodeToUpgrade, current), + rt:wait_for_service(NodeToUpgrade, riak_kv), + + ?assertNot(check_capability(NodeToUpgrade)), + + ok = verify_aae_norebuild(Nodes5, false), + + CheckFun = + fun(StatName) -> + proplists:get_value(StatName, + verify_riak_stats:get_stats(NodeToUpgrade, ?STATS_DELAY)) + end, + ?assertEqual(0, CheckFun(<<"tictacaae_bucket_total">>)), + ?assertEqual(0, CheckFun(<<"tictacaae_modtime_total">>)), + ?assertNotEqual(0, CheckFun(<<"tictacaae_exchange_total">>)), + + pass + end. + + +check_capability(Node) -> + rpc:call(Node, + riak_core_capability, + get, + [{riak_kv, tictacaae_prompted_repairs}, false]). verify_aae_norebuild(Nodes) -> + verify_aae_norebuild(Nodes, false). 
+ +verify_aae_norebuild(Nodes, CheckTypeStats) -> lager:info("Tictac AAE tests without rebuilding trees"), Node1 = hd(Nodes), % Recovery without tree rebuilds % Test recovery from too few replicas written - KV1 = test_data(1, 1000), + KV1 = test_data(1, ?NUM_KEYS), test_less_than_n_writes(Node1, KV1), - % Test recovery when replicas are different + RepSN = <<"read_repairs_total">>, + Repairs = + lists:sum( + lists:map( + fun(N) -> + proplists:get_value(RepSN, + verify_riak_stats:get_stats(N, ?STATS_DELAY)) + end, + Nodes)), + ?assertMatch(?NUM_KEYS, Repairs), + KV2 = [{K, <<V/binary, "a">>} || {K, V} <- KV1], + lager:info("Writing additional n=1 data to require more repairs"), + write_data(Node1, KV2, [{n_val, 1}], ?ALT_BUCKET1), + write_data(Node1, KV2, [{n_val, 1}], ?ALT_BUCKET2), + write_data(Node1, KV2, [{n_val, 1}], ?ALT_BUCKET3), + write_data(Node1, KV2, [{n_val, 1}], ?ALT_BUCKET4), + lager:info("Updating data on n=1"), test_less_than_n_mods(Node1, KV2), + lager:info("Verifying alternative bucket data"), + verify_data(Node1, KV2, ?ALT_BUCKET1), + verify_data(Node1, KV2, ?ALT_BUCKET2), + verify_data(Node1, KV2, ?ALT_BUCKET3), + verify_data(Node1, KV2, ?ALT_BUCKET4), + + case CheckTypeStats of + true -> + B_SN = <<"tictacaae_bucket_total">>, + MT_SN = <<"tictacaae_modtime_total">>, + E_SN = <<"tictacaae_exchange_total">>, + VerifyFun = + fun(StatName) -> + fun(Node) -> + V = proplists:get_value(StatName, + verify_riak_stats:get_stats(Node, ?STATS_DELAY)), + ?assertNotEqual(0, V) + end + end, + + lists:foreach(VerifyFun(B_SN), Nodes), + lists:foreach(VerifyFun(MT_SN), Nodes), + lists:foreach(VerifyFun(E_SN), Nodes), + ok; + false -> + ok + end, + ok. 
verify_aae_rebuild(Nodes) -> @@ -136,7 +235,7 @@ verify_aae_rebuild(Nodes) -> Node1 = hd(Nodes), % Test recovery from too few replicas written - KV1 = test_data(1, 1000), + KV1 = test_data(1, ?NUM_KEYS), test_less_than_n_writes(Node1, KV1), % Test recovery when replicas are different @@ -144,15 +243,15 @@ verify_aae_rebuild(Nodes) -> test_less_than_n_mods(Node1, KV2), % Test recovery from too few replicas written - KV3 = test_data(1001, 2000), + KV3 = test_data(?NUM_KEYS + 1, 2 * ?NUM_KEYS), test_less_than_n_writes(Node1, KV3), % Test recovery when replicas are different KV4 = [{K, <<V/binary, "a">>} || {K, V} <- KV3], test_less_than_n_mods(Node1, KV4), - lager:info("Writing 1000 objects"), - KV5 = test_data(2001, 3000), + lager:info("Writing ~w objects", [?NUM_KEYS]), + KV5 = test_data(1 + 2 * ?NUM_KEYS, 3 * ?NUM_KEYS), write_data(Node1, KV5), % Test recovery from single partition loss. @@ -188,7 +287,7 @@ get_preflist(Node, B, K) -> Pl. to_key(N) -> - list_to_binary(io_lib:format("K~4..0B", [N])). + list_to_binary(io_lib:format("K~8..0B", [N])). test_data(Start, End) -> Keys = [to_key(N) || N <- lists:seq(Start, End)], @@ -198,14 +297,17 @@ write_data(Node, KVs) -> write_data(Node, KVs, []). write_data(Node, KVs, Opts) -> + write_data(Node, KVs, Opts, ?BUCKET). + +write_data(Node, KVs, Opts, Bucket) -> PB = rt:pbc(Node), [begin O = - case riakc_pb_socket:get(PB, ?BUCKET, K) of + case riakc_pb_socket:get(PB, Bucket, K) of {ok, Prev} -> riakc_obj:update_value(Prev, V); _ -> - riakc_obj:new(?BUCKET, K, V) + riakc_obj:new(Bucket, K, V) end, ?assertMatch(ok, riakc_pb_socket:put(PB, O, Opts)) end || {K, V} <- KVs], @@ -214,11 +316,14 @@ write_data(Node, KVs, Opts) -> % @doc Verifies that the data is eventually restored to the expected set. verify_data(Node, KeyValues) -> + verify_data(Node, KeyValues, ?BUCKET). 
+ +verify_data(Node, KeyValues, Bucket) -> lager:info("Verify all replicas are eventually correct"), PB = rt:pbc(Node), CheckFun = fun() -> - Matches = [verify_replicas(Node, ?BUCKET, K, V, ?N_VAL) + Matches = [verify_replicas(Node, Bucket, K, V, ?N_VAL) || {K, V} <- KeyValues], CountTrues = fun(true, G) -> G+1; (false, G) -> G end, NumGood = lists:foldl(CountTrues, 0, Matches), From a991d7c1b9f841fbc3c8998fe6a019523f3b34b3 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 19 Nov 2021 01:13:18 +0000 Subject: [PATCH 2/5] Create verify_tictac_aae_load.erl --- tests/verify_tictac_aae_load.erl | 219 +++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 tests/verify_tictac_aae_load.erl diff --git a/tests/verify_tictac_aae_load.erl b/tests/verify_tictac_aae_load.erl new file mode 100644 index 000000000..0552da26a --- /dev/null +++ b/tests/verify_tictac_aae_load.erl @@ -0,0 +1,219 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% @doc Verification of Active Anti Entropy. +%% The basic guarantee of AAE is this: Even without the read repairs that will +%% happen when data is accessed, inconsistencies between the replicas of a +%% KV object will be repaired eventually. 
The test tries hard not to +%% explicitly check for when the AAE trees are built or when exchanges are run +%% in an effort to remain decoupled from the implementation. Instead, it +%% simply configures AAE to build/rebuild and run exchanges between the data +%% partitions. It then performs direct vnode reads on all replicas and verify +%% they eventually match. +%% +%% Data recovery after the following scenarios is tested: +%% +%% - Data for a partition completely disappears. +%% - Less than N replicas are written +%% - Less than N replicas are updated +%% +%% Also, a sanity check is done to make sure AAE repairs go away eventually +%% if there is no activity. That was an actual early AAE bug. + +-module(verify_tictac_aae_load). +-export([confirm/0]). +-include_lib("eunit/include/eunit.hrl"). + +% I would hope this would come from the testing framework some day +% to use the test in small and large scenarios. +-define(DEFAULT_RING_SIZE, 8). +-define(EXCHANGE_TICK, 120). +-define(MAX_RESULTS, 128). +-define(REPAIR_LOOPS, 4). +-define(KEY_RANGE, false). + +-define(CFG, + [{riak_kv, + [ + {anti_entropy, {off, []}}, + {tictacaae_active, active}, + {tictacaae_exchangetick, ?EXCHANGE_TICK * 1000}, + {tictacaae_rebuildtick, 3600000}, % don't tick for an hour! + {tictacaae_maxresults, ?MAX_RESULTS}, + {tictacaae_repairloops, ?REPAIR_LOOPS}, + {tictacaae_enablekeyrange, ?KEY_RANGE} + ]}, + {riak_core, + [ + {ring_creation_size, ?DEFAULT_RING_SIZE} + ]}] + ). + +-define(NUM_NODES, 4). +-define(PRELOAD_KEYS_PERBUCKET, 100000). +-define(N1_KEYS_PERBUCKET, 250). +-define(N2_KEYS_SINGLEBUCKET, 1000). +-define(ALT_BUCKET1, <<"alt_bucket1">>). +-define(ALT_BUCKET2, <<"alt_bucket2">>). +-define(ALT_BUCKET3, <<"alt_bucket3">>). +-define(ALT_BUCKET4, <<"alt_bucket4">>). +-define(N_VAL, 3). +-define(STATS_DELAY, 1000). +-define(TEST_VERSION, current). +-define(VERIFY_DELAY, 10000). +-define(MICRO, 1000000). 
+
+confirm() ->
+    % Timing harness: builds one ?NUM_NODES-node cluster at ?TEST_VERSION with ?CFG and runs the repair timing.
+    lager:info("Not to be considered as a functional test"),
+    lager:info("Useful only for comparing repair performance"),
+    lager:info("e.g. between current and previous"),
+
+    [Nodes] = rt:build_clusters([{?NUM_NODES, ?TEST_VERSION, ?CFG}]),
+    ok = verify_aae_defaults(Nodes),
+
+    pass.
+
+%% Preloads four buckets with n_val=3 puts, writes a delta with n_val=1/2 puts, then logs the time until verify_data/3 passes.
+verify_aae_defaults(Nodes) ->
+    lager:info("Tictac AAE tests for large load to time recovery"),
+    Node1 = hd(Nodes),
+
+    % Recovery without tree rebuilds
+
+    % Preload: ?PRELOAD_KEYS_PERBUCKET keys per bucket, put with n_val=3, generated in two halves
+    lager:info("Generating ~w Keys/Values", [?PRELOAD_KEYS_PERBUCKET div 2]),
+    KV1 = test_data(1, ?PRELOAD_KEYS_PERBUCKET div 2),
+    write_data(Node1, KV1, [{n_val, 3}], ?ALT_BUCKET1),
+    write_data(Node1, KV1, [{n_val, 3}], ?ALT_BUCKET2),
+    write_data(Node1, KV1, [{n_val, 3}], ?ALT_BUCKET3),
+    write_data(Node1, KV1, [{n_val, 3}], ?ALT_BUCKET4),
+
+    lager:info("Generating ~w Keys/Values", [?PRELOAD_KEYS_PERBUCKET div 2]),
+    KV2 =
+        test_data(1 + ?PRELOAD_KEYS_PERBUCKET div 2, ?PRELOAD_KEYS_PERBUCKET),
+    write_data(Node1, KV2, [{n_val, 3}], ?ALT_BUCKET1),
+    write_data(Node1, KV2, [{n_val, 3}], ?ALT_BUCKET2),
+    write_data(Node1, KV2, [{n_val, 3}], ?ALT_BUCKET3),
+    write_data(Node1, KV2, [{n_val, 3}], ?ALT_BUCKET4),
+
+    SW0 = os:timestamp(),
+    lager:info("Start to introduce discrepancy"),
+    % Delta: KV3 is put with n_val=1 to all four buckets, KV4 with n_val=2 to ?ALT_BUCKET1 only
+    KV3 = test_data(?PRELOAD_KEYS_PERBUCKET + 1,
+                    ?PRELOAD_KEYS_PERBUCKET + ?N1_KEYS_PERBUCKET),
+    write_data(Node1, KV3, [{n_val, 1}], ?ALT_BUCKET1),
+    write_data(Node1, KV3, [{n_val, 1}], ?ALT_BUCKET2),
+    write_data(Node1, KV3, [{n_val, 1}], ?ALT_BUCKET3),
+    write_data(Node1, KV3, [{n_val, 1}], ?ALT_BUCKET4),
+    KV4 = test_data(?PRELOAD_KEYS_PERBUCKET + ?N1_KEYS_PERBUCKET + 1,
+                    ?PRELOAD_KEYS_PERBUCKET + ?N1_KEYS_PERBUCKET + ?N2_KEYS_SINGLEBUCKET),
+    write_data(Node1, KV4, [{n_val, 2}], ?ALT_BUCKET1),
+
+    SW1 = os:timestamp(),
+    lager:info("Discrepancies written in ~w s", [timer:now_diff(SW1, SW0) div ?MICRO]),
+    lager:info("Writes completed - attempting verify"),
+    % Every bucket that received delta keys is verified, one verify_data/3 call per bucket
+    verify_data(Node1, KV3 ++ KV4, ?ALT_BUCKET1),
+    verify_data(Node1, KV3, ?ALT_BUCKET2),
+    verify_data(Node1, KV3, ?ALT_BUCKET3),
+    verify_data(Node1, KV3, ?ALT_BUCKET4),
+
+    SW2 = os:timestamp(),
+    lager:info("Verification complete in ~w s", [timer:now_diff(SW2, SW1) div ?MICRO]),
+    lager:info("Overall time ~w s", [timer:now_diff(SW2, SW0) div ?MICRO]),
+
+    ok.
+
+%% Key/value generators: to_key/1 renders N as "K" plus a 9-digit zero-padded number; test_data/2 pairs each key with itself.
+to_key(N) ->
+    list_to_binary(io_lib:format("K~9..0B", [N])).
+
+test_data(Start, End) ->
+    Keys = [to_key(N) || N <- lists:seq(Start, End)],
+    [{K, K} || K <- Keys].
+
+%% Puts every {K, V} into Bucket with put options Opts, updating the fetched object when the key already exists.
+write_data(Node, KVs, Opts, Bucket) ->
+    lager:info("Loading batch of ~w keys to ~s bucket", [length(KVs), Bucket]),
+    PB = rt:pbc(Node),
+    [begin
+         O =
+             case riakc_pb_socket:get(PB, Bucket, K) of
+                 {ok, Prev} ->
+                     riakc_obj:update_value(Prev, V);
+                 _ ->
+                     riakc_obj:new(Bucket, K, V)
+             end,
+         ?assertMatch(ok, riakc_pb_socket:put(PB, O, Opts))
+     end || {K, V} <- KVs],
+    riakc_pb_socket:stop(PB),
+    ok.
+
+%% Polls until verify_replicas/5 is true for every {K, V} in Bucket; a badmatch is raised if rt:wait_until/3 returns fail.
+verify_data(Node, KeyValues, Bucket) ->
+    lager:info("Verify all replicas are eventually correct"),
+    PB = rt:pbc(Node),
+    CheckFun =
+        fun() ->
+            Matches = [verify_replicas(Node, Bucket, K, V, ?N_VAL)
+                       || {K, V} <- KeyValues],
+            CountTrues = fun(true, G) -> G+1; (false, G) -> G end,
+            NumGood = lists:foldl(CountTrues, 0, Matches),
+            Num = length(KeyValues),
+            case Num == NumGood of
+                true -> true;
+                false ->
+                    lager:info("Data not yet correct: ~p mismatches",
+                               [Num-NumGood]),
+                    false
+            end
+        end,
+    MaxTime = rt_config:get(rt_max_wait_time),
+    Delay = ?VERIFY_DELAY, % poll interval (10000, presumably ms), repeated until max time.
+    Retry = MaxTime div Delay,
+    ok =
+        case rt:wait_until(CheckFun, Retry, Delay) of
+            ok ->
+                lager:info("Data is now correct. Yay!");
+            fail ->
+                lager:error("AAE failed to fix data"),
+                aae_failed_to_fix_data
+        end,
+    riakc_pb_socket:stop(PB),
+    ok.
+
+merge_values(O) ->
+    Vals = riak_object:get_values(O),
+    lists:foldl(fun(NV, V) ->
+                    case size(NV) > size(V) of
+                        true -> NV;
+                        _ -> V
+                    end
+                end, <<>>, Vals).
+%% True only when all N rt:get_replica/5 reads of {B, K} return {ok, Obj} and each merged value equals V.
+verify_replicas(Node, B, K, V, N) ->
+    Replies = [rt:get_replica(Node, B, K, I, N)
+               || I <- lists:seq(1,N)],
+    Vals = [merge_values(O) || {ok, O} <- Replies],
+    Expected = [V || _ <- lists:seq(1, N)],
+    Vals == Expected.
+
+
+
From 555e2dba99d8688a9b05678869d6fe3405967d09 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 19 Nov 2021 01:48:04 +0000 Subject: [PATCH 3/5] Update verify_tictac_aae_load.erl Change default settings to load more, and verify less. Change tick to give enough time for test to complete --- tests/verify_tictac_aae_load.erl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/verify_tictac_aae_load.erl b/tests/verify_tictac_aae_load.erl index 0552da26a..5e7de7fee 100644 --- a/tests/verify_tictac_aae_load.erl +++ b/tests/verify_tictac_aae_load.erl @@ -40,13 +40,16 @@ -export([confirm/0]). -include_lib("eunit/include/eunit.hrl"). -% I would hope this would come from the testing framework some day -% to use the test in small and large scenarios. -define(DEFAULT_RING_SIZE, 8). --define(EXCHANGE_TICK, 120). + +% Amend defaults for +-define(EXCHANGE_TICK, 60). -define(MAX_RESULTS, 128). -define(REPAIR_LOOPS, 4). -define(KEY_RANGE, false). +-define(RANGE_BOOST, 2). + +-define(TEST_VERSION, current). -define(CFG, [{riak_kv, @@ -57,7 +60,8 @@ {tictacaae_rebuildtick, 3600000}, % don't tick for an hour! {tictacaae_maxresults, ?MAX_RESULTS}, {tictacaae_repairloops, ?REPAIR_LOOPS}, - {tictacaae_enablekeyrange, ?KEY_RANGE} + {tictacaae_enablekeyrange, ?KEY_RANGE}, + {tictacaae_rangeboost, ?RANGE_BOOST} ]}, {riak_core, [ @@ -66,16 +70,17 @@ ). -define(NUM_NODES, 4). --define(PRELOAD_KEYS_PERBUCKET, 100000). --define(N1_KEYS_PERBUCKET, 250). --define(N2_KEYS_SINGLEBUCKET, 1000). +-define(PRELOAD_KEYS_PERBUCKET, 125000). +-define(N1_KEYS_PERBUCKET, 200). +-define(N2_KEYS_SINGLEBUCKET, 800). + -define(ALT_BUCKET1, <<"alt_bucket1">>). -define(ALT_BUCKET2, <<"alt_bucket2">>). 
-define(ALT_BUCKET3, <<"alt_bucket3">>). -define(ALT_BUCKET4, <<"alt_bucket4">>). + -define(N_VAL, 3). -define(STATS_DELAY, 1000). --define(TEST_VERSION, current). -define(VERIFY_DELAY, 10000). -define(MICRO, 1000000). From c4868c5fea98bf89009338988d820a6b78bafa1f Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 19 Nov 2021 02:59:40 +0000 Subject: [PATCH 4/5] Update verify_tictac_aae_load.erl Change defaults --- tests/verify_tictac_aae_load.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/verify_tictac_aae_load.erl b/tests/verify_tictac_aae_load.erl index 5e7de7fee..0a3b52e2d 100644 --- a/tests/verify_tictac_aae_load.erl +++ b/tests/verify_tictac_aae_load.erl @@ -70,9 +70,9 @@ ). -define(NUM_NODES, 4). --define(PRELOAD_KEYS_PERBUCKET, 125000). --define(N1_KEYS_PERBUCKET, 200). --define(N2_KEYS_SINGLEBUCKET, 800). +-define(PRELOAD_KEYS_PERBUCKET, 150000). +-define(N1_KEYS_PERBUCKET, 500). +-define(N2_KEYS_SINGLEBUCKET, 2000). -define(ALT_BUCKET1, <<"alt_bucket1">>). -define(ALT_BUCKET2, <<"alt_bucket2">>). From 171476caa2b2c2ba3a65906ca2c488089d4a0df6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Fri, 19 Nov 2021 12:51:18 +0000 Subject: [PATCH 5/5] New defaults Running tests with these settings and switching MAX_RESULTS to 256 when switching TEST_VERSION to previous. The comparison now shows 60-70% reduction in time to repair the delta (i.e. 3 x faster) --- tests/verify_tictac_aae_load.erl | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/tests/verify_tictac_aae_load.erl b/tests/verify_tictac_aae_load.erl index 0a3b52e2d..c680993a5 100644 --- a/tests/verify_tictac_aae_load.erl +++ b/tests/verify_tictac_aae_load.erl @@ -17,24 +17,7 @@ %% under the License. %% %% ------------------------------------------------------------------- -%% @doc Verification of Active Anti Entropy. 
-%% The basic guarantee of AAE is this: Even without the read repairs that will -%% happen when data is accessed, inconsistencies between the replicas of a -%% KV object will be repaired eventually. The test tries hard not to -%% explicitly check for when the AAE trees are built or when exchanges are run -%% in an effort to remain decoupled from the implementation. Instead, it -%% simply configures AAE to build/rebuild and run exchanges between the data -%% partitions. It then performs direct vnode reads on all replicas and verify -%% they eventually match. -%% -%% Data recovery after the following scenarios is tested: -%% -%% - Data for a partition completely disappears. -%% - Less than N replicas are written -%% - Less than N replicas are updated -%% -%% Also, a sanity check is done to make sure AAE repairs go away eventually -%% if there is no activity. That was an actual early AAE bug. +%% @doc Verification of Active Anti Entropy performance. -module(verify_tictac_aae_load). -export([confirm/0]). @@ -43,7 +26,7 @@ -define(DEFAULT_RING_SIZE, 8). % Amend defaults for --define(EXCHANGE_TICK, 60). +-define(EXCHANGE_TICK, 30). -define(MAX_RESULTS, 128). -define(REPAIR_LOOPS, 4). -define(KEY_RANGE, false). @@ -71,8 +54,8 @@ -define(NUM_NODES, 4). -define(PRELOAD_KEYS_PERBUCKET, 150000). --define(N1_KEYS_PERBUCKET, 500). --define(N2_KEYS_SINGLEBUCKET, 2000). +-define(N1_KEYS_PERBUCKET, 2500). +-define(N2_KEYS_SINGLEBUCKET, 15000). -define(ALT_BUCKET1, <<"alt_bucket1">>). -define(ALT_BUCKET2, <<"alt_bucket2">>).