From abaa77062e27688f984ba8cb914ca69a9cb5e2e6 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 10 Mar 2016 13:29:08 +0000 Subject: [PATCH 01/47] message store supervisor --- src/rabbit_msg_store_vhost_sup.erl | 50 ++++++++++++++++++++++++++++++ src/rabbit_variable_queue.erl | 30 +++++++++++------- 2 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 src/rabbit_msg_store_vhost_sup.erl diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl new file mode 100644 index 000000000000..299efdc42803 --- /dev/null +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -0,0 +1,50 @@ +-module(rabbit_msg_store_vhost_sup). + +-behaviour(supervisor2). + +-export([start_link/4, init/1, add_vhost/2, client_init/5, start_vhost/5]). + +start_link(Name, Dir, ClientRefs, StartupFunState) -> + supervisor2:start_link({local, Name}, ?MODULE, + [Name, Dir, ClientRefs, StartupFunState]). + +init([Name, Dir, ClientRefs, StartupFunState]) -> + {ok, {{simple_one_for_one, 0, 1}, + [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, + [Name, Dir, ClientRefs, StartupFunState]}, + transient, infinity, supervisor, [rabbit_msg_store]}]}}. + + +add_vhost(Name, VHost) -> + supervisor2:start_child(Name, [VHost]). + +start_vhost(Name, Dir, ClientRefs, StartupFunState, VHost) -> + VHostName = get_vhost_name(Name, VHost), + VHostDir = get_vhost_dir(Dir, VHost), + ok = rabbit_file:ensure_dir(VHostDir), + io:format("Store dir ~p~n", [VHostDir]), + rabbit_msg_store:start_link(VHostName, VHostDir, + ClientRefs, StartupFunState). + + +client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> + VHostName = maybe_start_vhost(Server, VHost), + rabbit_msg_store:client_init(VHostName, Ref, MsgOnDiskFun, CloseFDsFun). + +maybe_start_vhost(Server, VHost) -> + VHostName = get_vhost_name(Server, VHost), + Trace = try throw(42) catch 42 -> erlang:get_stacktrace() end, + io:format("Search for ~p~n ~p~n", [VHostName, Trace]), + case whereis(VHostName) of + undefined -> add_vhost(Server, VHost); + _ -> ok + end, + VHostName. + +get_vhost_name(Name, VHost) -> + binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, VHost/binary>>, utf8). + +get_vhost_dir(Dir, VHost) -> + VhostEncoded = http_uri:encode(binary_to_list(VHost)), + filename:join([Dir, VhostEncoded]). + diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 03381be31135..6c97e85bc8e7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -468,10 +468,10 @@ stop() -> ok = rabbit_queue_index:stop(). start_msg_store(Refs, StartFunState) -> - ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), Refs, StartFunState]). @@ -492,22 +492,26 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, new, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) -> IndexState = rabbit_queue_index:init(QueueName, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, 0, 0, [], case IsDurable of true -> msg_store_client_init(?PERSISTENT_MSG_STORE, - MsgOnDiskFun, AsyncCallback); + MsgOnDiskFun, AsyncCallback, VHost); false -> undefined end, - msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, AsyncCallback)); + msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, + AsyncCallback, VHost)); %% We can be recovering a transient queue if it crashed init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) -> {PRef, RecoveryTerms} = process_recovery_terms(Terms), + VHost = QueueName#resource.virtual_host, {PersistentClient, ContainsCheckFun} = case IsDurable of true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef, - MsgOnDiskFun, AsyncCallback), + MsgOnDiskFun, AsyncCallback, + VHost), {C, fun (MsgId) when is_binary(MsgId) -> rabbit_msg_store:contains(MsgId, C); (#basic_message{is_persistent = Persistent}) -> @@ -516,7 +520,8 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, false -> {undefined, fun(_MsgId) -> false end} end, TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, - undefined, AsyncCallback), + undefined, AsyncCallback, + VHost), {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( QueueName, RecoveryTerms, @@ -1188,14 +1193,17 @@ with_immutable_msg_store_state(MSCState, IsPersistent, Fun) -> end), Res. -msg_store_client_init(MsgStore, MsgOnDiskFun, Callback) -> +msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) -> msg_store_client_init(MsgStore, rabbit_guid:gen(), MsgOnDiskFun, - Callback). + Callback, VHost). -msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback) -> +msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) -> CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE), - rabbit_msg_store:client_init(MsgStore, Ref, MsgOnDiskFun, - fun () -> Callback(?MODULE, CloseFDsFun) end). + rabbit_msg_store_vhost_sup:client_init(MsgStore, Ref, MsgOnDiskFun, + fun () -> + Callback(?MODULE, CloseFDsFun) + end, + VHost). msg_store_write(MSCState, IsPersistent, MsgId, Msg) -> with_immutable_msg_store_state( From 80436dfaef0366d48f1e8b1293e1a12a5e922c79 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 10 Mar 2016 16:39:05 +0000 Subject: [PATCH 02/47] Starting and restarting app --- src/rabbit_msg_store_vhost_sup.erl | 20 ++++++++++++++------ src/rabbit_variable_queue.erl | 13 +++++++++++-- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 299efdc42803..a4ae8469dc4f 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -2,7 +2,7 @@ -behaviour(supervisor2). --export([start_link/4, init/1, add_vhost/2, client_init/5, start_vhost/5]). +-export([start_link/4, init/1, add_vhost/2, client_init/5, start_vhost/5, successfully_recovered_state/2]). start_link(Name, Dir, ClientRefs, StartupFunState) -> supervisor2:start_link({local, Name}, ?MODULE, @@ -22,7 +22,6 @@ start_vhost(Name, Dir, ClientRefs, StartupFunState, VHost) -> VHostName = get_vhost_name(Name, VHost), VHostDir = get_vhost_dir(Dir, VHost), ok = rabbit_file:ensure_dir(VHostDir), - io:format("Store dir ~p~n", [VHostDir]), rabbit_msg_store:start_link(VHostName, VHostDir, ClientRefs, StartupFunState). @@ -34,7 +33,6 @@ client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> maybe_start_vhost(Server, VHost) -> VHostName = get_vhost_name(Server, VHost), Trace = try throw(42) catch 42 -> erlang:get_stacktrace() end, - io:format("Search for ~p~n ~p~n", [VHostName, Trace]), case whereis(VHostName) of undefined -> add_vhost(Server, VHost); _ -> ok @@ -42,9 +40,19 @@ maybe_start_vhost(Server, VHost) -> VHostName. get_vhost_name(Name, VHost) -> - binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, VHost/binary>>, utf8). + VhostEncoded = encode_vhost(VHost), + binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", VhostEncoded/binary>>, utf8). get_vhost_dir(Dir, VHost) -> - VhostEncoded = http_uri:encode(binary_to_list(VHost)), - filename:join([Dir, VhostEncoded]). + VhostEncoded = encode_vhost(VHost), + binary_to_list(filename:join([Dir, VhostEncoded])). +encode_vhost(VHost) -> + base64:encode(VHost). + +successfully_recovered_state(Name, VHost) -> + VHostName = get_vhost_name(Name, VHost), + rabbit_msg_store:successfully_recovered_state(VHostName). + +% force_recovery +% transform_dir \ No newline at end of file diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 6c97e85bc8e7..a4dc6e4c0da7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -468,12 +468,20 @@ stop() -> ok = rabbit_queue_index:stop(). start_msg_store(Refs, StartFunState) -> + VHosts = rabbit_vhost:list(), ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), - Refs, StartFunState]). + Refs, StartFunState]), + lists:foreach( + fun(VHost) -> + rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE, VHost), + rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE, VHost) + end, + VHosts), + ok. stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), @@ -525,7 +533,8 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( QueueName, RecoveryTerms, - rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + rabbit_msg_store_vhost_sup:successfully_recovered_state( + ?PERSISTENT_MSG_STORE, VHost), ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms, PersistentClient, TransientClient). From ec15e971cfc8a44b5279f5b8350663cb373b4c1c Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 6 Apr 2016 12:28:02 +0100 Subject: [PATCH 03/47] Migration algorithm --- src/rabbit_variable_queue.erl | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index a4dc6e4c0da7..16e5c513a5a2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -344,6 +344,7 @@ %%---------------------------------------------------------------------------- -rabbit_upgrade({multiple_routing_keys, local, []}). +-rabbit_upgrade({move_messages_to_vhost_store, local, []}). -type seq_id() :: non_neg_integer(). @@ -2671,3 +2672,41 @@ transform_storage(TransformFun) -> transform_store(Store, TransformFun) -> rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store), rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). + +move_messages_to_vhost_store() -> + Queues = list_persistent_queues(), + OldStore = run_old_persistent_store(), + Migrations = spawn_for_each(fun(Queue) -> + migrate_queue(Queue, OldStore) + end, Queues), + wait(Migrations), + delete_old_store(OldStore). + +migrate_queue(Queue, OldStore) -> + OldStoreClient = get_client(OldStore), + NewStoreClient = get_new_store_client(Queue), + walk_queue_index( + fun(MessageIdInStore) -> + Msg = get_msg_from_store(OldStoreClient), + put_message_to_store(Msg, NewStoreClient) + end, + Queue). + + +get_new_store_client(Queue) -> + Vhost = queue_vhost(Queue), + Store = run_persistent_store(Vhost), + get_client(Store). + + +list_persistent_queues() -> + Node = node(), + mnesia:async_dirty( + fun () -> + qlc:e(qlc:q([Q || Q = #amqqueue{name = Name, + pid = Pid} + <- mnesia:table(rabbit_durable_queue), + node(Pid) == Node, + mnesia:read(rabbit_queue, Name, read) =:= []])) + end). + From d1bffd00f326c4e43cbfaadc20a306d1a598ac9a Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Tue, 19 Apr 2016 17:02:52 +0100 Subject: [PATCH 04/47] Work in progress: Migration to vhost based message store --- src/rabbit_variable_queue.erl | 59 ++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 16e5c513a5a2..91ad41b8e00e 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2675,16 +2675,19 @@ transform_store(Store, TransformFun) -> move_messages_to_vhost_store() -> Queues = list_persistent_queues(), - OldStore = run_old_persistent_store(), + % Maybe recover old store. + {RecoveryTerms, StartFunState} = start_recovery_terms(Queues), + OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), + NewStoreSup = start_new_store_sup(), Migrations = spawn_for_each(fun(Queue) -> - migrate_queue(Queue, OldStore) + migrate_queue(Queue, OldStore, NewStoreSup) end, Queues), wait(Migrations), delete_old_store(OldStore). -migrate_queue(Queue, OldStore) -> - OldStoreClient = get_client(OldStore), - NewStoreClient = get_new_store_client(Queue), +migrate_queue(Queue, OldStore, NewStoreSup) -> + OldStoreClient = get_old_client(OldStore), + NewStoreClient = get_new_store_client(Queue, NewStoreSup), walk_queue_index( fun(MessageIdInStore) -> Msg = get_msg_from_store(OldStoreClient), @@ -2693,11 +2696,22 @@ migrate_queue(Queue, OldStore) -> Queue). -get_new_store_client(Queue) -> +get_new_store_client(Queue, NewStoreSup) -> Vhost = queue_vhost(Queue), - Store = run_persistent_store(Vhost), - get_client(Store). + get_new_client(NewStoreSup, Vhost). +get_new_client(NewStoreSup, VHost) -> + rabbit_msg_store_vhost_sup:client_init(NewStoreSup, + rabbit_guid:gen(), + fun(_,_) -> ok end, + fun() -> ok end, + VHost). + +get_old_client(OldStore) -> + rabbit_msg_store:client_init(OldStore, + rabbit_guid:gen(), + fun(_,_) -> ok end, + fun() -> ok end). list_persistent_queues() -> Node = node(), @@ -2710,3 +2724,32 @@ list_persistent_queues() -> mnesia:read(rabbit_queue, Name, read) =:= []])) end). +start_recovery_terms(Queues) -> + {AllTerms, StartFunState} = rabbit_queue_index:start(Queues), + Refs = [Ref || Terms <- AllTerms, + Terms /= non_clean_shutdown, + begin + Ref = proplists:get_value(persistent_ref, Terms), + Ref =/= undefined + end], + {Refs, StartFunState}. + +run_old_persistent_store(Refs, StartFunState) -> + OldStoreName = old_persistent_msg_store. + ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, + [OldStoreName, rabbit_mnesia:dir(), + Refs, StartFunState]), + OldStoreName. + +run_persistent_store(Vhost) -> + + + ?PERSISTENT_MSG_STORE. + +start_new_store_sup() -> + % Start persistent store sup without recovery. + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, + [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + undefined, {fun (ok) -> finished end, ok}]), + ?PERSISTENT_MSG_STORE. + From b3fc71af45e53633c69a045e65cfe01e110aa9c5 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Tue, 19 Apr 2016 18:07:17 +0100 Subject: [PATCH 05/47] wip --- src/rabbit_variable_queue.erl | 44 +++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 91ad41b8e00e..01c412fd60d0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2689,17 +2689,44 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> OldStoreClient = get_old_client(OldStore), NewStoreClient = get_new_store_client(Queue, NewStoreSup), walk_queue_index( - fun(MessageIdInStore) -> - Msg = get_msg_from_store(OldStoreClient), - put_message_to_store(Msg, NewStoreClient) + fun(MessageIdInStore, OldC) -> + case rabbit_msg_store:read(MessageIdInStore, OldStoreClient) of + {{ok, Msg}, OldC1} -> + ok = rabbit_msg_store:write(MessageIdInStore, Msg, NewStoreClient), + OldC1; + _ -> OldC + end end, Queue). +spawn_for_each(Fun, List) -> + Ref = erlang:make_ref(), + Self = self(), + Processes = lists:map( + fun(El) -> + spawn_link( + fun() -> + Fun(El), + Self ! {ok, self(), Ref} + end) + end, + List), + {Ref, Processes}. + +wait({Ref, Processes}) -> + lists:foreach( + fun(Proc) -> + receive {ok, Proc, Ref} -> ok + end + end, + Processes). get_new_store_client(Queue, NewStoreSup) -> Vhost = queue_vhost(Queue), get_new_client(NewStoreSup, Vhost). +queue_vhost(#amqqueue{name = #resource{virtual_host = VHost}}) -> VHost. + get_new_client(NewStoreSup, VHost) -> rabbit_msg_store_vhost_sup:client_init(NewStoreSup, rabbit_guid:gen(), @@ -2735,7 +2762,7 @@ start_recovery_terms(Queues) -> {Refs, StartFunState}. run_old_persistent_store(Refs, StartFunState) -> - OldStoreName = old_persistent_msg_store. + OldStoreName = old_persistent_msg_store, ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, [OldStoreName, rabbit_mnesia:dir(), Refs, StartFunState]), @@ -2753,3 +2780,12 @@ start_new_store_sup() -> undefined, {fun (ok) -> finished end, ok}]), ?PERSISTENT_MSG_STORE. +delete_old_store(OldStore) -> + gen_server:stop(OldStore), + rabbit_file:recursive_delete( + filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])). + + + + + From 492e23bf188820756e07076c4539692376cfcbc6 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 20 Apr 2016 17:54:24 +0100 Subject: [PATCH 06/47] wip: migrating to vhost base messge store --- src/rabbit_msg_store.erl | 4 +- src/rabbit_queue_index.erl | 16 +++--- src/rabbit_variable_queue.erl | 96 ++++++++++++++++++++++------------- 3 files changed, 70 insertions(+), 46 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 8e2b1c0d4907..f033dea5f0c3 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -718,7 +718,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> Dir = filename:join(BaseDir, atom_to_list(Server)), - {ok, IndexModule} = application:get_env(msg_store_index_module), + {ok, IndexModule} = application:get_env(rabbit,msg_store_index_module), rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), AttemptFileSummaryRecovery = @@ -758,7 +758,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> DyingIndex = ets:new(rabbit_msg_store_dying_client_index, [set, public, {keypos, #dying_client.client_ref}]), - {ok, FileSizeLimit} = application:get_env(msg_store_file_size_limit), + {ok, FileSizeLimit} = application:get_env(rabbit,msg_store_file_size_limit), {ok, GCPid} = rabbit_msg_store_gc:start_link( #gc_state { dir = Dir, diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 8b96bbffbda9..3e9d92f5c11c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -23,6 +23,7 @@ read/3, next_segment_boundary/1, bounds/1, start/1, stop/0]). -export([add_queue_ttl/0, avoid_zeroes/0, store_msg_size/0, store_msg/0]). +-export([scan_queue_segments/3]). -define(CLEAN_FILENAME, "clean.dot"). @@ -660,20 +661,19 @@ queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> end. queue_index_walker_reader(QueueName, Gatherer) -> - State = blank_state(QueueName), - ok = scan_segments( + ok = scan_queue_segments( fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) when is_binary(MsgId) -> gatherer:sync_in(Gatherer, {MsgId, 1}); (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, Acc) -> Acc - end, ok, State), + end, ok, QueueName), ok = gatherer:finish(Gatherer). -scan_segments(Fun, Acc, State) -> - State1 = #qistate { segments = Segments, dir = Dir } = - recover_journal(State), +scan_queue_segments(Fun, Acc, QueueName) -> + State = #qistate { segments = Segments, dir = Dir } = + recover_journal(blank_state(QueueName)), Result = lists:foldr( fun (Seg, AccN) -> segment_entries_foldr( @@ -682,8 +682,8 @@ scan_segments(Fun, Acc, State) -> Fun(reconstruct_seq_id(Seg, RelSeq), MsgOrId, MsgProps, IsPersistent, IsDelivered, IsAcked, AccM) end, AccN, segment_find_or_new(Seg, Dir, Segments)) - end, Acc, all_segment_nums(State1)), - {_SegmentCounts, _State} = terminate(State1), + end, Acc, all_segment_nums(State)), + {_SegmentCounts, _State} = terminate(State), Result. %%---------------------------------------------------------------------------- diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 01c412fd60d0..8460e7ffa905 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -33,6 +33,9 @@ %% exported for testing only -export([start_msg_store/2, stop_msg_store/0, init/6]). +-export([move_messages_to_vhost_store/0]). +-include_lib("stdlib/include/qlc.hrl"). + %%---------------------------------------------------------------------------- %% Messages, and their position in the queue, can be in memory or on %% disk, or both. Persistent messages will have both message and @@ -334,8 +337,11 @@ }). -define(HEADER_GUESS_SIZE, 100). %% see determine_persist_to/2 +-define(PERSISTENT_MSG_STORE_SUP, msg_store_persistent_vhost). +-define(TRANSIENT_MSG_STORE_SUP, msg_store_transient_vhost). -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). + -define(QUEUE, lqueue). -include("rabbit.hrl"). @@ -344,7 +350,8 @@ %%---------------------------------------------------------------------------- -rabbit_upgrade({multiple_routing_keys, local, []}). --rabbit_upgrade({move_messages_to_vhost_store, local, []}). +% -rabbit_upgrade({move_messages_to_vhost_store, local, []}). requires mnesia, requires rabbit_sup, requires worker_pool, requires fhc +-compile(export_all). -type seq_id() :: non_neg_integer(). @@ -452,6 +459,8 @@ %% Public API %%---------------------------------------------------------------------------- + + start(DurableQueues) -> {AllTerms, StartFunState} = rabbit_queue_index:start(DurableQueues), start_msg_store( @@ -470,23 +479,23 @@ stop() -> start_msg_store(Refs, StartFunState) -> VHosts = rabbit_vhost:list(), - ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store_vhost_sup, - [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, + [?TRANSIENT_MSG_STORE_SUP, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, + [?PERSISTENT_MSG_STORE_SUP, rabbit_mnesia:dir(), Refs, StartFunState]), lists:foreach( fun(VHost) -> - rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE, VHost), - rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE, VHost) + rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), + rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE_SUP, VHost) end, VHosts), ok. stop_msg_store() -> - ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), - ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE). + ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE_SUP), + ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE_SUP). init(Queue, Recover, Callback) -> init( @@ -504,11 +513,11 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, new, VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, 0, 0, [], case IsDurable of - true -> msg_store_client_init(?PERSISTENT_MSG_STORE, + true -> msg_store_client_init(?PERSISTENT_MSG_STORE_SUP, MsgOnDiskFun, AsyncCallback, VHost); false -> undefined end, - msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, + msg_store_client_init(?TRANSIENT_MSG_STORE_SUP, undefined, AsyncCallback, VHost)); %% We can be recovering a transient queue if it crashed @@ -518,7 +527,7 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, VHost = QueueName#resource.virtual_host, {PersistentClient, ContainsCheckFun} = case IsDurable of - true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef, + true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE_SUP, PRef, MsgOnDiskFun, AsyncCallback, VHost), {C, fun (MsgId) when is_binary(MsgId) -> @@ -528,14 +537,14 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, end}; false -> {undefined, fun(_MsgId) -> false end} end, - TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, + TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE_SUP, undefined, AsyncCallback, VHost), {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( QueueName, RecoveryTerms, rabbit_msg_store_vhost_sup:successfully_recovered_state( - ?PERSISTENT_MSG_STORE, VHost), + ?PERSISTENT_MSG_STORE_SUP, VHost), ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms, PersistentClient, TransientClient). @@ -1208,7 +1217,7 @@ msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) -> Callback, VHost). msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) -> - CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE), + CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE_SUP), rabbit_msg_store_vhost_sup:client_init(MsgStore, Ref, MsgOnDiskFun, fun () -> Callback(?MODULE, CloseFDsFun) @@ -2666,23 +2675,26 @@ multiple_routing_keys() -> %% Assumes message store is not running transform_storage(TransformFun) -> - transform_store(?PERSISTENT_MSG_STORE, TransformFun), - transform_store(?TRANSIENT_MSG_STORE, TransformFun). + transform_store(?PERSISTENT_MSG_STORE_SUP, TransformFun), + transform_store(?TRANSIENT_MSG_STORE_SUP, TransformFun). transform_store(Store, TransformFun) -> rabbit_msg_store:force_recovery(rabbit_mnesia:dir(), Store), rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). move_messages_to_vhost_store() -> - Queues = list_persistent_queues(), + Queues = rabbit_variable_queue:list_persistent_queues(), % Maybe recover old store. - {RecoveryTerms, StartFunState} = start_recovery_terms(Queues), - OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), - NewStoreSup = start_new_store_sup(), - Migrations = spawn_for_each(fun(Queue) -> + {RecoveryTerms, StartFunState} = rabbit_variable_queue:start_recovery_terms(Queues), + OldStore = rabbit_variable_queue:run_old_persistent_store(RecoveryTerms, StartFunState), + NewStoreSup = rabbit_variable_queue:start_new_store_sup(), + lists:map(fun(Queue) -> migrate_queue(Queue, OldStore, NewStoreSup) end, Queues), - wait(Migrations), + % Migrations = spawn_for_each(fun(Queue) -> + % migrate_queue(Queue, OldStore, NewStoreSup) + % end, Queues), + % wait(Migrations), delete_old_store(OldStore). migrate_queue(Queue, OldStore, NewStoreSup) -> @@ -2697,8 +2709,19 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> _ -> OldC end end, + OldStoreClient, Queue). +walk_queue_index(Fun, Client, #amqqueue{name = QueueName}) -> + % WARNING: State is being recovered and terminated. This can cause side effects! + rabbit_queue_index:scan_queue_segments( + fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, _IsAcked, ClientState) + when is_binary(MsgId) -> + Fun(MsgId, ClientState); + (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, ClientState) -> + ClientState + end, Client, QueueName). + spawn_for_each(Fun, List) -> Ref = erlang:make_ref(), Self = self(), @@ -2752,7 +2775,8 @@ list_persistent_queues() -> end). start_recovery_terms(Queues) -> - {AllTerms, StartFunState} = rabbit_queue_index:start(Queues), + QueueNames = [Name || #amqqueue{name = Name} <- Queues], + {AllTerms, StartFunState} = rabbit_queue_index:start(QueueNames), Refs = [Ref || Terms <- AllTerms, Terms /= non_clean_shutdown, begin @@ -2762,30 +2786,30 @@ start_recovery_terms(Queues) -> {Refs, StartFunState}. run_old_persistent_store(Refs, StartFunState) -> - OldStoreName = old_persistent_msg_store, + OldStoreName = ?PERSISTENT_MSG_STORE, ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, [OldStoreName, rabbit_mnesia:dir(), Refs, StartFunState]), OldStoreName. -run_persistent_store(Vhost) -> - - - ?PERSISTENT_MSG_STORE. - start_new_store_sup() -> % Start persistent store sup without recovery. - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, - [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, + [?PERSISTENT_MSG_STORE_SUP, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), - ?PERSISTENT_MSG_STORE. + ?PERSISTENT_MSG_STORE_SUP. delete_old_store(OldStore) -> gen_server:stop(OldStore), - rabbit_file:recursive_delete( - filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])). - + rabbit_file:recursive_delete([filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])]). +setup() -> + application:load(rabbit), + mnesia:start(), + rabbit_sup:start_link(), + rabbit:start_fhc(), + rabbit_sup:start_restartable_child(rabbit_guid), + rabbit_sup:start_supervisor_child(worker_pool_sup). From 325e2fcd8a66a2db702e819cc8da1bafde4ac8ed Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 21 Apr 2016 13:19:18 +0100 Subject: [PATCH 07/47] New upgrade scope `queues` started by boot step before queue recovery --- src/rabbit.erl | 8 ++ src/rabbit_upgrade.erl | 13 +++ src/rabbit_variable_queue.erl | 161 ++++++++++++++++------------------ 3 files changed, 99 insertions(+), 83 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index f10eb463ab90..a7bebfc1276d 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -146,6 +146,14 @@ {requires, core_initialized}, {enables, routing_ready}]}). +-rabbit_boot_step({upgrade_queues, + [{description, "codec correctness check"}, + {mfa, {rabbit_upgrade, + maybe_upgrade_queues, + []}}, + {requires, [core_initialized]}, + {enables, recovery}]}). + -rabbit_boot_step({recovery, [{description, "exchange, queue and binding recovery"}, {mfa, {rabbit, recover, []}}, diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index f88b7cc73fcb..f5437e6b817f 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -17,6 +17,7 @@ -module(rabbit_upgrade). -export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0, + maybe_upgrade_queues/0, nodes_running/1, secondary_upgrade/1]). -include("rabbit.hrl"). @@ -252,6 +253,18 @@ maybe_upgrade_local() -> %% ------------------------------------------------------------------- +maybe_upgrade_queues() -> + case rabbit_version:upgrades_required(queues) of + {error, version_not_available} -> version_not_available; + {error, starting_from_scratch} -> starting_from_scratch; + {error, _} = Err -> throw(Err); + {ok, []} -> ok; + {ok, Upgrades} -> apply_upgrades(queues, Upgrades, + fun() -> ok end) + end. + +%% ------------------------------------------------------------------- + apply_upgrades(Scope, Upgrades, Fun) -> ok = rabbit_file:lock_file(lock_filename()), info("~s upgrades: ~w to apply~n", [Scope, length(Upgrades)]), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 8460e7ffa905..1adc038a8049 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -350,7 +350,8 @@ %%---------------------------------------------------------------------------- -rabbit_upgrade({multiple_routing_keys, local, []}). -% -rabbit_upgrade({move_messages_to_vhost_store, local, []}). requires mnesia, requires rabbit_sup, requires worker_pool, requires fhc +-rabbit_upgrade({move_messages_to_vhost_store, queues, []}). + -compile(export_all). -type seq_id() :: non_neg_integer(). @@ -517,7 +518,7 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, new, MsgOnDiskFun, AsyncCallback, VHost); false -> undefined end, - msg_store_client_init(?TRANSIENT_MSG_STORE_SUP, undefined, + msg_store_client_init(?TRANSIENT_MSG_STORE_SUP, undefined, AsyncCallback, VHost)); %% We can be recovering a transient queue if it crashed @@ -538,7 +539,7 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, false -> {undefined, fun(_MsgId) -> false end} end, TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE_SUP, - undefined, AsyncCallback, + undefined, AsyncCallback, VHost), {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( @@ -1219,8 +1220,8 @@ msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) -> msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) -> CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE_SUP), rabbit_msg_store_vhost_sup:client_init(MsgStore, Ref, MsgOnDiskFun, - fun () -> - Callback(?MODULE, CloseFDsFun) + fun () -> + Callback(?MODULE, CloseFDsFun) end, VHost). @@ -2683,84 +2684,83 @@ transform_store(Store, TransformFun) -> rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). move_messages_to_vhost_store() -> - Queues = rabbit_variable_queue:list_persistent_queues(), - % Maybe recover old store. - {RecoveryTerms, StartFunState} = rabbit_variable_queue:start_recovery_terms(Queues), - OldStore = rabbit_variable_queue:run_old_persistent_store(RecoveryTerms, StartFunState), - NewStoreSup = rabbit_variable_queue:start_new_store_sup(), - lists:map(fun(Queue) -> - migrate_queue(Queue, OldStore, NewStoreSup) - end, Queues), - % Migrations = spawn_for_each(fun(Queue) -> - % migrate_queue(Queue, OldStore, NewStoreSup) - % end, Queues), - % wait(Migrations), - delete_old_store(OldStore). +rabbit_log:error("MIGRATING!!"), + Queues = list_persistent_queues(), + %% Old msg_store may require recovery. + %% This upgrade step should only be started + %% if we are upgrading from version with old store. + {RecoveryTerms, StartFunState} = start_recovery_terms(Queues), + OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), + %% New store should not be recovered. + NewStoreSup = start_new_store_sup(), + lists:map(fun(Queue) -> + migrate_queue(Queue, OldStore, NewStoreSup) + end, + Queues), + + {ok, Gatherer} = gatherer:start_link(), + lists:map( + fun(Queue) -> + ok = gatherer:fork(Gatherer), + ok = worker_pool:submit_async( + fun () -> + migrate_queue(Queue, OldStore, NewStoreSup), + gatherer:finish(Gatherer) + end) + end, + Queues), + empty = gatherer:out(Gatherer), + ok = gatherer:stop(Gatherer), + + delete_old_store(OldStore), + + ok = rabbit_queue_index:stop(), + ok = rabbit_sup:stop_child(NewStoreSup). migrate_queue(Queue, OldStore, NewStoreSup) -> OldStoreClient = get_old_client(OldStore), NewStoreClient = get_new_store_client(Queue, NewStoreSup), - walk_queue_index( - fun(MessageIdInStore, OldC) -> - case rabbit_msg_store:read(MessageIdInStore, OldStoreClient) of - {{ok, Msg}, OldC1} -> - ok = rabbit_msg_store:write(MessageIdInStore, Msg, NewStoreClient), - OldC1; - _ -> OldC - end - end, - OldStoreClient, - Queue). - -walk_queue_index(Fun, Client, #amqqueue{name = QueueName}) -> - % WARNING: State is being recovered and terminated. This can cause side effects! + #amqqueue{name = QueueName} = Queue, + %% WARNING: During scan_queue_segments queue index state is being recovered + %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( - fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, _IsAcked, ClientState) + %% We migrate only persistent messages, which is stored in msg_store + %% and is not acked yet + fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, false, OldC) when is_binary(MsgId) -> - Fun(MsgId, ClientState); - (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, _IsAcked, ClientState) -> - ClientState - end, Client, QueueName). - -spawn_for_each(Fun, List) -> - Ref = erlang:make_ref(), - Self = self(), - Processes = lists:map( - fun(El) -> - spawn_link( - fun() -> - Fun(El), - Self ! {ok, self(), Ref} - end) + migrate_message(MsgId, OldC, NewStoreClient); + (_SeqId, _MsgId, _MsgProps, + _IsPersistent, _IsDelivered, _IsAcked, OldC) -> + OldC end, - List), - {Ref, Processes}. - -wait({Ref, Processes}) -> - lists:foreach( - fun(Proc) -> - receive {ok, Proc, Ref} -> ok - end - end, - Processes). - -get_new_store_client(Queue, NewStoreSup) -> - Vhost = queue_vhost(Queue), - get_new_client(NewStoreSup, Vhost). - -queue_vhost(#amqqueue{name = #resource{virtual_host = VHost}}) -> VHost. + OldStoreClient, + QueueName). + +migrate_message(MsgId, OldC, NewC) -> + case rabbit_msg_store:read(MsgId, OldC) of + {{ok, Msg}, OldC1} -> + case rabbit_msg_store:contains(MsgId, NewC) of + false -> ok = rabbit_msg_store:write(MsgId, Msg, NewC); + true -> ok + end, + % TODO: maybe remove in batches? + ok = rabbit_msg_store:remove([MsgId], OldC1), + OldC1; + _ -> OldC + end; -get_new_client(NewStoreSup, VHost) -> - rabbit_msg_store_vhost_sup:client_init(NewStoreSup, +get_new_store_client(#amqqueue{name = #resource{virtual_host = VHost}}, + NewStoreSup) -> + rabbit_msg_store_vhost_sup:client_init(NewStoreSup, rabbit_guid:gen(), - fun(_,_) -> ok end, - fun() -> ok end, + fun(_,_) -> ok end, + fun() -> ok end, VHost). get_old_client(OldStore) -> rabbit_msg_store:client_init(OldStore, rabbit_guid:gen(), - fun(_,_) -> ok end, + fun(_,_) -> ok end, fun() -> ok end). list_persistent_queues() -> @@ -2785,7 +2785,7 @@ start_recovery_terms(Queues) -> end], {Refs, StartFunState}. -run_old_persistent_store(Refs, StartFunState) -> +run_old_persistent_store(Refs, StartFunState) -> OldStoreName = ?PERSISTENT_MSG_STORE, ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, [OldStoreName, rabbit_mnesia:dir(), @@ -2794,22 +2794,17 @@ run_old_persistent_store(Refs, StartFunState) -> start_new_store_sup() -> % Start persistent store sup without recovery. - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, + rabbit_msg_store_vhost_sup, [?PERSISTENT_MSG_STORE_SUP, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), ?PERSISTENT_MSG_STORE_SUP. delete_old_store(OldStore) -> - gen_server:stop(OldStore), - rabbit_file:recursive_delete([filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])]). - - - -setup() -> - application:load(rabbit), - mnesia:start(), - rabbit_sup:start_link(), - rabbit:start_fhc(), - rabbit_sup:start_restartable_child(rabbit_guid), - rabbit_sup:start_supervisor_child(worker_pool_sup). + ok = rabbit_sup:stop_child(OldStore), + rabbit_file:recursive_delete( + [filename:join([rabbit_mnesia:dir(), ?PERSISTENT_MSG_STORE])]), + %% Delete old transient store as well + rabbit_file:recursive_delete( + [filename:join([rabbit_mnesia:dir(), ?TRANSIENT_MSG_STORE])]). From d1f4a9d302c2ad319c58d7aa8aa21e1bc90aa4b5 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 21 Apr 2016 14:20:39 +0100 Subject: [PATCH 08/47] Remove duplicate migration. Match no_ack message --- src/rabbit_variable_queue.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 1adc038a8049..f87205532c09 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2693,10 +2693,6 @@ rabbit_log:error("MIGRATING!!"), OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), %% New store should not be recovered. NewStoreSup = start_new_store_sup(), - lists:map(fun(Queue) -> - migrate_queue(Queue, OldStore, NewStoreSup) - end, - Queues), {ok, Gatherer} = gatherer:start_link(), lists:map( @@ -2726,7 +2722,7 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> rabbit_queue_index:scan_queue_segments( %% We migrate only persistent messages, which is stored in msg_store %% and is not acked yet - fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, false, OldC) + fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, OldC) when is_binary(MsgId) -> migrate_message(MsgId, OldC, NewStoreClient); (_SeqId, _MsgId, _MsgProps, @@ -2747,7 +2743,7 @@ migrate_message(MsgId, OldC, NewC) -> ok = rabbit_msg_store:remove([MsgId], OldC1), OldC1; _ -> OldC - end; + end. get_new_store_client(#amqqueue{name = #resource{virtual_host = VHost}}, NewStoreSup) -> From 93ba24b4327ab66c288a10fcfe1d131e53dcd477 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 21 Apr 2016 16:22:02 +0100 Subject: [PATCH 09/47] Moved directory selection to vhost_dir function --- src/rabbit_msg_store_vhost_sup.erl | 43 +++++++++++++++++------------- src/rabbit_variable_queue.erl | 10 +++---- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index a4ae8469dc4f..a33ffed56070 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -2,27 +2,30 @@ -behaviour(supervisor2). --export([start_link/4, init/1, add_vhost/2, client_init/5, start_vhost/5, successfully_recovered_state/2]). +-export([start_link/3, init/1, add_vhost/2, client_init/5, successfully_recovered_state/2]). -start_link(Name, Dir, ClientRefs, StartupFunState) -> +%% Internal +-export([start_vhost/4]). + +start_link(Name, ClientRefs, StartupFunState) -> supervisor2:start_link({local, Name}, ?MODULE, - [Name, Dir, ClientRefs, StartupFunState]). + [Name, ClientRefs, StartupFunState]). -init([Name, Dir, ClientRefs, StartupFunState]) -> +init([Name, ClientRefs, StartupFunState]) -> {ok, {{simple_one_for_one, 0, 1}, - [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, - [Name, Dir, ClientRefs, StartupFunState]}, + [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, + [Name, ClientRefs, StartupFunState]}, transient, infinity, supervisor, [rabbit_msg_store]}]}}. add_vhost(Name, VHost) -> supervisor2:start_child(Name, [VHost]). -start_vhost(Name, Dir, ClientRefs, StartupFunState, VHost) -> - VHostName = get_vhost_name(Name, VHost), - VHostDir = get_vhost_dir(Dir, VHost), +start_vhost(Name, ClientRefs, StartupFunState, VHost) -> + VHostName = vhost_store_name(Name, VHost), + VHostDir = vhost_store_dir(VHost), ok = rabbit_file:ensure_dir(VHostDir), - rabbit_msg_store:start_link(VHostName, VHostDir, + rabbit_msg_store:start_link(VHostName, VHostDir, ClientRefs, StartupFunState). @@ -31,27 +34,29 @@ client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> rabbit_msg_store:client_init(VHostName, Ref, MsgOnDiskFun, CloseFDsFun). maybe_start_vhost(Server, VHost) -> - VHostName = get_vhost_name(Server, VHost), - Trace = try throw(42) catch 42 -> erlang:get_stacktrace() end, + VHostName = vhost_store_name(Server, VHost), case whereis(VHostName) of undefined -> add_vhost(Server, VHost); _ -> ok end, VHostName. -get_vhost_name(Name, VHost) -> - VhostEncoded = encode_vhost(VHost), - binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", VhostEncoded/binary>>, utf8). +vhost_store_name(Name, VHost) -> + VhostEncoded = encode_vhost_name(VHost), + binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", + VhostEncoded/binary>>, + utf8). -get_vhost_dir(Dir, VHost) -> - VhostEncoded = encode_vhost(VHost), +vhost_store_dir(VHost) -> + Dir = rabbit_mnesia:dir(), + VhostEncoded = encode_vhost_name(VHost), binary_to_list(filename:join([Dir, VhostEncoded])). -encode_vhost(VHost) -> +encode_vhost_name(VHost) -> base64:encode(VHost). successfully_recovered_state(Name, VHost) -> - VHostName = get_vhost_name(Name, VHost), + VHostName = vhost_store_name(Name, VHost), rabbit_msg_store:successfully_recovered_state(VHostName). % force_recovery diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index f87205532c09..9427db78c46f 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -479,13 +479,13 @@ stop() -> ok = rabbit_queue_index:stop(). start_msg_store(Refs, StartFunState) -> - VHosts = rabbit_vhost:list(), ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, - [?TRANSIENT_MSG_STORE_SUP, rabbit_mnesia:dir(), + [?TRANSIENT_MSG_STORE_SUP, undefined, {fun (ok) -> finished end, ok}]), ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, - [?PERSISTENT_MSG_STORE_SUP, rabbit_mnesia:dir(), - Refs, StartFunState]), + [?PERSISTENT_MSG_STORE_SUP, Refs, StartFunState]), + %% Start message store for all known vhosts + VHosts = rabbit_vhost:list(), lists:foreach( fun(VHost) -> rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), @@ -2792,7 +2792,7 @@ start_new_store_sup() -> % Start persistent store sup without recovery. ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, - [?PERSISTENT_MSG_STORE_SUP, rabbit_mnesia:dir(), + [?PERSISTENT_MSG_STORE_SUP, undefined, {fun (ok) -> finished end, ok}]), ?PERSISTENT_MSG_STORE_SUP. From 460abf24db53b0137271cb24db41ec520fabc75b Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 2 Jun 2016 13:40:34 +0100 Subject: [PATCH 10/47] Update tests --- src/rabbit_vm.erl | 4 ++-- test/channel_operation_timeout_test_queue.erl | 4 ++-- test/unit_inbroker_SUITE.erl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_vm.erl b/src/rabbit_vm.erl index 9c8732bb6b62..1e3e65abc462 100644 --- a/src/rabbit_vm.erl +++ b/src/rabbit_vm.erl @@ -43,7 +43,7 @@ memory() -> || Names <- distinguished_interesting_sups()], Mnesia = mnesia_memory(), - MsgIndexETS = ets_memory([msg_store_persistent, msg_store_transient]), + MsgIndexETS = ets_memory([msg_store_persistent_vhost, msg_store_transient_vhost]), MgmtDbETS = ets_memory([rabbit_mgmt_event_collector]), [{total, Total}, @@ -141,7 +141,7 @@ interesting_sups() -> [[rabbit_amqqueue_sup_sup], conn_sups() | interesting_sups0()]. interesting_sups0() -> - MsgIndexProcs = [msg_store_transient, msg_store_persistent], + MsgIndexProcs = [msg_store_transient_vhost, msg_store_persistent_vhost], MgmtDbProcs = [rabbit_mgmt_sup_sup], PluginProcs = plugin_sups(), [MsgIndexProcs, MgmtDbProcs, PluginProcs]. diff --git a/test/channel_operation_timeout_test_queue.erl b/test/channel_operation_timeout_test_queue.erl index 4407a24e7fbf..3a284c592bf5 100644 --- a/test/channel_operation_timeout_test_queue.erl +++ b/test/channel_operation_timeout_test_queue.erl @@ -111,8 +111,8 @@ }). -define(HEADER_GUESS_SIZE, 100). %% see determine_persist_to/2 --define(PERSISTENT_MSG_STORE, msg_store_persistent). --define(TRANSIENT_MSG_STORE, msg_store_transient). +-define(PERSISTENT_MSG_STORE, msg_store_persistent_vhost). +-define(TRANSIENT_MSG_STORE, msg_store_transient_vhost). -define(QUEUE, lqueue). -define(TIMEOUT_TEST_MSG, <<"timeout_test_msg!">>). diff --git a/test/unit_inbroker_SUITE.erl b/test/unit_inbroker_SUITE.erl index 7b783facd245..a82887d8df7e 100644 --- a/test/unit_inbroker_SUITE.erl +++ b/test/unit_inbroker_SUITE.erl @@ -22,8 +22,8 @@ -compile(export_all). --define(PERSISTENT_MSG_STORE, msg_store_persistent). --define(TRANSIENT_MSG_STORE, msg_store_transient). +-define(PERSISTENT_MSG_STORE, msg_store_persistent_vhost). +-define(TRANSIENT_MSG_STORE, msg_store_transient_vhost). -define(TIMEOUT_LIST_OPS_PASS, 5000). -define(TIMEOUT, 30000). From fdd7b871df020cd138623bd8bffab7077440cbd5 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 2 Jun 2016 15:13:39 +0100 Subject: [PATCH 11/47] Updated tests to support per-vhost message store --- test/channel_operation_timeout_test_queue.erl | 42 +++++++++++++------ test/unit_inbroker_SUITE.erl | 10 ++--- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/test/channel_operation_timeout_test_queue.erl b/test/channel_operation_timeout_test_queue.erl index 3a284c592bf5..07b1235672bb 100644 --- a/test/channel_operation_timeout_test_queue.erl +++ b/test/channel_operation_timeout_test_queue.erl @@ -230,12 +230,21 @@ stop() -> ok = rabbit_queue_index:stop(). start_msg_store(Refs, StartFunState) -> - ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store, + ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?TRANSIENT_MSG_STORE, rabbit_mnesia:dir(), undefined, {fun (ok) -> finished end, ok}]), - ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store, + ok = rabbit_sup:start_child(?PERSISTENT_MSG_STORE, rabbit_msg_store_vhost_sup, [?PERSISTENT_MSG_STORE, rabbit_mnesia:dir(), - Refs, StartFunState]). + Refs, StartFunState]), + %% Start message store for all known vhosts + VHosts = rabbit_vhost:list(), + lists:foreach( + fun(VHost) -> + rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE, VHost), + rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE, VHost) + end, + VHosts), + ok. stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE), @@ -254,22 +263,26 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, new, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) -> IndexState = rabbit_queue_index:init(QueueName, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, 0, 0, [], case IsDurable of true -> msg_store_client_init(?PERSISTENT_MSG_STORE, - MsgOnDiskFun, AsyncCallback); + MsgOnDiskFun, AsyncCallback, + VHost); false -> undefined end, - msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, AsyncCallback)); + msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, AsyncCallback, VHost)); %% We can be recovering a transient queue if it crashed init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, AsyncCallback, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) -> {PRef, RecoveryTerms} = process_recovery_terms(Terms), + VHost = QueueName#resource.virtual_host, {PersistentClient, ContainsCheckFun} = case IsDurable of true -> C = msg_store_client_init(?PERSISTENT_MSG_STORE, PRef, - MsgOnDiskFun, AsyncCallback), + MsgOnDiskFun, AsyncCallback, + VHost), {C, fun (MsgId) when is_binary(MsgId) -> rabbit_msg_store:contains(MsgId, C); (#basic_message{is_persistent = Persistent}) -> @@ -278,11 +291,12 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, false -> {undefined, fun(_MsgId) -> false end} end, TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, - undefined, AsyncCallback), + undefined, AsyncCallback, + VHost), {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( QueueName, RecoveryTerms, - rabbit_msg_store:successfully_recovered_state(?PERSISTENT_MSG_STORE), + rabbit_msg_store_vhost_sup:successfully_recovered_state(?PERSISTENT_MSG_STORE), ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms, PersistentClient, TransientClient). @@ -957,14 +971,16 @@ with_immutable_msg_store_state(MSCState, IsPersistent, Fun) -> end), Res. -msg_store_client_init(MsgStore, MsgOnDiskFun, Callback) -> +msg_store_client_init(MsgStore, MsgOnDiskFun, Callback, VHost) -> msg_store_client_init(MsgStore, rabbit_guid:gen(), MsgOnDiskFun, - Callback). + Callback, VHost). -msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback) -> +msg_store_client_init(MsgStore, Ref, MsgOnDiskFun, Callback, VHost) -> CloseFDsFun = msg_store_close_fds_fun(MsgStore =:= ?PERSISTENT_MSG_STORE), - rabbit_msg_store:client_init(MsgStore, Ref, MsgOnDiskFun, - fun () -> Callback(?MODULE, CloseFDsFun) end). + rabbit_msg_store_vhost_sup:client_init( + MsgStore, Ref, MsgOnDiskFun, + fun () -> Callback(?MODULE, CloseFDsFun) end, + VHost). msg_store_write(MSCState, IsPersistent, MsgId, Msg) -> with_immutable_msg_store_state( diff --git a/test/unit_inbroker_SUITE.erl b/test/unit_inbroker_SUITE.erl index a82887d8df7e..569c7a88fa0c 100644 --- a/test/unit_inbroker_SUITE.erl +++ b/test/unit_inbroker_SUITE.erl @@ -468,10 +468,10 @@ on_disk_stop(Pid) -> msg_store_client_init_capture(MsgStore, Ref) -> Pid = spawn(fun on_disk_capture/0), - {Pid, rabbit_msg_store:client_init( + {Pid, rabbit_msg_store_vhost_sup:client_init( MsgStore, Ref, fun (MsgIds, _ActionTaken) -> Pid ! {on_disk, MsgIds} - end, undefined)}. + end, undefined, <<"/">>)}. msg_store_contains(Atom, MsgIds, MSCState) -> Atom = lists:foldl( @@ -548,14 +548,14 @@ test_msg_store_confirm_timer() -> Ref = rabbit_guid:gen(), MsgId = msg_id_bin(1), Self = self(), - MSCState = rabbit_msg_store:client_init( + MSCState = rabbit_msg_store_vhost_sup:client_init( ?PERSISTENT_MSG_STORE, Ref, fun (MsgIds, _ActionTaken) -> case gb_sets:is_member(MsgId, MsgIds) of true -> Self ! on_disk; false -> ok end - end, undefined), + end, undefined, <<"/">>), ok = msg_store_write([MsgId], MSCState), ok = msg_store_keep_busy_until_confirm([msg_id_bin(2)], MSCState, false), ok = msg_store_remove([MsgId], MSCState), @@ -1424,7 +1424,7 @@ nop(_) -> ok. nop(_, _) -> ok. msg_store_client_init(MsgStore, Ref) -> - rabbit_msg_store:client_init(MsgStore, Ref, undefined, undefined). + rabbit_msg_store_vhost_sup:client_init(MsgStore, Ref, undefined, undefined, <<"/">>). variable_queue_init(Q, Recover) -> rabbit_variable_queue:init( From e6c76fd377c92cd9c4e1cdd6ab6ed6da785a9271 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 19 Oct 2016 11:16:34 +0100 Subject: [PATCH 12/47] Queue index per vhost location --- src/rabbit_msg_store_vhost_sup.erl | 7 ++---- src/rabbit_queue_index.erl | 34 ++++++++++++++---------------- src/rabbit_vhost.erl | 6 ++++++ 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index a33ffed56070..834b9cfb047d 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -42,19 +42,16 @@ maybe_start_vhost(Server, VHost) -> VHostName. vhost_store_name(Name, VHost) -> - VhostEncoded = encode_vhost_name(VHost), + VhostEncoded = rabbit_vhost:dir(VHost), binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", VhostEncoded/binary>>, utf8). vhost_store_dir(VHost) -> Dir = rabbit_mnesia:dir(), - VhostEncoded = encode_vhost_name(VHost), + VhostEncoded = rabbit_vhost:dir(VHost), binary_to_list(filename:join([Dir, VhostEncoded])). -encode_vhost_name(VHost) -> - base64:encode(VHost). - successfully_recovered_state(Name, VHost) -> VHostName = vhost_store_name(Name, VHost), rabbit_msg_store:successfully_recovered_state(VHostName). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3e9d92f5c11c..a835bd4779bd 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -476,11 +476,10 @@ start(DurableQueueNames) -> end, {[], sets:new()}, DurableQueueNames), %% Any queue directory we've not been asked to recover is considered garbage - QueuesDir = queues_dir(), rabbit_file:recursive_delete( - [filename:join(QueuesDir, DirName) || - DirName <- all_queue_directory_names(QueuesDir), - not sets:is_element(DirName, DurableDirectories)]), + [DirName || + DirName <- all_queue_directory_names(), + not sets:is_element(filename:basename(DirName), DurableDirectories)]), rabbit_recovery_terms:clear(), @@ -491,12 +490,9 @@ start(DurableQueueNames) -> stop() -> rabbit_recovery_terms:stop(). -all_queue_directory_names(Dir) -> - case rabbit_file:list_dir(Dir) of - {ok, Entries} -> [E || E <- Entries, - rabbit_file:is_dir(filename:join(Dir, E))]; - {error, enoent} -> [] - end. +all_queue_directory_names() -> + QueuesBaseDir = queues_base_dir(), + filelib:wildcard(filename:join([QueuesBaseDir, "*", "queues", "*"])). %%---------------------------------------------------------------------------- %% startup and shutdown @@ -509,14 +505,18 @@ erase_index_dir(Dir) -> end. blank_state(QueueName) -> - blank_state_dir( - filename:join(queues_dir(), queue_name_to_dir_name(QueueName))). + blank_state_dir(queue_dir(QueueName)). blank_state_dir(Dir) -> blank_state_dir_funs(Dir, fun (_) -> ok end, fun (_) -> ok end). +queue_dir(#resource{ virtual_host = VHost } = QueueName) -> + %% Queue directory is rabbit_mnesia_dir/:vhost/queues/:queue_id + filename:join([queues_base_dir(), rabbit_vhost:dir(VHost), + "queues", queue_name_to_dir_name(QueueName)]). + blank_state_dir_funs(Dir, OnSyncFun, OnSyncMsgFun) -> {ok, MaxJournal} = application:get_env(rabbit, queue_index_max_journal_entries), @@ -630,8 +630,8 @@ queue_name_to_dir_name(Name = #resource { kind = queue }) -> <> = erlang:md5(term_to_binary(Name)), rabbit_misc:format("~.36B", [Num]). -queues_dir() -> - filename:join(rabbit_mnesia:dir(), "queues"). +queues_base_dir() -> + rabbit_mnesia:dir(). %%---------------------------------------------------------------------------- %% msg store startup delta function @@ -1353,15 +1353,13 @@ store_msg_segment(_) -> %%---------------------------------------------------------------------------- foreach_queue_index(Funs) -> - QueuesDir = queues_dir(), - QueueDirNames = all_queue_directory_names(QueuesDir), + QueueDirNames = all_queue_directory_names(), {ok, Gatherer} = gatherer:start_link(), [begin ok = gatherer:fork(Gatherer), ok = worker_pool:submit_async( fun () -> - transform_queue(filename:join(QueuesDir, QueueDirName), - Gatherer, Funs) + transform_queue(QueueDirName, Gatherer, Funs) end) end || QueueDirName <- QueueDirNames], empty = gatherer:out(Gatherer), diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 01f1046fb8b9..ddcd69049dd8 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -23,6 +23,7 @@ -export([add/1, delete/1, exists/1, list/0, with/2, assert/1, update/2, set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). +-export([dir/1]). -spec add(rabbit_types:vhost()) -> 'ok'. @@ -93,6 +94,8 @@ delete(VHostPath) -> with(VHostPath, fun () -> internal_delete(VHostPath) end)), ok = rabbit_event:notify(vhost_deleted, [{name, VHostPath}]), [ok = Fun() || Fun <- Funs], + rabbit_file:recursive_delete([filename:join(rabbit_mnesia:dir(), + VHostPath)]), ok. assert_benign(ok) -> ok; @@ -185,3 +188,6 @@ info_all(Ref, AggregatorPid) -> info_all(?INFO_KEYS, Ref, AggregatorPid). info_all(Items, Ref, AggregatorPid) -> rabbit_control_misc:emitting_map( AggregatorPid, Ref, fun(VHost) -> info(VHost, Items) end, list()). + +dir(Vhost) -> + base64:encode(Vhost). \ No newline at end of file From 4f12ebeb53c72d25ecb9f1b5bf0feda1fcf4523f Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 19 Oct 2016 14:45:52 +0100 Subject: [PATCH 13/47] Tests for per-vhost message store. Stop vhost message store when deleting vhost --- src/rabbit_msg_store_vhost_sup.erl | 10 +- src/rabbit_variable_queue.erl | 8 +- src/rabbit_vhost.erl | 6 +- test/per_vhost_msg_store_SUITE.erl | 255 +++++++++++++++++++++++++++++ 4 files changed, 274 insertions(+), 5 deletions(-) create mode 100644 test/per_vhost_msg_store_SUITE.erl diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 834b9cfb047d..8e514d506603 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -2,7 +2,8 @@ -behaviour(supervisor2). --export([start_link/3, init/1, add_vhost/2, client_init/5, successfully_recovered_state/2]). +-export([start_link/3, init/1, add_vhost/2, delete_vhost/2, + client_init/5, successfully_recovered_state/2]). %% Internal -export([start_vhost/4]). @@ -28,6 +29,13 @@ start_vhost(Name, ClientRefs, StartupFunState, VHost) -> rabbit_msg_store:start_link(VHostName, VHostDir, ClientRefs, StartupFunState). +delete_vhost(Name, VHost) -> + VHostName = vhost_store_name(Name, VHost), + case whereis(VHostName) of + undefined -> ok; + Pid -> supervisor2:terminate_child(Name, Pid) + end, + ok. client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> VHostName = maybe_start_vhost(Server, VHost), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9427db78c46f..45d9f97792b4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -34,6 +34,7 @@ -export([start_msg_store/2, stop_msg_store/0, init/6]). -export([move_messages_to_vhost_store/0]). +-export([stop_vhost_msg_store/1]). -include_lib("stdlib/include/qlc.hrl"). %%---------------------------------------------------------------------------- @@ -460,8 +461,6 @@ %% Public API %%---------------------------------------------------------------------------- - - start(DurableQueues) -> {AllTerms, StartFunState} = rabbit_queue_index:start(DurableQueues), start_msg_store( @@ -498,6 +497,11 @@ stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE_SUP), ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE_SUP). +stop_vhost_msg_store(VHost) -> + rabbit_msg_store_vhost_sup:delete_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), + rabbit_msg_store_vhost_sup:delete_vhost(?PERSISTENT_MSG_STORE_SUP, VHost), + ok. + init(Queue, Recover, Callback) -> init( Queue, Recover, Callback, diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index ddcd69049dd8..45511a12b741 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -94,8 +94,10 @@ delete(VHostPath) -> with(VHostPath, fun () -> internal_delete(VHostPath) end)), ok = rabbit_event:notify(vhost_deleted, [{name, VHostPath}]), [ok = Fun() || Fun <- Funs], - rabbit_file:recursive_delete([filename:join(rabbit_mnesia:dir(), - VHostPath)]), + VhostDir = filename:join(rabbit_mnesia:dir(), dir(VHostPath)), + rabbit_log:info("Deleting vhost directory '~s'~n", [VhostDir]), + rabbit_variable_queue:stop_vhost_msg_store(VHostPath), + ok = rabbit_file:recursive_delete([VhostDir]), ok. assert_benign(ok) -> ok; diff --git a/test/per_vhost_msg_store_SUITE.erl b/test/per_vhost_msg_store_SUITE.erl new file mode 100644 index 000000000000..4a890ce3ea9b --- /dev/null +++ b/test/per_vhost_msg_store_SUITE.erl @@ -0,0 +1,255 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is GoPivotal, Inc. +%% Copyright (c) 2007-2016 Pivotal Software, Inc. All rights reserved. +%% + +-module(per_vhost_msg_store_SUITE). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("amqp_client/include/amqp_client.hrl"). + +-compile(export_all). + +-define(MSGS_COUNT, 100). + +all() -> + [ + publish_to_different_dirs, + storage_deleted_on_vhost_delete, + single_vhost_storage_delete_is_safe + ]. + + + +init_per_suite(Config) -> + rabbit_ct_helpers:log_environment(), + Config1 = rabbit_ct_helpers:set_config( + Config, + [{rmq_nodename_suffix, ?MODULE}]), + Config2 = rabbit_ct_helpers:merge_app_env( + Config1, + {rabbit, [{queue_index_embed_msgs_below, 100}]}), + rabbit_ct_helpers:run_setup_steps( + Config2, + rabbit_ct_broker_helpers:setup_steps() ++ + rabbit_ct_client_helpers:setup_steps()). + +end_per_suite(Config) -> + rabbit_ct_helpers:run_teardown_steps( + Config, + rabbit_ct_client_helpers:teardown_steps() ++ + rabbit_ct_broker_helpers:teardown_steps()). + +init_per_testcase(_, Config) -> + Vhost1 = <<"vhost1">>, + Vhost2 = <<"vhost2">>, + rabbit_ct_broker_helpers:add_vhost(Config, Vhost1), + rabbit_ct_broker_helpers:add_vhost(Config, Vhost2), + Chan1 = open_channel(Vhost1, Config), + Chan2 = open_channel(Vhost2, Config), + rabbit_ct_helpers:set_config( + Config, + [{vhost1, Vhost1}, {vhost2, Vhost2}, + {channel1, Chan1}, {channel2, Chan2}]). + +end_per_testcase(single_vhost_storage_delete_is_safe, Config) -> + Config; +end_per_testcase(_, Config) -> + Vhost1 = ?config(vhost1, Config), + Vhost2 = ?config(vhost2, Config), + rabbit_ct_broker_helpers:delete_vhost(Config, Vhost1), + rabbit_ct_broker_helpers:delete_vhost(Config, Vhost2), + Config. + +publish_to_different_dirs(Config) -> + Vhost1 = ?config(vhost1, Config), + Vhost2 = ?config(vhost2, Config), + Channel1 = ?config(channel1, Config), + Channel2 = ?config(channel2, Config), + Queue1 = declare_durable_queue(Channel1), + Queue2 = declare_durable_queue(Channel2), + FolderSize1 = get_folder_size(Vhost1, Config), + FolderSize2 = get_folder_size(Vhost2, Config), + + %% Publish message to a queue index + publish_persistent_messages(index, Channel1, Queue1), + %% First storage increased + FolderSize11 = get_folder_size(Vhost1, Config), + true = (FolderSize1 < FolderSize11), + %% Second storage didn't increased + FolderSize2 = get_folder_size(Vhost2, Config), + + %% Publish message to a message store + publish_persistent_messages(store, Channel1, Queue1), + %% First storage increased + FolderSize12 = get_folder_size(Vhost1, Config), + true = (FolderSize11 < FolderSize12), + %% Second storage didn't increased + FolderSize2 = get_folder_size(Vhost2, Config), + + %% Publish message to a queue index + publish_persistent_messages(index, Channel2, Queue2), + %% First storage increased + FolderSize21 = get_folder_size(Vhost2, Config), + true = (FolderSize2 < FolderSize21), + %% Second storage didn't increased + FolderSize12 = get_folder_size(Vhost1, Config), + + %% Publish message to a message store + publish_persistent_messages(store, Channel2, Queue2), + %% Second storage increased + FolderSize22 = get_folder_size(Vhost2, Config), + true = (FolderSize21 < FolderSize22), + %% First storage didn't increased + FolderSize12 = get_folder_size(Vhost1, Config). + +storage_deleted_on_vhost_delete(Config) -> + Vhost1 = ?config(vhost1, Config), + Channel1 = ?config(channel1, Config), + Queue1 = declare_durable_queue(Channel1), + FolderSize = get_global_folder_size(Config), + + publish_persistent_messages(index, Channel1, Queue1), + publish_persistent_messages(store, Channel1, Queue1), + FolderSizeAfterPublish = get_global_folder_size(Config), + + %% Total storage size increased + true = (FolderSize < FolderSizeAfterPublish), + + ok = rabbit_ct_broker_helpers:delete_vhost(Config, Vhost1), + + %% Total memory reduced + FolderSizeAfterDelete = get_global_folder_size(Config), + true = (FolderSizeAfterPublish > FolderSizeAfterDelete), + + %% There is no Vhost1 folder + 0 = get_folder_size(Vhost1, Config). + + +single_vhost_storage_delete_is_safe(Config) -> +ct:pal("Start test 3", []), + Vhost1 = ?config(vhost1, Config), + Vhost2 = ?config(vhost2, Config), + Channel1 = ?config(channel1, Config), + Channel2 = ?config(channel2, Config), + Queue1 = declare_durable_queue(Channel1), + Queue2 = declare_durable_queue(Channel2), + + %% Publish messages to both stores + publish_persistent_messages(index, Channel1, Queue1), + publish_persistent_messages(store, Channel1, Queue1), + publish_persistent_messages(index, Channel2, Queue2), + publish_persistent_messages(store, Channel2, Queue2), + + queue_is_not_empty(Channel2, Queue2), + % Vhost2Dir = vhost_dir(Vhost2, Config), + % [StoreFile] = filelib:wildcard(binary_to_list(filename:join([Vhost2Dir, "msg_store_persistent_*", "0.rdq"]))), + % ct:pal("Store file ~p~n", [file:read_file(StoreFile)]). +% ok. + rabbit_ct_broker_helpers:stop_broker(Config, 0), + delete_vhost_data(Vhost1, Config), + rabbit_ct_broker_helpers:start_broker(Config, 0), + + Channel11 = open_channel(Vhost1, Config), + Channel21 = open_channel(Vhost2, Config), + + %% There are no Vhost1 messages + queue_is_empty(Channel11, Queue1), + + %% But Vhost2 messages are in place + queue_is_not_empty(Channel21, Queue2), + consume_messages(index, Channel21, Queue2), + consume_messages(store, Channel21, Queue2). + +declare_durable_queue(Channel) -> + QName = list_to_binary(erlang:ref_to_list(make_ref())), + #'queue.declare_ok'{queue = QName} = + amqp_channel:call(Channel, + #'queue.declare'{queue = QName, durable = true}), + QName. + +publish_persistent_messages(Storage, Channel, Queue) -> + MessagePayload = case Storage of + index -> binary:copy(<<"=">>, 50); + store -> binary:copy(<<"-">>, 150) + end, + amqp_channel:call(Channel, #'confirm.select'{}), + [amqp_channel:call(Channel, + #'basic.publish'{routing_key = Queue}, + #amqp_msg{props = #'P_basic'{delivery_mode = 2}, + payload = MessagePayload}) + || _ <- lists:seq(1, ?MSGS_COUNT)], + amqp_channel:wait_for_confirms(Channel). + + +get_folder_size(Vhost, Config) -> + Dir = vhost_dir(Vhost, Config), + folder_size(Dir). + +folder_size(Dir) -> + filelib:fold_files(Dir, ".*", true, + fun(F,Acc) -> filelib:file_size(F) + Acc end, 0). + +get_global_folder_size(Config) -> + BaseDir = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_mnesia, dir, []), + folder_size(BaseDir). + +vhost_dir(Vhost, Config) -> + BaseDir = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_mnesia, dir, []), + VhostDir = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_vhost, dir, [Vhost]), + filename:join(BaseDir, VhostDir). + +delete_vhost_data(Vhost, Config) -> + Dir = vhost_dir(Vhost, Config), + rabbit_file:recursive_delete([Dir]). + +queue_is_empty(Channel, Queue) -> + #'queue.declare_ok'{queue = Queue, message_count = 0} = + amqp_channel:call(Channel, + #'queue.declare'{ queue = Queue, + durable = true, + passive = true}). + +queue_is_not_empty(Channel, Queue) -> + #'queue.declare_ok'{queue = Queue, message_count = MsgCount} = + amqp_channel:call(Channel, + #'queue.declare'{ queue = Queue, + durable = true, + passive = true}), + ExpectedCount = ?MSGS_COUNT * 2, + ExpectedCount = MsgCount. + +consume_messages(Storage, Channel, Queue) -> + MessagePayload = case Storage of + index -> binary:copy(<<"=">>, 50); + store -> binary:copy(<<"-">>, 150) + end, + lists:foreach( + fun(I) -> + ct:pal("Consume message ~p~n ~p~n", [I, MessagePayload]), + {#'basic.get_ok'{}, Content} = + amqp_channel:call(Channel, + #'basic.get'{queue = Queue, + no_ack = true}), + #amqp_msg{payload = MessagePayload} = Content + end, + lists:seq(1, ?MSGS_COUNT)), + ok. + +open_channel(Vhost, Config) -> + Node = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), + {ok, Conn} = amqp_connection:start( + #amqp_params_direct{node = Node, virtual_host = Vhost}), + {ok, Chan} = amqp_connection:open_channel(Conn), + Chan. \ No newline at end of file From fd4db66c2c65b2de20b81ef89bc7a87dcea0f209 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 19 Oct 2016 17:47:04 +0100 Subject: [PATCH 14/47] Terminate message store ignoring file save errors --- src/rabbit_msg_store.erl | 22 ++++++++++++++++++---- src/rabbit_msg_store_ets_index.erl | 10 ++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index f033dea5f0c3..71a7d9f15aeb 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -995,12 +995,26 @@ terminate(_Reason, State = #msstate { index_state = IndexState, State2 end, State3 = close_all_handles(State1), - ok = store_file_summary(FileSummaryEts, Dir), + %% Let file summary saving fail. + case store_file_summary(FileSummaryEts, Dir) of + ok -> ok; + {error, FSErr} -> + rabbit_log:error("Unable to store file summary" + " for vhost message store for directory ~p~n" + " Error: ~p~n", + [Dir, FSErr]) + end, [true = ets:delete(T) || T <- [FileSummaryEts, FileHandlesEts, CurFileCacheEts, FlyingEts]], IndexModule:terminate(IndexState), - ok = store_recovery_terms([{client_refs, dict:fetch_keys(Clients)}, - {index_module, IndexModule}], Dir), + case store_recovery_terms([{client_refs, dict:fetch_keys(Clients)}, + {index_module, IndexModule}], Dir) of + ok -> ok; + {error, RTErr} -> + rabbit_log:error("Unable to save message store recovery terms" + "for directory ~p~n Error: ~p~n", + [Dir, RTErr]) + end, State3 #msstate { index_state = undefined, current_file_handle = undefined }. @@ -1582,7 +1596,7 @@ read_recovery_terms(Dir) -> end. store_file_summary(Tid, Dir) -> - ok = ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), + ets:tab2file(Tid, filename:join(Dir, ?FILE_SUMMARY_FILENAME), [{extended_info, [object_count]}]). recover_file_summary(false, _Dir) -> diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index 76ef112069c5..39c0cc96bd51 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -74,6 +74,12 @@ delete_by_file(File, State) -> ok. terminate(#state { table = MsgLocations, dir = Dir }) -> - ok = ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), - [{extended_info, [object_count]}]), + case ets:tab2file(MsgLocations, filename:join(Dir, ?FILENAME), + [{extended_info, [object_count]}]) of + ok -> ok; + {error, Err} -> + rabbit_log:error("Unable to save message store index" + " for directory ~p~n Error: ~p~n", + [Dir, Err]) + end, ets:delete(MsgLocations). From 8369950c887c817a2096a3c2fdc941e32547ebce Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 19 Oct 2016 17:56:33 +0100 Subject: [PATCH 15/47] Allow restart of a vhost message store. purge_messages function to clean message storage (together with message indexes) for a vhost --- src/rabbit_msg_store_vhost_sup.erl | 2 +- src/rabbit_vhost.erl | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 8e514d506603..d60004caeff4 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -13,7 +13,7 @@ start_link(Name, ClientRefs, StartupFunState) -> [Name, ClientRefs, StartupFunState]). init([Name, ClientRefs, StartupFunState]) -> - {ok, {{simple_one_for_one, 0, 1}, + {ok, {{simple_one_for_one, 1, 1}, [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, [Name, ClientRefs, StartupFunState]}, transient, infinity, supervisor, [rabbit_msg_store]}]}}. diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 45511a12b741..a277c275ed32 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -24,7 +24,7 @@ set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). -export([dir/1]). - +-export([purge_messages/1]). -spec add(rabbit_types:vhost()) -> 'ok'. -spec delete(rabbit_types:vhost()) -> 'ok'. @@ -94,11 +94,18 @@ delete(VHostPath) -> with(VHostPath, fun () -> internal_delete(VHostPath) end)), ok = rabbit_event:notify(vhost_deleted, [{name, VHostPath}]), [ok = Fun() || Fun <- Funs], + purge_messages(VHostPath), + ok. + +purge_messages(VHostPath) -> VhostDir = filename:join(rabbit_mnesia:dir(), dir(VHostPath)), rabbit_log:info("Deleting vhost directory '~s'~n", [VhostDir]), + %% Message store is stopped to close file handles rabbit_variable_queue:stop_vhost_msg_store(VHostPath), ok = rabbit_file:recursive_delete([VhostDir]), - ok. + %% Second terminate is made in case message store is + %% restarted during deletion + rabbit_variable_queue:stop_vhost_msg_store(VHostPath). assert_benign(ok) -> ok; assert_benign({ok, _}) -> ok; From 1c81c509d738b9ba85baf48f579b734468d72c2d Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 20 Oct 2016 11:04:52 +0100 Subject: [PATCH 16/47] Move queues to vhost storage on upgrade --- src/rabbit_queue_index.erl | 17 +++++++++++++++++ src/rabbit_variable_queue.erl | 1 + 2 files changed, 18 insertions(+) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index a835bd4779bd..3827080a2f86 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -25,6 +25,9 @@ -export([add_queue_ttl/0, avoid_zeroes/0, store_msg_size/0, store_msg/0]). -export([scan_queue_segments/3]). +%% Migration to per-vhost message store +-export([move_to_vhost_store/1]). + -define(CLEAN_FILENAME, "clean.dot"). %%---------------------------------------------------------------------------- @@ -1400,3 +1403,17 @@ drive_transform_fun(Fun, Hdl, Contents) -> {Output, Contents1} -> ok = file_handle_cache:append(Hdl, Output), drive_transform_fun(Fun, Hdl, Contents1) end. + +move_to_vhost_store(#resource{} = QueueName) -> + OldQueueDir = filename:join([queues_base_dir(), "queues", + queue_name_to_dir_name(QueueName)]), + NewQueueDir = queue_dir(QueueName), + case rabbit_file:is_dir(OldQueueDir) of + true -> + ok = rabbit_file:ensure_dir(NewQueueDir), + ok = rabbit_file:rename(OldQueueDir, NewQueueDir); + false -> + rabbit_log:info("Queue index directoy not found for queue ~p~n", + [QueueName]) + end, + ok. diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 45d9f97792b4..b65154bb4801 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2721,6 +2721,7 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> OldStoreClient = get_old_client(OldStore), NewStoreClient = get_new_store_client(Queue, NewStoreSup), #amqqueue{name = QueueName} = Queue, + rabbit_queue_index:move_to_vhost_store(QueueName), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( From 8a45f409c3177505b342ad33f0c3d84cb04359db Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 20 Oct 2016 11:07:04 +0100 Subject: [PATCH 17/47] Migration to per-vhost store message --- src/rabbit_variable_queue.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index b65154bb4801..961246058ea4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2688,7 +2688,7 @@ transform_store(Store, TransformFun) -> rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). move_messages_to_vhost_store() -> -rabbit_log:error("MIGRATING!!"), + rabbit_log:info("Moving messages to per-vhsot message store"), Queues = list_persistent_queues(), %% Old msg_store may require recovery. %% This upgrade step should only be started From 36f3e6791cb12ba5ab448a330baa11b3eae64e9a Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 20 Oct 2016 11:10:39 +0100 Subject: [PATCH 18/47] Purge vhost storage in transaction --- src/rabbit_vhost.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index a277c275ed32..2f323fc41b2f 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -94,7 +94,6 @@ delete(VHostPath) -> with(VHostPath, fun () -> internal_delete(VHostPath) end)), ok = rabbit_event:notify(vhost_deleted, [{name, VHostPath}]), [ok = Fun() || Fun <- Funs], - purge_messages(VHostPath), ok. purge_messages(VHostPath) -> @@ -129,6 +128,7 @@ internal_delete(VHostPath) -> Fs2 = [rabbit_policy:delete(VHostPath, proplists:get_value(name, Info)) || Info <- rabbit_policy:list(VHostPath)], ok = mnesia:delete({rabbit_vhost, VHostPath}), + purge_messages(VHostPath), Fs1 ++ Fs2. exists(VHostPath) -> From 008c4eb7f0a9b14838439db934eb98dbe7801dbb Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Wed, 9 Nov 2016 12:22:34 -0800 Subject: [PATCH 19/47] Rename an upgrade function --- src/rabbit.erl | 4 ++-- src/rabbit_upgrade.erl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index a7bebfc1276d..2581fd49f6bb 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -147,9 +147,9 @@ {enables, routing_ready}]}). -rabbit_boot_step({upgrade_queues, - [{description, "codec correctness check"}, + [{description, "per-vhost message store migration"}, {mfa, {rabbit_upgrade, - maybe_upgrade_queues, + maybe_migrate_queues_to_per_vhost_storage, []}}, {requires, [core_initialized]}, {enables, recovery}]}). diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index f5437e6b817f..8b7d11e4faf4 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -17,7 +17,7 @@ -module(rabbit_upgrade). -export([maybe_upgrade_mnesia/0, maybe_upgrade_local/0, - maybe_upgrade_queues/0, + maybe_migrate_queues_to_per_vhost_storage/0, nodes_running/1, secondary_upgrade/1]). -include("rabbit.hrl"). @@ -253,7 +253,7 @@ maybe_upgrade_local() -> %% ------------------------------------------------------------------- -maybe_upgrade_queues() -> +maybe_migrate_queues_to_per_vhost_storage() -> case rabbit_version:upgrades_required(queues) of {error, version_not_available} -> version_not_available; {error, starting_from_scratch} -> starting_from_scratch; From 05e9dbebf54711bc5e13baa0da9f08fb5f738b91 Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Fri, 11 Nov 2016 16:59:36 +0300 Subject: [PATCH 20/47] Naming and cosmetics --- src/rabbit_msg_store.erl | 9 ++++----- src/rabbit_msg_store_ets_index.erl | 2 +- src/rabbit_msg_store_vhost_sup.erl | 10 +++++----- src/rabbit_queue_index.erl | 8 ++++---- src/rabbit_variable_queue.erl | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 71a7d9f15aeb..b0beb975e83c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -718,7 +718,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> Dir = filename:join(BaseDir, atom_to_list(Server)), - {ok, IndexModule} = application:get_env(rabbit,msg_store_index_module), + {ok, IndexModule} = application:get_env(rabbit, msg_store_index_module), rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), AttemptFileSummaryRecovery = @@ -758,7 +758,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> DyingIndex = ets:new(rabbit_msg_store_dying_client_index, [set, public, {keypos, #dying_client.client_ref}]), - {ok, FileSizeLimit} = application:get_env(rabbit,msg_store_file_size_limit), + {ok, FileSizeLimit} = application:get_env(rabbit, msg_store_file_size_limit), {ok, GCPid} = rabbit_msg_store_gc:start_link( #gc_state { dir = Dir, @@ -995,13 +995,12 @@ terminate(_Reason, State = #msstate { index_state = IndexState, State2 end, State3 = close_all_handles(State1), - %% Let file summary saving fail. case store_file_summary(FileSummaryEts, Dir) of ok -> ok; {error, FSErr} -> rabbit_log:error("Unable to store file summary" " for vhost message store for directory ~p~n" - " Error: ~p~n", + "Error: ~p~n", [Dir, FSErr]) end, [true = ets:delete(T) || T <- [FileSummaryEts, FileHandlesEts, @@ -1012,7 +1011,7 @@ terminate(_Reason, State = #msstate { index_state = IndexState, ok -> ok; {error, RTErr} -> rabbit_log:error("Unable to save message store recovery terms" - "for directory ~p~n Error: ~p~n", + "for directory ~p~nError: ~p~n", [Dir, RTErr]) end, State3 #msstate { index_state = undefined, diff --git a/src/rabbit_msg_store_ets_index.erl b/src/rabbit_msg_store_ets_index.erl index 39c0cc96bd51..0e8b7174e248 100644 --- a/src/rabbit_msg_store_ets_index.erl +++ b/src/rabbit_msg_store_ets_index.erl @@ -79,7 +79,7 @@ terminate(#state { table = MsgLocations, dir = Dir }) -> ok -> ok; {error, Err} -> rabbit_log:error("Unable to save message store index" - " for directory ~p~n Error: ~p~n", + " for directory ~p.~nError: ~p~n", [Dir, Err]) end, ets:delete(MsgLocations). diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index d60004caeff4..a5be43109354 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -50,19 +50,19 @@ maybe_start_vhost(Server, VHost) -> VHostName. vhost_store_name(Name, VHost) -> - VhostEncoded = rabbit_vhost:dir(VHost), + Base64EncodedName = rabbit_vhost:dir(VHost), binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", - VhostEncoded/binary>>, + Base64EncodedName/binary>>, utf8). vhost_store_dir(VHost) -> Dir = rabbit_mnesia:dir(), - VhostEncoded = rabbit_vhost:dir(VHost), - binary_to_list(filename:join([Dir, VhostEncoded])). + Base64EncodedName = rabbit_vhost:dir(VHost), + binary_to_list(filename:join([Dir, Base64EncodedName])). successfully_recovered_state(Name, VHost) -> VHostName = vhost_store_name(Name, VHost), rabbit_msg_store:successfully_recovered_state(VHostName). % force_recovery -% transform_dir \ No newline at end of file +% transform_dir diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 3827080a2f86..d19694403c78 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -25,8 +25,8 @@ -export([add_queue_ttl/0, avoid_zeroes/0, store_msg_size/0, store_msg/0]). -export([scan_queue_segments/3]). -%% Migration to per-vhost message store --export([move_to_vhost_store/1]). +%% Migrates from global to per-vhost message stores +-export([move_to_per_vhost_stores/1]). -define(CLEAN_FILENAME, "clean.dot"). @@ -516,7 +516,7 @@ blank_state_dir(Dir) -> fun (_) -> ok end). queue_dir(#resource{ virtual_host = VHost } = QueueName) -> - %% Queue directory is rabbit_mnesia_dir/:vhost/queues/:queue_id + %% Queue directory is {node_database_dir}/{vhost}/queues/{queue} filename:join([queues_base_dir(), rabbit_vhost:dir(VHost), "queues", queue_name_to_dir_name(QueueName)]). @@ -1404,7 +1404,7 @@ drive_transform_fun(Fun, Hdl, Contents) -> drive_transform_fun(Fun, Hdl, Contents1) end. -move_to_vhost_store(#resource{} = QueueName) -> +move_to_per_vhost_stores(#resource{} = QueueName) -> OldQueueDir = filename:join([queues_base_dir(), "queues", queue_name_to_dir_name(QueueName)]), NewQueueDir = queue_dir(QueueName), diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 95604d08f4b8..41ee6cde4cd2 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2746,7 +2746,7 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> OldStoreClient = get_old_client(OldStore), NewStoreClient = get_new_store_client(Queue, NewStoreSup), #amqqueue{name = QueueName} = Queue, - rabbit_queue_index:move_to_vhost_store(QueueName), + rabbit_queue_index:move_to_per_vhost_stores(QueueName), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( From cc75e5672ad23b63fb7c1f93e263b706a95d0cf6 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Fri, 11 Nov 2016 18:00:36 +0000 Subject: [PATCH 21/47] Avoid atom exhaustion in vhost message stores --- src/rabbit_msg_store.erl | 32 ++++---- src/rabbit_msg_store_vhost_sup.erl | 77 ++++++++++++------- test/channel_operation_timeout_test_queue.erl | 2 +- 3 files changed, 65 insertions(+), 46 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index b0beb975e83c..1e929c7f7c56 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -474,15 +474,15 @@ %% public API %%---------------------------------------------------------------------------- -start_link(Server, Dir, ClientRefs, StartupFunState) -> - gen_server2:start_link({local, Server}, ?MODULE, - [Server, Dir, ClientRefs, StartupFunState], +start_link(Name, Dir, ClientRefs, StartupFunState) when is_atom(Name) -> + gen_server2:start_link(?MODULE, + [Name, Dir, ClientRefs, StartupFunState], [{timeout, infinity}]). successfully_recovered_state(Server) -> gen_server2:call(Server, successfully_recovered_state, infinity). -client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) -> +client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) when is_pid(Server) -> {IState, IModule, Dir, GCPid, FileHandlesEts, FileSummaryEts, CurFileCacheEts, FlyingEts} = gen_server2:call( @@ -522,7 +522,7 @@ write_flow(MsgId, Msg, %% rabbit_amqqueue_process process via the %% rabbit_variable_queue. We are accessing the %% rabbit_amqqueue_process process dictionary. - credit_flow:send(whereis(Server), CreditDiscBound), + credit_flow:send(Server, CreditDiscBound), client_write(MsgId, Msg, flow, CState). write(MsgId, Msg, CState) -> client_write(MsgId, Msg, noflow, CState). @@ -548,7 +548,7 @@ remove(MsgIds, CState = #client_msstate { client_ref = CRef }) -> [client_update_flying(-1, MsgId, CState) || MsgId <- MsgIds], server_cast(CState, {remove, CRef, MsgIds}). -set_maximum_since_use(Server, Age) -> +set_maximum_since_use(Server, Age) when is_pid(Server) -> gen_server2:cast(Server, {set_maximum_since_use, Age}). %%---------------------------------------------------------------------------- @@ -710,16 +710,16 @@ clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM, %% gen_server callbacks %%---------------------------------------------------------------------------- -init([Server, BaseDir, ClientRefs, StartupFunState]) -> +init([Name, BaseDir, ClientRefs, StartupFunState]) -> process_flag(trap_exit, true), ok = file_handle_cache:register_callback(?MODULE, set_maximum_since_use, [self()]), - Dir = filename:join(BaseDir, atom_to_list(Server)), + Dir = filename:join(BaseDir, atom_to_list(Name)), {ok, IndexModule} = application:get_env(rabbit, msg_store_index_module), - rabbit_log:info("~w: using ~p to provide index~n", [Server, IndexModule]), + rabbit_log:info("~tp: using ~p to provide index~n", [Dir, IndexModule]), AttemptFileSummaryRecovery = case ClientRefs of @@ -738,7 +738,7 @@ init([Server, BaseDir, ClientRefs, StartupFunState]) -> {CleanShutdown, IndexState, ClientRefs1} = recover_index_and_client_refs(IndexModule, FileSummaryRecovered, - ClientRefs, Dir, Server), + ClientRefs, Dir), Clients = dict:from_list( [{CRef, {undefined, undefined, undefined}} || CRef <- ClientRefs1]), @@ -1551,16 +1551,16 @@ index_delete_by_file(File, #msstate { index_module = Index, %% shutdown and recovery %%---------------------------------------------------------------------------- -recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir, _Server) -> +recover_index_and_client_refs(IndexModule, _Recover, undefined, Dir) -> {false, IndexModule:new(Dir), []}; -recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir, Server) -> - rabbit_log:warning("~w: rebuilding indices from scratch~n", [Server]), +recover_index_and_client_refs(IndexModule, false, _ClientRefs, Dir) -> + rabbit_log:warning("~tp : rebuilding indices from scratch~n", [Dir]), {false, IndexModule:new(Dir), []}; -recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir, Server) -> +recover_index_and_client_refs(IndexModule, true, ClientRefs, Dir) -> Fresh = fun (ErrorMsg, ErrorArgs) -> - rabbit_log:warning("~w: " ++ ErrorMsg ++ "~n" + rabbit_log:warning("~tp : " ++ ErrorMsg ++ "~n" "rebuilding indices from scratch~n", - [Server | ErrorArgs]), + [Dir | ErrorArgs]), {false, IndexModule:new(Dir), []} end, case read_recovery_terms(Dir) of diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index a5be43109354..5784f199caef 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -13,6 +13,7 @@ start_link(Name, ClientRefs, StartupFunState) -> [Name, ClientRefs, StartupFunState]). init([Name, ClientRefs, StartupFunState]) -> + ets:new(Name, [named_table, public]), {ok, {{simple_one_for_one, 1, 1}, [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, [Name, ClientRefs, StartupFunState]}, @@ -23,37 +24,54 @@ add_vhost(Name, VHost) -> supervisor2:start_child(Name, [VHost]). start_vhost(Name, ClientRefs, StartupFunState, VHost) -> - VHostName = vhost_store_name(Name, VHost), - VHostDir = vhost_store_dir(VHost), - ok = rabbit_file:ensure_dir(VHostDir), - rabbit_msg_store:start_link(VHostName, VHostDir, - ClientRefs, StartupFunState). + case vhost_store_pid(Name, VHost) of + no_pid -> + VHostDir = vhost_store_dir(VHost), + ok = rabbit_file:ensure_dir(VHostDir), + case rabbit_msg_store:start_link(Name, VHostDir, ClientRefs, StartupFunState) of + {ok, Pid} -> + ets:insert(Name, {VHost, Pid}), + {ok, Pid}; + Other -> Other + end; + Pid when is_pid(Pid) -> + {error, {already_started, Pid}} + end. delete_vhost(Name, VHost) -> - VHostName = vhost_store_name(Name, VHost), - case whereis(VHostName) of - undefined -> ok; - Pid -> supervisor2:terminate_child(Name, Pid) + case vhost_store_pid(Name, VHost) of + no_pid -> ok; + Pid when is_pid(Pid) -> + supervisor2:terminate_child(Name, Pid), + cleanup_vhost_store(Name, VHost, Pid) end, ok. -client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> - VHostName = maybe_start_vhost(Server, VHost), - rabbit_msg_store:client_init(VHostName, Ref, MsgOnDiskFun, CloseFDsFun). +client_init(Name, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> + VHostPid = maybe_start_vhost(Name, VHost), + rabbit_msg_store:client_init(VHostPid, Ref, MsgOnDiskFun, CloseFDsFun). -maybe_start_vhost(Server, VHost) -> - VHostName = vhost_store_name(Server, VHost), - case whereis(VHostName) of - undefined -> add_vhost(Server, VHost); - _ -> ok - end, - VHostName. +maybe_start_vhost(Name, VHost) -> + case add_vhost(Name, VHost) of + {ok, Pid} -> Pid; + {error, {already_started, Pid}} -> Pid; + Error -> throw(Error) + end. -vhost_store_name(Name, VHost) -> - Base64EncodedName = rabbit_vhost:dir(VHost), - binary_to_atom(<<(atom_to_binary(Name, utf8))/binary, "_", - Base64EncodedName/binary>>, - utf8). +vhost_store_pid(Name, VHost) -> + case ets:lookup(Name, VHost) of + [] -> no_pid; + [Pid] -> + case erlang:is_process_alive(Pid) of + true -> Pid; + false -> + cleanup_vhost_store(Name, VHost, Pid), + no_pid + end + end. + +cleanup_vhost_store(Name, VHost, Pid) -> + ets:delete_object(Name, {VHost, Pid}). vhost_store_dir(VHost) -> Dir = rabbit_mnesia:dir(), @@ -61,8 +79,9 @@ vhost_store_dir(VHost) -> binary_to_list(filename:join([Dir, Base64EncodedName])). successfully_recovered_state(Name, VHost) -> - VHostName = vhost_store_name(Name, VHost), - rabbit_msg_store:successfully_recovered_state(VHostName). - -% force_recovery -% transform_dir + case vhost_store_pid(Name, VHost) of + no_pid -> + throw({message_store_not_started, Name, VHost}); + Pid when is_pid(Pid) -> + rabbit_msg_store:successfully_recovered_state(Pid) + end. diff --git a/test/channel_operation_timeout_test_queue.erl b/test/channel_operation_timeout_test_queue.erl index 07b1235672bb..5e256b138162 100644 --- a/test/channel_operation_timeout_test_queue.erl +++ b/test/channel_operation_timeout_test_queue.erl @@ -296,7 +296,7 @@ init(#amqqueue { name = QueueName, durable = IsDurable }, Terms, {DeltaCount, DeltaBytes, IndexState} = rabbit_queue_index:recover( QueueName, RecoveryTerms, - rabbit_msg_store_vhost_sup:successfully_recovered_state(?PERSISTENT_MSG_STORE), + rabbit_msg_store_vhost_sup:successfully_recovered_state(?PERSISTENT_MSG_STORE, VHost), ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), init(IsDurable, IndexState, DeltaCount, DeltaBytes, RecoveryTerms, PersistentClient, TransientClient). From eea4fc3fd423d2b5c624ecf440e7bfcbed6ff044 Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Fri, 11 Nov 2016 20:08:38 +0300 Subject: [PATCH 22/47] Use a fixed size encoding fn for vhost directory names --- src/rabbit_msg_store_vhost_sup.erl | 10 +++++----- src/rabbit_queue_index.erl | 2 +- src/rabbit_vhost.erl | 9 +++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 5784f199caef..06034819b487 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -73,11 +73,6 @@ vhost_store_pid(Name, VHost) -> cleanup_vhost_store(Name, VHost, Pid) -> ets:delete_object(Name, {VHost, Pid}). -vhost_store_dir(VHost) -> - Dir = rabbit_mnesia:dir(), - Base64EncodedName = rabbit_vhost:dir(VHost), - binary_to_list(filename:join([Dir, Base64EncodedName])). - successfully_recovered_state(Name, VHost) -> case vhost_store_pid(Name, VHost) of no_pid -> @@ -85,3 +80,8 @@ successfully_recovered_state(Name, VHost) -> Pid when is_pid(Pid) -> rabbit_msg_store:successfully_recovered_state(Pid) end. + +vhost_store_dir(VHost) -> + Dir = rabbit_mnesia:dir(), + EncodedName = list_to_binary(rabbit_vhost:vhost_name_to_dir_name(VHost)), + binary_to_list(filename:join([Dir, EncodedName])). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d19694403c78..8bd44cb1edad 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -517,7 +517,7 @@ blank_state_dir(Dir) -> queue_dir(#resource{ virtual_host = VHost } = QueueName) -> %% Queue directory is {node_database_dir}/{vhost}/queues/{queue} - filename:join([queues_base_dir(), rabbit_vhost:dir(VHost), + filename:join([queues_base_dir(), rabbit_vhost:vhost_name_to_dir_name(VHost), "queues", queue_name_to_dir_name(QueueName)]). blank_state_dir_funs(Dir, OnSyncFun, OnSyncMsgFun) -> diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 2f323fc41b2f..e6df4e9364bb 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -23,7 +23,7 @@ -export([add/1, delete/1, exists/1, list/0, with/2, assert/1, update/2, set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). --export([dir/1]). +-export([vhost_name_to_dir_name/1]). -export([purge_messages/1]). -spec add(rabbit_types:vhost()) -> 'ok'. @@ -97,7 +97,7 @@ delete(VHostPath) -> ok. purge_messages(VHostPath) -> - VhostDir = filename:join(rabbit_mnesia:dir(), dir(VHostPath)), + VhostDir = filename:join(rabbit_mnesia:dir(), vhost_name_to_dir_name(VHostPath)), rabbit_log:info("Deleting vhost directory '~s'~n", [VhostDir]), %% Message store is stopped to close file handles rabbit_variable_queue:stop_vhost_msg_store(VHostPath), @@ -198,5 +198,6 @@ info_all(Items, Ref, AggregatorPid) -> rabbit_control_misc:emitting_map( AggregatorPid, Ref, fun(VHost) -> info(VHost, Items) end, list()). -dir(Vhost) -> - base64:encode(Vhost). \ No newline at end of file +vhost_name_to_dir_name(Vhost) -> + <> = erlang:md5(term_to_binary(Vhost)), + rabbit_misc:format("~.36B", [Num]). From 1643c344b872ee43317928f6e579adb568cb45db Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Fri, 11 Nov 2016 20:32:23 +0300 Subject: [PATCH 23/47] Renames --- src/rabbit_msg_store_vhost_sup.erl | 4 ++-- src/rabbit_variable_queue.erl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 06034819b487..601b8746e727 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -48,10 +48,10 @@ delete_vhost(Name, VHost) -> ok. client_init(Name, Ref, MsgOnDiskFun, CloseFDsFun, VHost) -> - VHostPid = maybe_start_vhost(Name, VHost), + VHostPid = maybe_start_store_for_vhost(Name, VHost), rabbit_msg_store:client_init(VHostPid, Ref, MsgOnDiskFun, CloseFDsFun). -maybe_start_vhost(Name, VHost) -> +maybe_start_store_for_vhost(Name, VHost) -> case add_vhost(Name, VHost) of {ok, Pid} -> Pid; {error, {already_started, Pid}} -> Pid; diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 41ee6cde4cd2..899515488642 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2743,8 +2743,8 @@ move_messages_to_vhost_store() -> ok = rabbit_sup:stop_child(NewStoreSup). migrate_queue(Queue, OldStore, NewStoreSup) -> - OldStoreClient = get_old_client(OldStore), - NewStoreClient = get_new_store_client(Queue, NewStoreSup), + OldStoreClient = get_global_store_client(OldStore), + NewStoreClient = get_per_vhost_store_client(Queue, NewStoreSup), #amqqueue{name = QueueName} = Queue, rabbit_queue_index:move_to_per_vhost_stores(QueueName), %% WARNING: During scan_queue_segments queue index state is being recovered @@ -2775,15 +2775,15 @@ migrate_message(MsgId, OldC, NewC) -> _ -> OldC end. -get_new_store_client(#amqqueue{name = #resource{virtual_host = VHost}}, - NewStoreSup) -> +get_per_vhost_store_client(#amqqueue{name = #resource{virtual_host = VHost}}, + NewStoreSup) -> rabbit_msg_store_vhost_sup:client_init(NewStoreSup, rabbit_guid:gen(), fun(_,_) -> ok end, fun() -> ok end, VHost). -get_old_client(OldStore) -> +get_global_store_client(OldStore) -> rabbit_msg_store:client_init(OldStore, rabbit_guid:gen(), fun(_,_) -> ok end, From 460d413aca94ce0bd68200efa2edefbfef2f1cbe Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Fri, 11 Nov 2016 21:30:29 +0300 Subject: [PATCH 24/47] We get a tuple from ETS here, not just a pid --- src/rabbit_msg_store_vhost_sup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 601b8746e727..d9211d21a322 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -61,7 +61,7 @@ maybe_start_store_for_vhost(Name, VHost) -> vhost_store_pid(Name, VHost) -> case ets:lookup(Name, VHost) of [] -> no_pid; - [Pid] -> + [{VHost, Pid}] -> case erlang:is_process_alive(Pid) of true -> Pid; false -> From 245fd0804d451cef7f55a07e517b8ba176294e82 Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Sat, 12 Nov 2016 17:13:09 +0300 Subject: [PATCH 25/47] Fix test expectations, more renames, more logging --- src/rabbit_msg_store_vhost_sup.erl | 9 +++++---- src/rabbit_queue_index.erl | 2 +- src/rabbit_vhost.erl | 8 ++++---- test/per_vhost_msg_store_SUITE.erl | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index d9211d21a322..6b4988b06d54 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -6,7 +6,7 @@ client_init/5, successfully_recovered_state/2]). %% Internal --export([start_vhost/4]). +-export([start_store_for_vhost/4]). start_link(Name, ClientRefs, StartupFunState) -> supervisor2:start_link({local, Name}, ?MODULE, @@ -15,7 +15,7 @@ start_link(Name, ClientRefs, StartupFunState) -> init([Name, ClientRefs, StartupFunState]) -> ets:new(Name, [named_table, public]), {ok, {{simple_one_for_one, 1, 1}, - [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_vhost, + [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_store_for_vhost, [Name, ClientRefs, StartupFunState]}, transient, infinity, supervisor, [rabbit_msg_store]}]}}. @@ -23,11 +23,12 @@ init([Name, ClientRefs, StartupFunState]) -> add_vhost(Name, VHost) -> supervisor2:start_child(Name, [VHost]). -start_vhost(Name, ClientRefs, StartupFunState, VHost) -> +start_store_for_vhost(Name, ClientRefs, StartupFunState, VHost) -> case vhost_store_pid(Name, VHost) of no_pid -> VHostDir = vhost_store_dir(VHost), ok = rabbit_file:ensure_dir(VHostDir), + rabbit_log:info("Making sure message store directory '~s' for vhost '~s' exists~n", [VHostDir, VHost]), case rabbit_msg_store:start_link(Name, VHostDir, ClientRefs, StartupFunState) of {ok, Pid} -> ets:insert(Name, {VHost, Pid}), @@ -83,5 +84,5 @@ successfully_recovered_state(Name, VHost) -> vhost_store_dir(VHost) -> Dir = rabbit_mnesia:dir(), - EncodedName = list_to_binary(rabbit_vhost:vhost_name_to_dir_name(VHost)), + EncodedName = list_to_binary(rabbit_vhost:dir(VHost)), binary_to_list(filename:join([Dir, EncodedName])). diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 8bd44cb1edad..d19694403c78 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -517,7 +517,7 @@ blank_state_dir(Dir) -> queue_dir(#resource{ virtual_host = VHost } = QueueName) -> %% Queue directory is {node_database_dir}/{vhost}/queues/{queue} - filename:join([queues_base_dir(), rabbit_vhost:vhost_name_to_dir_name(VHost), + filename:join([queues_base_dir(), rabbit_vhost:dir(VHost), "queues", queue_name_to_dir_name(QueueName)]). blank_state_dir_funs(Dir, OnSyncFun, OnSyncMsgFun) -> diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index e6df4e9364bb..26fdaa0db879 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -23,7 +23,7 @@ -export([add/1, delete/1, exists/1, list/0, with/2, assert/1, update/2, set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). --export([vhost_name_to_dir_name/1]). +-export([dir/1]). -export([purge_messages/1]). -spec add(rabbit_types:vhost()) -> 'ok'. @@ -97,8 +97,8 @@ delete(VHostPath) -> ok. purge_messages(VHostPath) -> - VhostDir = filename:join(rabbit_mnesia:dir(), vhost_name_to_dir_name(VHostPath)), - rabbit_log:info("Deleting vhost directory '~s'~n", [VhostDir]), + VhostDir = filename:join(rabbit_mnesia:dir(), dir(VHostPath)), + rabbit_log:info("Deleting vhost message store directory at '~s'~n", [VhostDir]), %% Message store is stopped to close file handles rabbit_variable_queue:stop_vhost_msg_store(VHostPath), ok = rabbit_file:recursive_delete([VhostDir]), @@ -198,6 +198,6 @@ info_all(Items, Ref, AggregatorPid) -> rabbit_control_misc:emitting_map( AggregatorPid, Ref, fun(VHost) -> info(VHost, Items) end, list()). -vhost_name_to_dir_name(Vhost) -> +dir(Vhost) -> <> = erlang:md5(term_to_binary(Vhost)), rabbit_misc:format("~.36B", [Num]). diff --git a/test/per_vhost_msg_store_SUITE.erl b/test/per_vhost_msg_store_SUITE.erl index 4a890ce3ea9b..2a13440564d0 100644 --- a/test/per_vhost_msg_store_SUITE.erl +++ b/test/per_vhost_msg_store_SUITE.erl @@ -252,4 +252,4 @@ open_channel(Vhost, Config) -> {ok, Conn} = amqp_connection:start( #amqp_params_direct{node = Node, virtual_host = Vhost}), {ok, Chan} = amqp_connection:open_channel(Conn), - Chan. \ No newline at end of file + Chan. From 7a84a2492f541166591eee8f412c09911b4e13ab Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Sun, 13 Nov 2016 01:01:09 +0300 Subject: [PATCH 26/47] Towards functional message store migration fn --- src/rabbit_msg_store.erl | 11 ++++++++--- src/rabbit_msg_store_vhost_sup.erl | 7 +------ src/rabbit_sup.erl | 10 +++++++++- src/rabbit_variable_queue.erl | 10 ++++++---- src/rabbit_vhost.erl | 30 +++++++++++++++++++----------- 5 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 1e929c7f7c56..cb87af70e15c 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -18,7 +18,7 @@ -behaviour(gen_server2). --export([start_link/4, successfully_recovered_state/1, +-export([start_link/4, start_global_store_link/4, successfully_recovered_state/1, client_init/4, client_terminate/1, client_delete_and_terminate/1, client_ref/1, close_all_indicated/1, write/3, write_flow/3, read/2, contains/2, remove/2]). @@ -479,10 +479,15 @@ start_link(Name, Dir, ClientRefs, StartupFunState) when is_atom(Name) -> [Name, Dir, ClientRefs, StartupFunState], [{timeout, infinity}]). +start_global_store_link(Name, Dir, ClientRefs, StartupFunState) when is_atom(Name) -> + gen_server2:start_link({local, Name}, ?MODULE, + [Name, Dir, ClientRefs, StartupFunState], + [{timeout, infinity}]). + successfully_recovered_state(Server) -> gen_server2:call(Server, successfully_recovered_state, infinity). -client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) when is_pid(Server) -> +client_init(Server, Ref, MsgOnDiskFun, CloseFDsFun) when is_pid(Server); is_atom(Server) -> {IState, IModule, Dir, GCPid, FileHandlesEts, FileSummaryEts, CurFileCacheEts, FlyingEts} = gen_server2:call( @@ -548,7 +553,7 @@ remove(MsgIds, CState = #client_msstate { client_ref = CRef }) -> [client_update_flying(-1, MsgId, CState) || MsgId <- MsgIds], server_cast(CState, {remove, CRef, MsgIds}). -set_maximum_since_use(Server, Age) when is_pid(Server) -> +set_maximum_since_use(Server, Age) when is_pid(Server); is_atom(Server) -> gen_server2:cast(Server, {set_maximum_since_use, Age}). %%---------------------------------------------------------------------------- diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 6b4988b06d54..65bf2c74e81a 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -26,7 +26,7 @@ add_vhost(Name, VHost) -> start_store_for_vhost(Name, ClientRefs, StartupFunState, VHost) -> case vhost_store_pid(Name, VHost) of no_pid -> - VHostDir = vhost_store_dir(VHost), + VHostDir = rabbit_vhost:msg_store_dir_path(VHost), ok = rabbit_file:ensure_dir(VHostDir), rabbit_log:info("Making sure message store directory '~s' for vhost '~s' exists~n", [VHostDir, VHost]), case rabbit_msg_store:start_link(Name, VHostDir, ClientRefs, StartupFunState) of @@ -81,8 +81,3 @@ successfully_recovered_state(Name, VHost) -> Pid when is_pid(Pid) -> rabbit_msg_store:successfully_recovered_state(Pid) end. - -vhost_store_dir(VHost) -> - Dir = rabbit_mnesia:dir(), - EncodedName = list_to_binary(rabbit_vhost:dir(VHost)), - binary_to_list(filename:join([Dir, EncodedName])). diff --git a/src/rabbit_sup.erl b/src/rabbit_sup.erl index ad70540e5b26..a457938dc94f 100644 --- a/src/rabbit_sup.erl +++ b/src/rabbit_sup.erl @@ -18,7 +18,7 @@ -behaviour(supervisor). --export([start_link/0, start_child/1, start_child/2, start_child/3, +-export([start_link/0, start_child/1, start_child/2, start_child/3, start_child/4, start_supervisor_child/1, start_supervisor_child/2, start_supervisor_child/3, start_restartable_child/1, start_restartable_child/2, @@ -37,6 +37,7 @@ -spec start_child(atom()) -> 'ok'. -spec start_child(atom(), [any()]) -> 'ok'. -spec start_child(atom(), atom(), [any()]) -> 'ok'. +-spec start_child(atom(), atom(), atom(), [any()]) -> 'ok'. -spec start_supervisor_child(atom()) -> 'ok'. -spec start_supervisor_child(atom(), [any()]) -> 'ok'. -spec start_supervisor_child(atom(), atom(), [any()]) -> 'ok'. @@ -60,6 +61,13 @@ start_child(ChildId, Mod, Args) -> {ChildId, {Mod, start_link, Args}, transient, ?WORKER_WAIT, worker, [Mod]})). +start_child(ChildId, Mod, Fun, Args) -> + child_reply(supervisor:start_child( + ?SERVER, + {ChildId, {Mod, Fun, Args}, + transient, ?WORKER_WAIT, worker, [Mod]})). + + start_supervisor_child(Mod) -> start_supervisor_child(Mod, []). start_supervisor_child(Mod, Args) -> start_supervisor_child(Mod, Mod, Args). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 899515488642..fc8dee83854a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2715,9 +2715,9 @@ transform_store(Store, TransformFun) -> move_messages_to_vhost_store() -> rabbit_log:info("Moving messages to per-vhsot message store"), Queues = list_persistent_queues(), - %% Old msg_store may require recovery. + %% Legacy (global) msg_store may require recovery. %% This upgrade step should only be started - %% if we are upgrading from version with old store. + %% if we are upgrading from a pre-3.7.0 version. {RecoveryTerms, StartFunState} = start_recovery_terms(Queues), OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), %% New store should not be recovered. @@ -2743,9 +2743,11 @@ move_messages_to_vhost_store() -> ok = rabbit_sup:stop_child(NewStoreSup). migrate_queue(Queue, OldStore, NewStoreSup) -> + #amqqueue{name = QueueName} = Queue, + rabbit_log:info("Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", + [QueueName#resource.name, QueueName#resource.virtual_host]), OldStoreClient = get_global_store_client(OldStore), NewStoreClient = get_per_vhost_store_client(Queue, NewStoreSup), - #amqqueue{name = QueueName} = Queue, rabbit_queue_index:move_to_per_vhost_stores(QueueName), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! @@ -2813,7 +2815,7 @@ start_recovery_terms(Queues) -> run_old_persistent_store(Refs, StartFunState) -> OldStoreName = ?PERSISTENT_MSG_STORE, - ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, + ok = rabbit_sup:start_child(OldStoreName, rabbit_msg_store, start_global_store_link, [OldStoreName, rabbit_mnesia:dir(), Refs, StartFunState]), OldStoreName. diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index 26fdaa0db879..bbf77f290fab 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -23,7 +23,7 @@ -export([add/1, delete/1, exists/1, list/0, with/2, assert/1, update/2, set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). --export([dir/1]). +-export([dir/1, msg_store_dir_path/1]). -export([purge_messages/1]). -spec add(rabbit_types:vhost()) -> 'ok'. @@ -96,15 +96,15 @@ delete(VHostPath) -> [ok = Fun() || Fun <- Funs], ok. -purge_messages(VHostPath) -> - VhostDir = filename:join(rabbit_mnesia:dir(), dir(VHostPath)), - rabbit_log:info("Deleting vhost message store directory at '~s'~n", [VhostDir]), +purge_messages(VHost) -> + VhostDir = msg_store_dir_path(VHost), + rabbit_log:info("Deleting message store directory for vhost '~s' at '~s'~n", [VHost, VhostDir]), %% Message store is stopped to close file handles - rabbit_variable_queue:stop_vhost_msg_store(VHostPath), + rabbit_variable_queue:stop_vhost_msg_store(VHost), ok = rabbit_file:recursive_delete([VhostDir]), - %% Second terminate is made in case message store is - %% restarted during deletion - rabbit_variable_queue:stop_vhost_msg_store(VHostPath). + %% Ensure the store is terminated even if it was restarted during the delete operation + %% above. + rabbit_variable_queue:stop_vhost_msg_store(VHost). assert_benign(ok) -> ok; assert_benign({ok, _}) -> ok; @@ -179,6 +179,17 @@ set_limits(VHost = #vhost{}, undefined) -> set_limits(VHost = #vhost{}, Limits) -> VHost#vhost{limits = Limits}. + +dir(Vhost) -> + <> = erlang:md5(term_to_binary(Vhost)), + rabbit_misc:format("~.36B", [Num]). + +msg_store_dir_path(VHost) -> + Dir = rabbit_mnesia:dir(), + EncodedName = list_to_binary(dir(VHost)), + binary_to_list(filename:join([Dir, "msg_stores", "vhosts", EncodedName])). + + %%---------------------------------------------------------------------------- infos(Items, X) -> [{Item, i(Item, X)} || Item <- Items]. @@ -198,6 +209,3 @@ info_all(Items, Ref, AggregatorPid) -> rabbit_control_misc:emitting_map( AggregatorPid, Ref, fun(VHost) -> info(VHost, Items) end, list()). -dir(Vhost) -> - <> = erlang:md5(term_to_binary(Vhost)), - rabbit_misc:format("~.36B", [Num]). From f6feb6ba16e9132194a991ef233c663a734f4e24 Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Sun, 13 Nov 2016 02:57:13 +0300 Subject: [PATCH 27/47] Cosmetics --- src/rabbit_msg_store.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index cb87af70e15c..81364c68bc9d 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -63,7 +63,7 @@ %% the module for index ops, %% rabbit_msg_store_ets_index by default index_module, - %% %% where are messages? + %% where are messages? index_state, %% current file name as number current_file, From c135b70c4690ddf3fdbafb9e0a8191cb2312eaae Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Mon, 14 Nov 2016 13:11:13 +0000 Subject: [PATCH 28/47] New dir path for queue index data --- src/rabbit_queue_index.erl | 12 +++++++----- src/rabbit_variable_queue.erl | 2 +- src/rabbit_vhost.erl | 12 +++++++++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index d19694403c78..07ae20f19e47 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -494,8 +494,8 @@ start(DurableQueueNames) -> stop() -> rabbit_recovery_terms:stop(). all_queue_directory_names() -> - QueuesBaseDir = queues_base_dir(), - filelib:wildcard(filename:join([QueuesBaseDir, "*", "queues", "*"])). + filelib:wildcard(filename:join([rabbit_vhost:msg_store_dir_wildcard(), + "queues", "*"])). %%---------------------------------------------------------------------------- %% startup and shutdown @@ -516,9 +516,11 @@ blank_state_dir(Dir) -> fun (_) -> ok end). queue_dir(#resource{ virtual_host = VHost } = QueueName) -> - %% Queue directory is {node_database_dir}/{vhost}/queues/{queue} - filename:join([queues_base_dir(), rabbit_vhost:dir(VHost), - "queues", queue_name_to_dir_name(QueueName)]). + %% Queue directory is + %% {node_database_dir}/msg_stores/vhosts/{vhost}/queues/{queue} + VHostDir = rabbit_vhost:msg_store_dir_path(VHost), + QueueDir = queue_name_to_dir_name(QueueName), + filename:join([VHostDir, "queues", QueueDir]). blank_state_dir_funs(Dir, OnSyncFun, OnSyncMsgFun) -> {ok, MaxJournal} = diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index fc8dee83854a..7f57c4320c39 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2713,7 +2713,7 @@ transform_store(Store, TransformFun) -> rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). move_messages_to_vhost_store() -> - rabbit_log:info("Moving messages to per-vhsot message store"), + rabbit_log:info("Moving messages to per-vhost message store"), Queues = list_persistent_queues(), %% Legacy (global) msg_store may require recovery. %% This upgrade step should only be started diff --git a/src/rabbit_vhost.erl b/src/rabbit_vhost.erl index bbf77f290fab..26b8143feccd 100644 --- a/src/rabbit_vhost.erl +++ b/src/rabbit_vhost.erl @@ -23,7 +23,7 @@ -export([add/1, delete/1, exists/1, list/0, with/2, assert/1, update/2, set_limits/2, limits_of/1]). -export([info/1, info/2, info_all/0, info_all/1, info_all/2, info_all/3]). --export([dir/1, msg_store_dir_path/1]). +-export([dir/1, msg_store_dir_path/1, msg_store_dir_wildcard/0]). -export([purge_messages/1]). -spec add(rabbit_types:vhost()) -> 'ok'. @@ -185,10 +185,16 @@ dir(Vhost) -> rabbit_misc:format("~.36B", [Num]). msg_store_dir_path(VHost) -> - Dir = rabbit_mnesia:dir(), EncodedName = list_to_binary(dir(VHost)), - binary_to_list(filename:join([Dir, "msg_stores", "vhosts", EncodedName])). + rabbit_data_coercion:to_list(filename:join([msg_store_dir_base(), + EncodedName])). + +msg_store_dir_wildcard() -> + rabbit_data_coercion:to_list(filename:join([msg_store_dir_base(), "*"])). +msg_store_dir_base() -> + Dir = rabbit_mnesia:dir(), + filename:join([Dir, "msg_stores", "vhosts"]). %%---------------------------------------------------------------------------- From c3ae58154b231c81a341358ef2db438f316be578 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 24 Nov 2016 15:10:33 +0000 Subject: [PATCH 29/47] Rollback to sequential queue migration. Move queue directories before migrating messages --- src/rabbit_variable_queue.erl | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 7f57c4320c39..c711760417e4 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2715,6 +2715,13 @@ transform_store(Store, TransformFun) -> move_messages_to_vhost_store() -> rabbit_log:info("Moving messages to per-vhost message store"), Queues = list_persistent_queues(), + %% Move the queue index for each persistent queue to the new store + lists:map( + fun(Queue) -> + #amqqueue{name = QueueName} = Queue, + rabbit_queue_index:move_to_per_vhost_stores(QueueName) + end, + Queues), %% Legacy (global) msg_store may require recovery. %% This upgrade step should only be started %% if we are upgrading from a pre-3.7.0 version. @@ -2723,19 +2730,22 @@ move_messages_to_vhost_store() -> %% New store should not be recovered. NewStoreSup = start_new_store_sup(), - {ok, Gatherer} = gatherer:start_link(), + % {ok, Gatherer} = gatherer:start_link(), lists:map( fun(Queue) -> - ok = gatherer:fork(Gatherer), - ok = worker_pool:submit_async( - fun () -> - migrate_queue(Queue, OldStore, NewStoreSup), - gatherer:finish(Gatherer) - end) + migrate_queue(Queue, OldStore, NewStoreSup), + #amqqueue{name = QueueName} = Queue, + rabbit_log:info("Queue migration finished ~p", [QueueName]) + % ok = gatherer:fork(Gatherer), + % ok = worker_pool:submit_async( + % fun () -> + % migrate_queue(Queue, OldStore, NewStoreSup), + % gatherer:finish(Gatherer) + % end) end, Queues), - empty = gatherer:out(Gatherer), - ok = gatherer:stop(Gatherer), + % empty = gatherer:out(Gatherer), + % ok = gatherer:stop(Gatherer), delete_old_store(OldStore), @@ -2748,7 +2758,6 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> [QueueName#resource.name, QueueName#resource.virtual_host]), OldStoreClient = get_global_store_client(OldStore), NewStoreClient = get_per_vhost_store_client(Queue, NewStoreSup), - rabbit_queue_index:move_to_per_vhost_stores(QueueName), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( From 944e0f476b2055e9a74be31aa7d193dd6569863a Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 24 Nov 2016 15:49:52 +0000 Subject: [PATCH 30/47] Fix vhost dir location in test --- test/per_vhost_msg_store_SUITE.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/per_vhost_msg_store_SUITE.erl b/test/per_vhost_msg_store_SUITE.erl index 2a13440564d0..4d88c84b7ee5 100644 --- a/test/per_vhost_msg_store_SUITE.erl +++ b/test/per_vhost_msg_store_SUITE.erl @@ -206,9 +206,8 @@ get_global_folder_size(Config) -> folder_size(BaseDir). vhost_dir(Vhost, Config) -> - BaseDir = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_mnesia, dir, []), - VhostDir = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_vhost, dir, [Vhost]), - filename:join(BaseDir, VhostDir). + rabbit_ct_broker_helpers:rpc(Config, 0, + rabbit_vhost, msg_store_dir_path, [Vhost]). delete_vhost_data(Vhost, Config) -> Dir = vhost_dir(Vhost, Config), From 03bdc78d54c0bf1c1c9b8bf7fa544ef511d08a17 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Mon, 28 Nov 2016 15:56:10 +0000 Subject: [PATCH 31/47] Keep backup between mnesia upgrade and message store upgrade --- src/rabbit_upgrade.erl | 35 ++++++++++++++++++++++++----------- src/rabbit_version.erl | 27 ++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index 8b7d11e4faf4..1ee742340412 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -108,6 +108,7 @@ ensure_backup_taken() -> take_backup() -> BackupDir = backup_dir(), + info("upgrades: Backing up mnesia dir to ~p~n", [BackupDir]), case rabbit_mnesia:copy_db(BackupDir) of ok -> info("upgrades: Mnesia dir backed up to ~p~n", [BackupDir]); @@ -127,7 +128,9 @@ remove_backup() -> maybe_upgrade_mnesia() -> AllNodes = rabbit_mnesia:cluster_nodes(all), ok = rabbit_mnesia_rename:maybe_finish(AllNodes), - case rabbit_version:upgrades_required(mnesia) of + %% Mnesia upgrade is the first upgrade scope, + %% so we should create a backup here if there are any upgrades + case rabbit_version:all_upgrades_required([mnesia, local, message_store]) of {error, starting_from_scratch} -> ok; {error, version_not_available} -> @@ -143,10 +146,15 @@ maybe_upgrade_mnesia() -> ok; {ok, Upgrades} -> ensure_backup_taken(), - ok = case upgrade_mode(AllNodes) of - primary -> primary_upgrade(Upgrades, AllNodes); - secondary -> secondary_upgrade(AllNodes) - end + run_mnesia_upgrades(proplists:get_value(mnesia, Upgrades, []), + AllNodes) + end. + +run_mnesia_upgrades([], _) -> ok; +run_mnesia_upgrades(Upgrades, AllNodes) -> + case upgrade_mode(AllNodes) of + primary -> primary_upgrade(Upgrades, AllNodes); + secondary -> secondary_upgrade(AllNodes) end. upgrade_mode(AllNodes) -> @@ -244,24 +252,29 @@ maybe_upgrade_local() -> {ok, []} -> ensure_backup_removed(), ok; {ok, Upgrades} -> mnesia:stop(), - ensure_backup_taken(), ok = apply_upgrades(local, Upgrades, fun () -> ok end), - ensure_backup_removed(), ok end. %% ------------------------------------------------------------------- maybe_migrate_queues_to_per_vhost_storage() -> - case rabbit_version:upgrades_required(queues) of + Result = case rabbit_version:upgrades_required(message_store) of {error, version_not_available} -> version_not_available; {error, starting_from_scratch} -> starting_from_scratch; {error, _} = Err -> throw(Err); {ok, []} -> ok; - {ok, Upgrades} -> apply_upgrades(queues, Upgrades, - fun() -> ok end) - end. + {ok, Upgrades} -> apply_upgrades(message_store, + Upgrades, + fun() -> ok end), + ok + end, + %% Message store upgrades should be + %% the last group. + %% Backup can be deleted here. + ensure_backup_removed(), + Result. %% ------------------------------------------------------------------- diff --git a/src/rabbit_version.erl b/src/rabbit_version.erl index a27f0aca0052..4e2edd19ebab 100644 --- a/src/rabbit_version.erl +++ b/src/rabbit_version.erl @@ -18,7 +18,8 @@ -export([recorded/0, matches/2, desired/0, desired_for_scope/1, record_desired/0, record_desired_for_scope/1, - upgrades_required/1, check_version_consistency/3, + upgrades_required/1, all_upgrades_required/1, + check_version_consistency/3, check_version_consistency/4, check_otp_consistency/1, version_error/3]). @@ -117,6 +118,30 @@ upgrades_required(Scope) -> end, Scope) end. +all_upgrades_required(Scopes) -> + case recorded() of + {error, enoent} -> + case filelib:is_file(rabbit_guid:filename()) of + false -> {error, starting_from_scratch}; + true -> {error, version_not_available} + end; + {ok, _} -> + lists:foldl( + fun + (_, {error, Err}) -> {error, Err}; + (Scope, {ok, Acc}) -> + case upgrades_required(Scope) of + %% Lift errors from any scope. + {error, Err} -> {error, Err}; + %% Filter non-upgradable scopes + {ok, []} -> {ok, Acc}; + {ok, Upgrades} -> {ok, [{Scope, Upgrades} | Acc]} + end + end, + {ok, []}, + Scopes) + end. + %% ------------------------------------------------------------------- with_upgrade_graph(Fun, Scope) -> From 55451523b84168ceeb36398f7a8e6734b35e969a Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Mon, 28 Nov 2016 16:27:45 +0000 Subject: [PATCH 32/47] Migrate queues in batches of 100 in parallel. Write recovery terms for migrated queues. --- src/rabbit_queue_index.erl | 6 ++- src/rabbit_variable_queue.erl | 84 ++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 07ae20f19e47..4d2e865d3b23 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -26,7 +26,7 @@ -export([scan_queue_segments/3]). %% Migrates from global to per-vhost message stores --export([move_to_per_vhost_stores/1]). +-export([move_to_per_vhost_stores/1, update_recovery_term/2]). -define(CLEAN_FILENAME, "clean.dot"). @@ -1419,3 +1419,7 @@ move_to_per_vhost_stores(#resource{} = QueueName) -> [QueueName]) end, ok. + +update_recovery_term(#resource{} = QueueName, Term) -> + Key = queue_name_to_dir_name(QueueName), + rabbit_recovery_terms:store(Key, Term). \ No newline at end of file diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c711760417e4..bafbc1d1a9b7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -37,6 +37,8 @@ -export([stop_vhost_msg_store/1]). -include_lib("stdlib/include/qlc.hrl"). +-define(QUEUE_MIGRATION_BATCH_SIZE, 100). + %%---------------------------------------------------------------------------- %% Messages, and their position in the queue, can be in memory or on %% disk, or both. Persistent messages will have both message and @@ -351,7 +353,7 @@ %%---------------------------------------------------------------------------- -rabbit_upgrade({multiple_routing_keys, local, []}). --rabbit_upgrade({move_messages_to_vhost_store, queues, []}). +-rabbit_upgrade({move_messages_to_vhost_store, message_store, []}). -compile(export_all). @@ -2716,7 +2718,7 @@ move_messages_to_vhost_store() -> rabbit_log:info("Moving messages to per-vhost message store"), Queues = list_persistent_queues(), %% Move the queue index for each persistent queue to the new store - lists:map( + lists:foreach( fun(Queue) -> #amqqueue{name = QueueName} = Queue, rabbit_queue_index:move_to_per_vhost_stores(QueueName) @@ -2725,39 +2727,49 @@ move_messages_to_vhost_store() -> %% Legacy (global) msg_store may require recovery. %% This upgrade step should only be started %% if we are upgrading from a pre-3.7.0 version. - {RecoveryTerms, StartFunState} = start_recovery_terms(Queues), - OldStore = run_old_persistent_store(RecoveryTerms, StartFunState), + {QueuesWithTerms, RecoveryRefs, StartFunState} = start_recovery_terms(Queues), + + OldStore = run_old_persistent_store(RecoveryRefs, StartFunState), %% New store should not be recovered. NewStoreSup = start_new_store_sup(), + in_batches(?QUEUE_MIGRATION_BATCH_SIZE, + {rabbit_variable_queue, migrate_queue, [OldStore, NewStoreSup]}, + QueuesWithTerms), - % {ok, Gatherer} = gatherer:start_link(), - lists:map( - fun(Queue) -> - migrate_queue(Queue, OldStore, NewStoreSup), - #amqqueue{name = QueueName} = Queue, - rabbit_log:info("Queue migration finished ~p", [QueueName]) - % ok = gatherer:fork(Gatherer), - % ok = worker_pool:submit_async( - % fun () -> - % migrate_queue(Queue, OldStore, NewStoreSup), - % gatherer:finish(Gatherer) - % end) - end, - Queues), - % empty = gatherer:out(Gatherer), - % ok = gatherer:stop(Gatherer), - + rabbit_log:info("Message store migration finished"), delete_old_store(OldStore), ok = rabbit_queue_index:stop(), - ok = rabbit_sup:stop_child(NewStoreSup). + ok = rabbit_sup:stop_child(NewStoreSup), + ok. -migrate_queue(Queue, OldStore, NewStoreSup) -> - #amqqueue{name = QueueName} = Queue, +in_batches(Size, MFA, List) -> + in_batches(Size, 1, MFA, List). + +in_batches(_, _, _, []) -> ok; +in_batches(Size, BatchNum, MFA, List) -> + {Batch, Tail} = case Size > length(List) of + true -> {List, []}; + false -> lists:split(Size, List) + end, + rabbit_log:info("Migrating batch ~p of ~p queues ~n", [BatchNum, Size]), + {M, F, A} = MFA, + Keys = [ rpc:async_call(node(), M, F, [El | A]) || El <- Batch ], + lists:foreach(fun(Key) -> + case rpc:yield(Key) of + {badrpc, Err} -> throw(Err); + _ -> ok + end + end, + Keys), + rabbit_log:info("Batch ~p of ~p queues migrated ~n", [BatchNum, Size]), + in_batches(Size, BatchNum + 1, MFA, Tail). + +migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> rabbit_log:info("Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", [QueueName#resource.name, QueueName#resource.virtual_host]), OldStoreClient = get_global_store_client(OldStore), - NewStoreClient = get_per_vhost_store_client(Queue, NewStoreSup), + NewStoreClient = get_per_vhost_store_client(QueueName, NewStoreSup), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( @@ -2771,23 +2783,25 @@ migrate_queue(Queue, OldStore, NewStoreSup) -> OldC end, OldStoreClient, - QueueName). + QueueName), + rabbit_msg_store:client_terminate(OldStoreClient), + rabbit_msg_store:client_terminate(NewStoreClient), + NewClientRef = rabbit_msg_store:client_ref(NewStoreClient), + NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm, + {persistent_ref, NewClientRef}), + rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm), + rabbit_log:info("Queue migration finished ~p", [QueueName]), + {QueueName, NewClientRef}. migrate_message(MsgId, OldC, NewC) -> case rabbit_msg_store:read(MsgId, OldC) of {{ok, Msg}, OldC1} -> - case rabbit_msg_store:contains(MsgId, NewC) of - false -> ok = rabbit_msg_store:write(MsgId, Msg, NewC); - true -> ok - end, - % TODO: maybe remove in batches? - ok = rabbit_msg_store:remove([MsgId], OldC1), + ok = rabbit_msg_store:write(MsgId, Msg, NewC), OldC1; _ -> OldC end. -get_per_vhost_store_client(#amqqueue{name = #resource{virtual_host = VHost}}, - NewStoreSup) -> +get_per_vhost_store_client(#resource{virtual_host = VHost}, NewStoreSup) -> rabbit_msg_store_vhost_sup:client_init(NewStoreSup, rabbit_guid:gen(), fun(_,_) -> ok end, @@ -2820,7 +2834,7 @@ start_recovery_terms(Queues) -> Ref = proplists:get_value(persistent_ref, Terms), Ref =/= undefined end], - {Refs, StartFunState}. + {lists:zip(QueueNames, AllTerms), Refs, StartFunState}. run_old_persistent_store(Refs, StartFunState) -> OldStoreName = ?PERSISTENT_MSG_STORE, From cfc9a88d90dc24911a8ea7cafc1b4726d621ea77 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 30 Nov 2016 12:17:17 +0000 Subject: [PATCH 33/47] Write per queue upgrade log to a separate file --- scripts/rabbitmq-env | 9 ++++++--- scripts/rabbitmq-server | 3 +++ src/rabbit_lager.erl | 20 ++++++++++++++------ src/rabbit_log.erl | 1 + src/rabbit_variable_queue.erl | 26 +++++++++++++++++++------- 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/scripts/rabbitmq-env b/scripts/rabbitmq-env index d975f274b297..f1624eddf99d 100755 --- a/scripts/rabbitmq-env +++ b/scripts/rabbitmq-env @@ -236,9 +236,11 @@ rmq_normalize_path_var RABBITMQ_PLUGINS_DIR [ "x" = "x$RABBITMQ_LOGS" ] && RABBITMQ_LOGS=${LOGS} [ "x" != "x$RABBITMQ_LOGS" ] && export RABBITMQ_LOGS_source=environment [ "x" = "x$RABBITMQ_LOGS" ] && RABBITMQ_LOGS="${RABBITMQ_LOG_BASE}/${RABBITMQ_NODENAME}.log" +[ "x" = "x$RABBITMQ_UPGRADE_LOG" ] && RABBITMQ_UPGRADE_LOG="${RABBITMQ_LOG_BASE}/${RABBITMQ_NODENAME}_upgrade.log" -rmq_normalize_path_var \ - RABBITMQ_LOGS +rmq_normalize_path_var RABBITMQ_LOGS + +rmq_normalize_path_var RABBITMQ_UPGRADE_LOG [ "x" = "x$RABBITMQ_CTL_ERL_ARGS" ] && RABBITMQ_CTL_ERL_ARGS=${CTL_ERL_ARGS} @@ -254,7 +256,8 @@ rmq_check_if_shared_with_mnesia \ RABBITMQ_PLUGINS_EXPAND_DIR \ RABBITMQ_ENABLED_PLUGINS_FILE \ RABBITMQ_PLUGINS_DIR \ - RABBITMQ_LOGS + RABBITMQ_LOGS \ + RABBITMQ_UPGRADE_LOG ##--- End of overridden variables diff --git a/scripts/rabbitmq-server b/scripts/rabbitmq-server index 48365252e546..b85b4348c175 100755 --- a/scripts/rabbitmq-server +++ b/scripts/rabbitmq-server @@ -156,9 +156,11 @@ RABBITMQ_LISTEN_ARG= if [ "$RABBITMQ_LOGS" = '-' ]; then SASL_ERROR_LOGGER=tty RABBIT_LAGER_HANDLER=tty + RABBITMQ_LAGER_HANDLER_UPGRADE=tty else SASL_ERROR_LOGGER=false RABBIT_LAGER_HANDLER='"'${RABBITMQ_LOGS}'"' + RABBITMQ_LAGER_HANDLER_UPGRADE='"'${RABBITMQ_UPGRADE_LOG}'"' fi # we need to turn off path expansion because some of the vars, notably @@ -206,6 +208,7 @@ start_rabbitmq_server() { -sasl sasl_error_logger "$SASL_ERROR_LOGGER" \ -rabbit lager_log_root "\"$RABBITMQ_LOG_BASE\"" \ -rabbit lager_handler "$RABBIT_LAGER_HANDLER" \ + -rabbit lager_handler_upgrade "$RABBITMQ_LAGER_HANDLER_UPGRADE" \ -rabbit enabled_plugins_file "\"$RABBITMQ_ENABLED_PLUGINS_FILE\"" \ -rabbit plugins_dir "\"$RABBITMQ_PLUGINS_DIR\"" \ -rabbit plugins_expand_dir "\"$RABBITMQ_PLUGINS_EXPAND_DIR\"" \ diff --git a/src/rabbit_lager.erl b/src/rabbit_lager.erl index 8beee1084633..db918d69a999 100644 --- a/src/rabbit_lager.erl +++ b/src/rabbit_lager.erl @@ -210,7 +210,7 @@ configure_lager() -> %% messages to the default sink. To know the list of expected extra %% sinks, we look at the 'lager_extra_sinks' compilation option. Sinks0 = application:get_env(lager, extra_sinks, []), - Sinks1 = configure_extra_sinks(Sinks0, + Sinks1 = configure_extra_sinks(Sinks0, [error_logger | list_expected_sinks()]), %% TODO Waiting for basho/lager#303 %% Sinks2 = lists:keystore(error_logger_lager_event, 1, Sinks1, @@ -231,11 +231,7 @@ configure_lager() -> configure_extra_sinks(Sinks, [SinkName | Rest]) -> Sink0 = proplists:get_value(SinkName, Sinks, []), Sink1 = case proplists:is_defined(handlers, Sink0) of - false -> lists:keystore(handlers, 1, Sink0, - {handlers, - [{lager_forwarder_backend, - lager_util:make_internal_sink_name(lager) - }]}); + false -> default_sink_config(SinkName, Sink0); true -> Sink0 end, Sinks1 = lists:keystore(SinkName, 1, Sinks, {SinkName, Sink1}), @@ -243,6 +239,18 @@ configure_extra_sinks(Sinks, [SinkName | Rest]) -> configure_extra_sinks(Sinks, []) -> Sinks. +default_sink_config(rabbit_log_upgrade_lager_event, Sink) -> + io:format("I AM UPGRADE SINK"), + Handlers = lager_handlers(application:get_env(rabbit, + lager_handler_upgrade, + tty)), + lists:keystore(handlers, 1, Sink, {handlers, Handlers}); +default_sink_config(_, Sink) -> + lists:keystore(handlers, 1, Sink, + {handlers, + [{lager_forwarder_backend, + lager_util:make_internal_sink_name(lager)}]}). + list_expected_sinks() -> case application:get_env(rabbit, lager_extra_sinks) of {ok, List} -> diff --git a/src/rabbit_log.erl b/src/rabbit_log.erl index f60cf6c0c2ff..bb5ae14c3eb6 100644 --- a/src/rabbit_log.erl +++ b/src/rabbit_log.erl @@ -79,6 +79,7 @@ make_internal_sink_name(rabbit_log_channel) -> rabbit_log_channel_lager_event; make_internal_sink_name(rabbit_log_mirroring) -> rabbit_log_mirroring_lager_event; make_internal_sink_name(rabbit_log_queue) -> rabbit_log_queue_lager_event; make_internal_sink_name(rabbit_log_federation) -> rabbit_log_federation_lager_event; +make_internal_sink_name(rabbit_log_upgrade) -> rabbit_log_upgrade_lager_event; make_internal_sink_name(Category) -> lager_util:make_internal_sink_name(Category). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index bafbc1d1a9b7..ca8c92b80e26 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2715,7 +2715,7 @@ transform_store(Store, TransformFun) -> rabbit_msg_store:transform_dir(rabbit_mnesia:dir(), Store, TransformFun). move_messages_to_vhost_store() -> - rabbit_log:info("Moving messages to per-vhost message store"), + log_upgrade("Moving messages to per-vhost message store"), Queues = list_persistent_queues(), %% Move the queue index for each persistent queue to the new store lists:foreach( @@ -2736,7 +2736,7 @@ move_messages_to_vhost_store() -> {rabbit_variable_queue, migrate_queue, [OldStore, NewStoreSup]}, QueuesWithTerms), - rabbit_log:info("Message store migration finished"), + log_upgrade("Message store migration finished"), delete_old_store(OldStore), ok = rabbit_queue_index:stop(), @@ -2752,7 +2752,7 @@ in_batches(Size, BatchNum, MFA, List) -> true -> {List, []}; false -> lists:split(Size, List) end, - rabbit_log:info("Migrating batch ~p of ~p queues ~n", [BatchNum, Size]), + log_upgrade("Migrating batch ~p of ~p queues ~n", [BatchNum, Size]), {M, F, A} = MFA, Keys = [ rpc:async_call(node(), M, F, [El | A]) || El <- Batch ], lists:foreach(fun(Key) -> @@ -2762,12 +2762,13 @@ in_batches(Size, BatchNum, MFA, List) -> end end, Keys), - rabbit_log:info("Batch ~p of ~p queues migrated ~n", [BatchNum, Size]), + log_upgrade("Batch ~p of ~p queues migrated ~n", [BatchNum, Size]), in_batches(Size, BatchNum + 1, MFA, Tail). migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> - rabbit_log:info("Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", - [QueueName#resource.name, QueueName#resource.virtual_host]), + log_upgrade_verbose( + "Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", + [QueueName#resource.name, QueueName#resource.virtual_host]), OldStoreClient = get_global_store_client(OldStore), NewStoreClient = get_per_vhost_store_client(QueueName, NewStoreSup), %% WARNING: During scan_queue_segments queue index state is being recovered @@ -2790,7 +2791,7 @@ migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm, {persistent_ref, NewClientRef}), rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm), - rabbit_log:info("Queue migration finished ~p", [QueueName]), + log_upgrade_verbose("Queue migration finished ~p", [QueueName]), {QueueName, NewClientRef}. migrate_message(MsgId, OldC, NewC) -> @@ -2859,3 +2860,14 @@ delete_old_store(OldStore) -> rabbit_file:recursive_delete( [filename:join([rabbit_mnesia:dir(), ?TRANSIENT_MSG_STORE])]). +log_upgrade(Msg) -> + log_upgrade(Msg, []). + +log_upgrade(Msg, Args) -> + rabbit_log:info("message_store upgrades: " ++ Msg, Args). + +log_upgrade_verbose(Msg) -> + log_upgrade_verbose(Msg, []). + +log_upgrade_verbose(Msg, Args) -> + rabbit_log_upgrade:info(Msg, Args). \ No newline at end of file From f43990d05f8c629e41e9f748d7e8f6963039905f Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 30 Nov 2016 12:23:16 +0000 Subject: [PATCH 34/47] Make queue migration batch size configurable --- src/rabbit_variable_queue.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index ca8c92b80e26..9dcb2eef825a 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2732,7 +2732,9 @@ move_messages_to_vhost_store() -> OldStore = run_old_persistent_store(RecoveryRefs, StartFunState), %% New store should not be recovered. NewStoreSup = start_new_store_sup(), - in_batches(?QUEUE_MIGRATION_BATCH_SIZE, + MigrationBatchSize = application:get_env(rabbit, queue_migration_batch_size, + ?QUEUE_MIGRATION_BATCH_SIZE), + in_batches(MigrationBatchSize, {rabbit_variable_queue, migrate_queue, [OldStore, NewStoreSup]}, QueuesWithTerms), From 806cd800b055a660988c5b1da6ae388bb72fa3a7 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Fri, 16 Dec 2016 18:09:04 +0000 Subject: [PATCH 35/47] Replace dying_client_index and dying_clients set with a map Required to decrease ETS tables usage per vhost --- src/rabbit_lager.erl | 1 - src/rabbit_msg_store.erl | 39 +++++++++++++++------------------------ 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/rabbit_lager.erl b/src/rabbit_lager.erl index db918d69a999..c1ed6130886d 100644 --- a/src/rabbit_lager.erl +++ b/src/rabbit_lager.erl @@ -240,7 +240,6 @@ configure_extra_sinks(Sinks, []) -> Sinks. default_sink_config(rabbit_log_upgrade_lager_event, Sink) -> - io:format("I AM UPGRADE SINK"), Handlers = lager_handlers(application:get_env(rabbit, lager_handler_upgrade, tty)), diff --git a/src/rabbit_msg_store.erl b/src/rabbit_msg_store.erl index 81364c68bc9d..2da802e72242 100644 --- a/src/rabbit_msg_store.erl +++ b/src/rabbit_msg_store.erl @@ -91,8 +91,6 @@ flying_ets, %% set of dying clients dying_clients, - %% index of file positions for client death messages - dying_client_index, %% map of references of all registered clients %% to callbacks clients, @@ -704,11 +702,9 @@ client_update_flying(Diff, MsgId, #client_msstate { flying_ets = FlyingEts, end. clear_client(CRef, State = #msstate { cref_to_msg_ids = CTM, - dying_clients = DyingClients, - dying_client_index = DyingIndex }) -> - ets:delete(DyingIndex, CRef), + dying_clients = DyingClients }) -> State #msstate { cref_to_msg_ids = dict:erase(CRef, CTM), - dying_clients = sets:del_element(CRef, DyingClients) }. + dying_clients = maps:remove(CRef, DyingClients) }. %%---------------------------------------------------------------------------- @@ -760,8 +756,6 @@ init([Name, BaseDir, ClientRefs, StartupFunState]) -> [ordered_set, public]), CurFileCacheEts = ets:new(rabbit_msg_store_cur_file, [set, public]), FlyingEts = ets:new(rabbit_msg_store_flying, [set, public]), - DyingIndex = ets:new(rabbit_msg_store_dying_client_index, - [set, public, {keypos, #dying_client.client_ref}]), {ok, FileSizeLimit} = application:get_env(rabbit, msg_store_file_size_limit), @@ -792,8 +786,7 @@ init([Name, BaseDir, ClientRefs, StartupFunState]) -> file_summary_ets = FileSummaryEts, cur_file_cache_ets = CurFileCacheEts, flying_ets = FlyingEts, - dying_clients = sets:new(), - dying_client_index = DyingIndex, + dying_clients = #{}, clients = Clients, successfully_recovered = CleanShutdown, file_size_limit = FileSizeLimit, @@ -871,14 +864,14 @@ handle_call({contains, MsgId}, From, State) -> handle_cast({client_dying, CRef}, State = #msstate { dying_clients = DyingClients, - dying_client_index = DyingIndex, current_file_handle = CurHdl, current_file = CurFile }) -> - DyingClients1 = sets:add_element(CRef, DyingClients), {ok, CurOffset} = file_handle_cache:current_virtual_offset(CurHdl), - true = ets:insert_new(DyingIndex, #dying_client{client_ref = CRef, - file = CurFile, - offset = CurOffset}), + DyingClients1 = maps:put(CRef, + #dying_client{client_ref = CRef, + file = CurFile, + offset = CurOffset}, + DyingClients), noreply(State #msstate { dying_clients = DyingClients1 }); handle_cast({client_delete, CRef}, @@ -1375,17 +1368,15 @@ blind_confirm(CRef, MsgIds, ActionTaken, State) -> %% msg and thus should be ignored. Note that this (correctly) returns %% false when testing to remove the death msg itself. should_mask_action(CRef, MsgId, - State = #msstate { dying_clients = DyingClients, - dying_client_index = DyingIndex }) -> - case {sets:is_element(CRef, DyingClients), index_lookup(MsgId, State)} of - {false, Location} -> + State = #msstate{dying_clients = DyingClients}) -> + case {maps:find(CRef, DyingClients), index_lookup(MsgId, State)} of + {error, Location} -> {false, Location}; - {true, not_found} -> + {{ok, _}, not_found} -> {true, not_found}; - {true, #msg_location { file = File, offset = Offset, - ref_count = RefCount } = Location} -> - [#dying_client { file = DeathFile, offset = DeathOffset }] = - ets:lookup(DyingIndex, CRef), + {{ok, Client}, #msg_location { file = File, offset = Offset, + ref_count = RefCount } = Location} -> + #dying_client{file = DeathFile, offset = DeathOffset} = Client, {case {{DeathFile, DeathOffset} < {File, Offset}, RefCount} of {true, _} -> true; {false, 0} -> false_if_increment; From 3d568701e733dfb07d9a61390c192364ad4ce68a Mon Sep 17 00:00:00 2001 From: Gabriele Santomaggio Date: Tue, 20 Dec 2016 17:08:14 +0100 Subject: [PATCH 36/47] fix typo --- src/rabbit_queue_index.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_queue_index.erl b/src/rabbit_queue_index.erl index 4d2e865d3b23..1748d775269c 100644 --- a/src/rabbit_queue_index.erl +++ b/src/rabbit_queue_index.erl @@ -1415,7 +1415,7 @@ move_to_per_vhost_stores(#resource{} = QueueName) -> ok = rabbit_file:ensure_dir(NewQueueDir), ok = rabbit_file:rename(OldQueueDir, NewQueueDir); false -> - rabbit_log:info("Queue index directoy not found for queue ~p~n", + rabbit_log:info("Queue index directory not found for queue ~p~n", [QueueName]) end, ok. From 308a2b5f5603b20906de2bec0d7d80ff99073e86 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Tue, 20 Dec 2016 18:38:40 +0000 Subject: [PATCH 37/47] Group recovery client refs by vhost --- src/rabbit_msg_store_vhost_sup.erl | 16 ++++++----- src/rabbit_variable_queue.erl | 27 +++++++++++++------ test/channel_operation_timeout_test_queue.erl | 27 +++++++++++++------ 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 65bf2c74e81a..3aef5e74284f 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -8,28 +8,32 @@ %% Internal -export([start_store_for_vhost/4]). -start_link(Name, ClientRefs, StartupFunState) -> +start_link(Name, VhostsClientRefs, StartupFunState) -> supervisor2:start_link({local, Name}, ?MODULE, - [Name, ClientRefs, StartupFunState]). + [Name, VhostsClientRefs, StartupFunState]). -init([Name, ClientRefs, StartupFunState]) -> +init([Name, VhostsClientRefs, StartupFunState]) -> ets:new(Name, [named_table, public]), {ok, {{simple_one_for_one, 1, 1}, [{rabbit_msg_store_vhost, {rabbit_msg_store_vhost_sup, start_store_for_vhost, - [Name, ClientRefs, StartupFunState]}, + [Name, VhostsClientRefs, StartupFunState]}, transient, infinity, supervisor, [rabbit_msg_store]}]}}. add_vhost(Name, VHost) -> supervisor2:start_child(Name, [VHost]). -start_store_for_vhost(Name, ClientRefs, StartupFunState, VHost) -> +start_store_for_vhost(Name, VhostsClientRefs, StartupFunState, VHost) -> case vhost_store_pid(Name, VHost) of no_pid -> VHostDir = rabbit_vhost:msg_store_dir_path(VHost), ok = rabbit_file:ensure_dir(VHostDir), rabbit_log:info("Making sure message store directory '~s' for vhost '~s' exists~n", [VHostDir, VHost]), - case rabbit_msg_store:start_link(Name, VHostDir, ClientRefs, StartupFunState) of + VhostRefs = case maps:find(VHost, VhostsClientRefs) of + {ok, Refs} -> Refs; + error -> [] + end, + case rabbit_msg_store:start_link(Name, VHostDir, VhostRefs, StartupFunState) of {ok, Pid} -> ets:insert(Name, {VHost, Pid}), {ok, Pid}; diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 9dcb2eef825a..43e0d30a4ea0 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -465,14 +465,25 @@ start(DurableQueues) -> {AllTerms, StartFunState} = rabbit_queue_index:start(DurableQueues), - start_msg_store( - [Ref || Terms <- AllTerms, - Terms /= non_clean_shutdown, - begin - Ref = proplists:get_value(persistent_ref, Terms), - Ref =/= undefined - end], - StartFunState), + %% Group recovery terms by vhost. + {[], VhostRefs} = lists:foldl( + fun + %% We need to skip a queue name + (non_clean_shutdown, {[_|QNames], VhostRefs}) -> + {QNames, VhostRefs}; + (Terms, {[QueueName | QNames], VhostRefs}) -> + case proplists:get_value(persistent_ref, Terms) of + undefined -> {QNames, VhostRefs}; + Ref -> + #resource{virtual_host = VHost} = QueueName, + Refs = case maps:find(VHost, VhostRefs) of + {ok, Val} -> Val; + error -> [] + end, + {QNames, maps:put(VHost, [Ref|Refs], VhostRefs)} + end + end), + start_msg_store(VhostRefs, StartFunState), {ok, AllTerms}. stop() -> diff --git a/test/channel_operation_timeout_test_queue.erl b/test/channel_operation_timeout_test_queue.erl index 5e256b138162..5a41eb86bfae 100644 --- a/test/channel_operation_timeout_test_queue.erl +++ b/test/channel_operation_timeout_test_queue.erl @@ -215,14 +215,25 @@ start(DurableQueues) -> {AllTerms, StartFunState} = rabbit_queue_index:start(DurableQueues), - start_msg_store( - [Ref || Terms <- AllTerms, - Terms /= non_clean_shutdown, - begin - Ref = proplists:get_value(persistent_ref, Terms), - Ref =/= undefined - end], - StartFunState), + %% Group recovery terms by vhost. + {[], VhostRefs} = lists:foldl( + fun + %% We need to skip a queue name + (non_clean_shutdown, {[_|QNames], VhostRefs}) -> + {QNames, VhostRefs}; + (Terms, {[QueueName | QNames], VhostRefs}) -> + case proplists:get_value(persistent_ref, Terms) of + undefined -> {QNames, VhostRefs}; + Ref -> + #resource{virtual_host = VHost} = QueueName, + Refs = case maps:find(VHost, VhostRefs) of + {ok, Val} -> Val; + error -> [] + end, + {QNames, maps:put(VHost, [Ref|Refs], VhostRefs)} + end + end), + start_msg_store(VhostRefs, StartFunState), {ok, AllTerms}. stop() -> From 1ec2ac8b0ffb712f6b1e14ac0366a6dd869dc1d3 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Tue, 20 Dec 2016 18:45:30 +0000 Subject: [PATCH 38/47] Pass queues and recovery terms to fold --- src/rabbit_variable_queue.erl | 4 +++- test/channel_operation_timeout_test_queue.erl | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 43e0d30a4ea0..c0a2adbc672b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -482,7 +482,9 @@ start(DurableQueues) -> end, {QNames, maps:put(VHost, [Ref|Refs], VhostRefs)} end - end), + end, + {DurableQueues, #{}}, + AllTerms), start_msg_store(VhostRefs, StartFunState), {ok, AllTerms}. diff --git a/test/channel_operation_timeout_test_queue.erl b/test/channel_operation_timeout_test_queue.erl index 5a41eb86bfae..124fda47b17b 100644 --- a/test/channel_operation_timeout_test_queue.erl +++ b/test/channel_operation_timeout_test_queue.erl @@ -232,7 +232,9 @@ start(DurableQueues) -> end, {QNames, maps:put(VHost, [Ref|Refs], VhostRefs)} end - end), + end, + {DurableQueues, #{}}, + AllTerms), start_msg_store(VhostRefs, StartFunState), {ok, AllTerms}. From eafad5a84eb94bf7cb001ee932540f9de7d6a897 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Tue, 20 Dec 2016 18:54:33 +0000 Subject: [PATCH 39/47] Refs can be undefined --- src/rabbit_msg_store_vhost_sup.erl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index 3aef5e74284f..d5ed915da4ed 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -29,10 +29,7 @@ start_store_for_vhost(Name, VhostsClientRefs, StartupFunState, VHost) -> VHostDir = rabbit_vhost:msg_store_dir_path(VHost), ok = rabbit_file:ensure_dir(VHostDir), rabbit_log:info("Making sure message store directory '~s' for vhost '~s' exists~n", [VHostDir, VHost]), - VhostRefs = case maps:find(VHost, VhostsClientRefs) of - {ok, Refs} -> Refs; - error -> [] - end, + VhostRefs = refs_for_vhost(VHost, VhostsClientRefs), case rabbit_msg_store:start_link(Name, VHostDir, VhostRefs, StartupFunState) of {ok, Pid} -> ets:insert(Name, {VHost, Pid}), @@ -43,6 +40,14 @@ start_store_for_vhost(Name, VhostsClientRefs, StartupFunState, VHost) -> {error, {already_started, Pid}} end. +refs_for_vhost(_, undefined) -> undefined; +refs_for_vhost(VHost, Refs) -> + case maps:find(VHost, Refs) of + {ok, Val} -> Val; + error -> [] + end. + + delete_vhost(Name, VHost) -> case vhost_store_pid(Name, VHost) of no_pid -> ok; From 569d114f5c85d9f329bdc10cba1fdb41f97e42d3 Mon Sep 17 00:00:00 2001 From: Gabriele Santomaggio Date: Wed, 21 Dec 2016 11:26:23 +0100 Subject: [PATCH 40/47] change error function with rabbit_log:error --- src/rabbit_upgrade.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabbit_upgrade.erl b/src/rabbit_upgrade.erl index 1ee742340412..95af84de39e6 100644 --- a/src/rabbit_upgrade.erl +++ b/src/rabbit_upgrade.erl @@ -99,7 +99,7 @@ ensure_backup_taken() -> _ -> ok end; true -> - error("Found lock file at ~s. + rabbit_log:error("Found lock file at ~s. Either previous upgrade is in progress or has failed. Database backup path: ~s", [lock_filename(), backup_dir()]), From e5557ad53c2d01c5534ad7626f2b447ad1c0aa9b Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 21 Dec 2016 17:54:04 +0000 Subject: [PATCH 41/47] Fix msg_store test --- test/unit_inbroker_SUITE.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit_inbroker_SUITE.erl b/test/unit_inbroker_SUITE.erl index aee815aee4b1..8e05faa4c3b4 100644 --- a/test/unit_inbroker_SUITE.erl +++ b/test/unit_inbroker_SUITE.erl @@ -347,7 +347,7 @@ msg_store1(_Config) -> %% stop and restart, preserving every other msg in 2nd half ok = rabbit_variable_queue:stop_msg_store(), ok = rabbit_variable_queue:start_msg_store( - [], {fun ([]) -> finished; + #{}, {fun ([]) -> finished; ([MsgId|MsgIdsTail]) when length(MsgIdsTail) rem 2 == 0 -> {MsgId, 1, MsgIdsTail}; From 1403596717839c48f95903dc37058c2146878f9f Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Wed, 21 Dec 2016 17:54:54 +0000 Subject: [PATCH 42/47] Force recover client references to be maps. Start empty message store for each vhost on migration. Empty message stores should be started to avoid rebuilding indexes on actual startup. --- src/rabbit_msg_store_vhost_sup.erl | 3 ++- src/rabbit_variable_queue.erl | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/rabbit_msg_store_vhost_sup.erl b/src/rabbit_msg_store_vhost_sup.erl index d5ed915da4ed..0209e88cf7cc 100644 --- a/src/rabbit_msg_store_vhost_sup.erl +++ b/src/rabbit_msg_store_vhost_sup.erl @@ -8,7 +8,8 @@ %% Internal -export([start_store_for_vhost/4]). -start_link(Name, VhostsClientRefs, StartupFunState) -> +start_link(Name, VhostsClientRefs, StartupFunState) when is_map(VhostsClientRefs); + VhostsClientRefs == undefined -> supervisor2:start_link({local, Name}, ?MODULE, [Name, VhostsClientRefs, StartupFunState]). diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index c0a2adbc672b..111f263130a9 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -492,7 +492,7 @@ stop() -> ok = stop_msg_store(), ok = rabbit_queue_index:stop(). -start_msg_store(Refs, StartFunState) -> +start_msg_store(Refs, StartFunState) when is_map(Refs); Refs == undefined -> ok = rabbit_sup:start_child(?TRANSIENT_MSG_STORE_SUP, rabbit_msg_store_vhost_sup, [?TRANSIENT_MSG_STORE_SUP, undefined, {fun (ok) -> finished end, ok}]), @@ -2745,6 +2745,11 @@ move_messages_to_vhost_store() -> OldStore = run_old_persistent_store(RecoveryRefs, StartFunState), %% New store should not be recovered. NewStoreSup = start_new_store_sup(), + Vhosts = rabbit_vhost:list(), + lists:foreach(fun(VHost) -> + rabbit_msg_store_vhost_sup:add_vhost(NewStoreSup, VHost) + end, + Vhosts), MigrationBatchSize = application:get_env(rabbit, queue_migration_batch_size, ?QUEUE_MIGRATION_BATCH_SIZE), in_batches(MigrationBatchSize, From 5282534220029eaaf138542eb15b16fa96afde82 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 22 Dec 2016 11:55:33 +0000 Subject: [PATCH 43/47] Recover vhosts message stores in parallel after a crash --- src/rabbit_variable_queue.erl | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 111f263130a9..367cbb464e9b 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -30,6 +30,9 @@ -export([start/1, stop/0]). +%% exported for parallel map +-export([add_vhost_msg_store/1]). + %% exported for testing only -export([start_msg_store/2, stop_msg_store/0, init/6]). @@ -38,6 +41,7 @@ -include_lib("stdlib/include/qlc.hrl"). -define(QUEUE_MIGRATION_BATCH_SIZE, 100). +-define(MSG_STORE_RESTORE_BATCH_SIZE, 100). %%---------------------------------------------------------------------------- %% Messages, and their position in the queue, can be in memory or on @@ -500,14 +504,17 @@ start_msg_store(Refs, StartFunState) when is_map(Refs); Refs == undefined -> [?PERSISTENT_MSG_STORE_SUP, Refs, StartFunState]), %% Start message store for all known vhosts VHosts = rabbit_vhost:list(), - lists:foreach( - fun(VHost) -> - rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), - rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE_SUP, VHost) - end, - VHosts), + RestoreBatchSize = application:get_env(rabbit, msg_store_restore_batch_size, + ?MSG_STORE_RESTORE_BATCH_SIZE), + in_batches(RestoreBatchSize, + {rabbit_variable_queue, add_vhost_msg_store, []}, + VHosts), ok. +add_vhost_msg_store(VHost) -> + rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), + rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE_SUP, VHost). + stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE_SUP), ok = rabbit_sup:stop_child(?TRANSIENT_MSG_STORE_SUP). From 79685d044aee1ec7c557660fbefbcede8d789338 Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Thu, 22 Dec 2016 12:03:52 +0000 Subject: [PATCH 44/47] Recover queues after non-clean shutdown --- src/rabbit_variable_queue.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 367cbb464e9b..dda7605969b7 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2815,9 +2815,13 @@ migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> rabbit_msg_store:client_terminate(OldStoreClient), rabbit_msg_store:client_terminate(NewStoreClient), NewClientRef = rabbit_msg_store:client_ref(NewStoreClient), - NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm, - {persistent_ref, NewClientRef}), - rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm), + case RecoveryTerm of + non_clean_shutdown -> ok; + Term when is_list(Term) -> + NewRecoveryTerm = lists:keyreplace(persistent_ref, 1, RecoveryTerm, + {persistent_ref, NewClientRef}), + rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm) + end, log_upgrade_verbose("Queue migration finished ~p", [QueueName]), {QueueName, NewClientRef}. From 83d32221804d75ce0d6957a0886ae7a451e2a08b Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Fri, 23 Dec 2016 10:25:23 +0000 Subject: [PATCH 45/47] Fix logs. Expect multiple logs during startup --- src/rabbit_variable_queue.erl | 26 ++++++++++++++++---------- test/unit_inbroker_SUITE.erl | 6 +++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index dda7605969b7..3295da6b8dc5 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -508,12 +508,16 @@ start_msg_store(Refs, StartFunState) when is_map(Refs); Refs == undefined -> ?MSG_STORE_RESTORE_BATCH_SIZE), in_batches(RestoreBatchSize, {rabbit_variable_queue, add_vhost_msg_store, []}, - VHosts), + VHosts, + "Recovering batch ~p of ~p vhosts ~n", + "Batch ~p of ~p vhsots recovered ~n"), ok. add_vhost_msg_store(VHost) -> + rabbit_log:info("Starting message store vor vhost ~p~n", [VHost]), rabbit_msg_store_vhost_sup:add_vhost(?TRANSIENT_MSG_STORE_SUP, VHost), - rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE_SUP, VHost). + rabbit_msg_store_vhost_sup:add_vhost(?PERSISTENT_MSG_STORE_SUP, VHost), + rabbit_log:info("Message store is started vor vhost ~p~n", [VHost]). stop_msg_store() -> ok = rabbit_sup:stop_child(?PERSISTENT_MSG_STORE_SUP), @@ -2761,7 +2765,9 @@ move_messages_to_vhost_store() -> ?QUEUE_MIGRATION_BATCH_SIZE), in_batches(MigrationBatchSize, {rabbit_variable_queue, migrate_queue, [OldStore, NewStoreSup]}, - QueuesWithTerms), + QueuesWithTerms, + "Migrating batch ~p of ~p queues ~n", + "Batch ~p of ~p queues migrated ~n"), log_upgrade("Message store migration finished"), delete_old_store(OldStore), @@ -2770,16 +2776,16 @@ move_messages_to_vhost_store() -> ok = rabbit_sup:stop_child(NewStoreSup), ok. -in_batches(Size, MFA, List) -> - in_batches(Size, 1, MFA, List). +in_batches(Size, MFA, List, MessageStart, MessageEnd) -> + in_batches(Size, 1, MFA, List, MessageStart, MessageEnd). -in_batches(_, _, _, []) -> ok; -in_batches(Size, BatchNum, MFA, List) -> +in_batches(_, _, _, [], _, _) -> ok; +in_batches(Size, BatchNum, MFA, List, MessageStart, MessageEnd) -> {Batch, Tail} = case Size > length(List) of true -> {List, []}; false -> lists:split(Size, List) end, - log_upgrade("Migrating batch ~p of ~p queues ~n", [BatchNum, Size]), + log_upgrade(MessageStart, [BatchNum, Size]), {M, F, A} = MFA, Keys = [ rpc:async_call(node(), M, F, [El | A]) || El <- Batch ], lists:foreach(fun(Key) -> @@ -2789,8 +2795,8 @@ in_batches(Size, BatchNum, MFA, List) -> end end, Keys), - log_upgrade("Batch ~p of ~p queues migrated ~n", [BatchNum, Size]), - in_batches(Size, BatchNum + 1, MFA, Tail). + log_upgrade(MessageEnd, [BatchNum, Size]), + in_batches(Size, BatchNum + 1, MFA, Tail, MessageStart, MessageEnd). migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> log_upgrade_verbose( diff --git a/test/unit_inbroker_SUITE.erl b/test/unit_inbroker_SUITE.erl index 8e05faa4c3b4..98fb6ac3c215 100644 --- a/test/unit_inbroker_SUITE.erl +++ b/test/unit_inbroker_SUITE.erl @@ -1842,7 +1842,7 @@ log_management(Config) -> ?MODULE, log_management1, [Config]). log_management1(_Config) -> - [LogFile] = rabbit:log_locations(), + [LogFile|_] = rabbit:log_locations(), Suffix = ".0", ok = test_logs_working([LogFile]), @@ -1917,7 +1917,7 @@ log_management_during_startup(Config) -> ?MODULE, log_management_during_startup1, [Config]). log_management_during_startup1(_Config) -> - [LogFile] = rabbit:log_locations(), + [LogFile|_] = rabbit:log_locations(), Suffix = ".0", %% start application with simple tty logging @@ -2002,7 +2002,7 @@ externally_rotated_logs_are_automatically_reopened(Config) -> ?MODULE, externally_rotated_logs_are_automatically_reopened1, [Config]). externally_rotated_logs_are_automatically_reopened1(_Config) -> - [LogFile] = rabbit:log_locations(), + [LogFile|_] = rabbit:log_locations(), %% Make sure log file is opened ok = test_logs_working([LogFile]), From 12901fbbc5656d2140738c98c72272d7b8383c9b Mon Sep 17 00:00:00 2001 From: Daniil Fedotov Date: Fri, 23 Dec 2016 17:30:13 +0000 Subject: [PATCH 46/47] Revert to syncronous vhost recovery since there are concurency limitations --- src/rabbit_variable_queue.erl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 3295da6b8dc5..23ce3c6ef641 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -41,7 +41,6 @@ -include_lib("stdlib/include/qlc.hrl"). -define(QUEUE_MIGRATION_BATCH_SIZE, 100). --define(MSG_STORE_RESTORE_BATCH_SIZE, 100). %%---------------------------------------------------------------------------- %% Messages, and their position in the queue, can be in memory or on @@ -504,13 +503,10 @@ start_msg_store(Refs, StartFunState) when is_map(Refs); Refs == undefined -> [?PERSISTENT_MSG_STORE_SUP, Refs, StartFunState]), %% Start message store for all known vhosts VHosts = rabbit_vhost:list(), - RestoreBatchSize = application:get_env(rabbit, msg_store_restore_batch_size, - ?MSG_STORE_RESTORE_BATCH_SIZE), - in_batches(RestoreBatchSize, - {rabbit_variable_queue, add_vhost_msg_store, []}, - VHosts, - "Recovering batch ~p of ~p vhosts ~n", - "Batch ~p of ~p vhsots recovered ~n"), + lists:foreach(fun(VHost) -> + add_vhost_msg_store(VHost) + end, + VHosts), ok. add_vhost_msg_store(VHost) -> From 1d4e939b84b56dd793bd265bec074a392cdf194b Mon Sep 17 00:00:00 2001 From: Michael Klishin Date: Sat, 24 Dec 2016 01:05:37 +0300 Subject: [PATCH 47/47] Don't log #resource records --- src/rabbit_variable_queue.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rabbit_variable_queue.erl b/src/rabbit_variable_queue.erl index 23ce3c6ef641..9dbd4fdbf209 100644 --- a/src/rabbit_variable_queue.erl +++ b/src/rabbit_variable_queue.erl @@ -2794,17 +2794,17 @@ in_batches(Size, BatchNum, MFA, List, MessageStart, MessageEnd) -> log_upgrade(MessageEnd, [BatchNum, Size]), in_batches(Size, BatchNum + 1, MFA, Tail, MessageStart, MessageEnd). -migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> +migrate_queue({QueueName = #resource{virtual_host = VHost, name = Name}, RecoveryTerm}, OldStore, NewStoreSup) -> log_upgrade_verbose( "Migrating messages in queue ~s in vhost ~s to per-vhost message store~n", - [QueueName#resource.name, QueueName#resource.virtual_host]), + [Name, VHost]), OldStoreClient = get_global_store_client(OldStore), NewStoreClient = get_per_vhost_store_client(QueueName, NewStoreSup), %% WARNING: During scan_queue_segments queue index state is being recovered %% and terminated. This can cause side effects! rabbit_queue_index:scan_queue_segments( - %% We migrate only persistent messages, which is stored in msg_store - %% and is not acked yet + %% We migrate only persistent messages which are found in message store + %% and are not acked yet fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, OldC) when is_binary(MsgId) -> migrate_message(MsgId, OldC, NewStoreClient); @@ -2824,7 +2824,7 @@ migrate_queue({QueueName, RecoveryTerm}, OldStore, NewStoreSup) -> {persistent_ref, NewClientRef}), rabbit_queue_index:update_recovery_term(QueueName, NewRecoveryTerm) end, - log_upgrade_verbose("Queue migration finished ~p", [QueueName]), + log_upgrade_verbose("Finished migrating queue ~s in vhost ~s", [Name, VHost]), {QueueName, NewClientRef}. migrate_message(MsgId, OldC, NewC) -> @@ -2903,4 +2903,4 @@ log_upgrade_verbose(Msg) -> log_upgrade_verbose(Msg, []). log_upgrade_verbose(Msg, Args) -> - rabbit_log_upgrade:info(Msg, Args). \ No newline at end of file + rabbit_log_upgrade:info(Msg, Args).