Skip to content

Commit

Permalink
HTTP API: add two new health checks, references #13153
Browse files Browse the repository at this point in the history
(cherry picked from commit cb81bb9)
  • Loading branch information
michaelklishin authored and mergify[bot] committed Jan 29, 2025
1 parent 8645d17 commit 95fcf32
Show file tree
Hide file tree
Showing 13 changed files with 161 additions and 30 deletions.
2 changes: 2 additions & 0 deletions deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ dispatcher() ->
%% modern generation of fine-grained health checks
{"/health/checks/alarms", rabbit_mgmt_wm_health_check_alarms, []},
{"/health/checks/local-alarms", rabbit_mgmt_wm_health_check_local_alarms, []},
{"/health/checks/metadata-store/initialized", rabbit_mgmt_wm_health_check_metadata_store_initialized, []},
{"/health/checks/metadata-store/initialized/with-data", rabbit_mgmt_wm_health_check_metadata_store_initialized_with_data, []},
{"/health/checks/certificate-expiration/:within/:unit", rabbit_mgmt_wm_health_check_certificate_expiration, []},
{"/health/checks/port-listener/:port", rabbit_mgmt_wm_health_check_port_listener, []},
{"/health/checks/protocol-listener/:protocol", rabbit_mgmt_wm_health_check_protocol_listener, []},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ to_json(ReqData, Context) ->
rabbit_mgmt_util:vhost(ReqData),
ReqData,
Context,
fun(_Ch) -> rabbit_mgmt_util:reply([{status, ok}], ReqData, Context) end
fun(_Ch) -> rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context) end
).

is_authorized(ReqData, Context) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ to_json(ReqData, Context) ->
end,
case rabbit_alarm:get_alarms(Timeout) of
[] ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
Xs when length(Xs) > 0 ->
Msg = "There are alarms in effect in the cluster",
failure(Msg, Xs, ReqData, Context)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,20 @@ to_json(ReqData, Context) ->
end, [], Local),
case ExpiringListeners of
[] ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
_ ->
Msg = <<"Certificates expiring">>,
failure(Msg, ExpiringListeners, ReqData, Context)
end
end.

failure(Message, Listeners, ReqData, Context) ->
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply([{status, failed},
{reason, Message},
{expired, Listeners}],
ReqData, Context),
Body = #{
status => failed,
reason => Message,
expired => Listeners
},
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply(Body, ReqData, Context),
{stop, cowboy_req:reply(503, #{}, Response, ReqData1), Context1}.

is_authorized(ReqData, Context) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ to_json(ReqData, Context) ->
end,
case rabbit_alarm:get_local_alarms(Timeout) of
[] ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
Xs when length(Xs) > 0 ->
Msg = "There are alarms in effect on the node",
failure(Msg, Xs, ReqData, Context)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
%%

%% An HTTP API health check that reports whether the metadata store has finished initializing
-module(rabbit_mgmt_wm_health_check_metadata_store_initialized).

-export([init/2, to_json/2, content_types_provided/2, is_authorized/2]).
-export([resource_exists/2]).
-export([variances/2]).

-include("rabbit_mgmt.hrl").
-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl").

%%--------------------------------------------------------------------

%% Cowboy handler entry point. Applies the common permission headers for this
%% module to the request and returns a cowboy_rest tuple with a fresh
%% #context{} record as the handler state. The initial handler options are
%% ignored.
init(Req0, _Opts) ->
    Req1 = rabbit_mgmt_headers:set_common_permission_headers(Req0, ?MODULE),
    {cowboy_rest, Req1, #context{}}.

%% REST callback: returns the list of request header names reported as
%% variances for this resource, leaving request and context untouched.
variances(ReqData, Ctx) ->
    VaryOn = [<<"accept-encoding">>, <<"origin">>],
    {VaryOn, ReqData, Ctx}.

%% REST callback: the provided content types are whatever
%% rabbit_mgmt_util:responder_map/1 yields for the to_json responder.
content_types_provided(Req, Ctx) ->
    Responders = rabbit_mgmt_util:responder_map(to_json),
    {Responders, Req, Ctx}.

%% REST callback: the resource is unconditionally reported as existing.
resource_exists(Req, Ctx) ->
    Exists = true,
    {Exists, Req, Ctx}.

%% Responder for the health check. Replies with #{status => ok} when
%% rabbit_db:is_init_finished/0 returns true; when it returns false the
%% response is produced by failure/3 with a human-readable reason.
to_json(ReqData, Context) ->
    case rabbit_db:is_init_finished() of
        false ->
            failure("Metadata store has not yet reported as initialized",
                    ReqData, Context);
        true ->
            rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context)
    end.

%% Builds the failure response for this health check.
%%
%% The reason is coerced to a binary, the body is rendered through
%% rabbit_mgmt_util:reply/3, and the rendered body is then sent with the
%% ?HEALTH_CHECK_FAILURE_STATUS status code. Returns a `stop` tuple carrying
%% the replied request and the updated context.
failure(Reason, Req0, Ctx0) ->
    Payload = #{status => failed,
                reason => rabbit_data_coercion:to_binary(Reason)},
    {Rendered, Req1, Ctx1} = rabbit_mgmt_util:reply(Payload, Req0, Ctx0),
    Req2 = cowboy_req:reply(?HEALTH_CHECK_FAILURE_STATUS, #{}, Rendered, Req1),
    {stop, Req2, Ctx1}.

%% REST callback: the authorization decision is delegated unchanged to
%% rabbit_mgmt_util:is_authorized/2.
is_authorized(Req, Ctx) ->
    rabbit_mgmt_util:is_authorized(Req, Ctx).
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
%%

%% An HTTP API health check that reports whether the metadata store has finished initializing and contains at least one virtual host
-module(rabbit_mgmt_wm_health_check_metadata_store_initialized_with_data).

-export([init/2, to_json/2, content_types_provided/2, is_authorized/2]).
-export([resource_exists/2]).
-export([variances/2]).

-include("rabbit_mgmt.hrl").
-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl").

%%--------------------------------------------------------------------

%% Cowboy handler entry point. Applies the common permission headers for this
%% module to the request and returns a cowboy_rest tuple with a fresh
%% #context{} record as the handler state. The initial handler options are
%% ignored.
init(Req0, _Opts) ->
    Req1 = rabbit_mgmt_headers:set_common_permission_headers(Req0, ?MODULE),
    {cowboy_rest, Req1, #context{}}.

%% REST callback: returns the list of request header names reported as
%% variances for this resource, leaving request and context untouched.
variances(ReqData, Ctx) ->
    VaryOn = [<<"accept-encoding">>, <<"origin">>],
    {VaryOn, ReqData, Ctx}.

%% REST callback: the provided content types are whatever
%% rabbit_mgmt_util:responder_map/1 yields for the to_json responder.
content_types_provided(Req, Ctx) ->
    Responders = rabbit_mgmt_util:responder_map(to_json),
    {Responders, Req, Ctx}.

%% REST callback: the resource is unconditionally reported as existing.
resource_exists(Req, Ctx) ->
    Exists = true,
    {Exists, Req, Ctx}.

%% Responder for the health check.
%%
%% Succeeds with #{status => ok} only when rabbit_db:is_init_finished/0
%% returns true AND rabbit_db_vhost:count_all/0 reports at least one virtual
%% host. Any other outcome is reported through failure/3 with a reason that
%% names the actual cause.
%%
%% The virtual host count is only requested once the store reports as
%% initialized, so a store that is still starting up is never queried and
%% the check fails cleanly instead of depending on that query's outcome.
to_json(ReqData, Context) ->
    case rabbit_db:is_init_finished() of
        false ->
            Msg = "Metadata store has not yet reported as initialized",
            failure(Msg, ReqData, Context);
        true ->
            %% We cannot know how many entities are supposed to be in the data store,
            %% so let's verify that there's at least some data.
            %%
            %% Clusters without users or their permissions do exist (e.g. OAuth 2 or LDAP
            %% are used exclusively) but clusters without any virtual hosts do not.
            %%
            %% NOTE(review): any result other than {ok, N} still crashes the request
            %% here, as it did before; confirm whether count_all/0 can return an error
            %% tuple that should be mapped to a failure response instead.
            {ok, N} = rabbit_db_vhost:count_all(),
            case N > 0 of
                true ->
                    rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
                false ->
                    NoDataMsg = "Metadata store has not yet been initialized: "
                                "it reports to have no virtual hosts",
                    failure(NoDataMsg, ReqData, Context)
            end
    end.

%% Builds the failure response for this health check.
%%
%% The reason is coerced to a binary, the body is rendered through
%% rabbit_mgmt_util:reply/3, and the rendered body is then sent with the
%% ?HEALTH_CHECK_FAILURE_STATUS status code. Returns a `stop` tuple carrying
%% the replied request and the updated context.
failure(Reason, Req0, Ctx0) ->
    Payload = #{status => failed,
                reason => rabbit_data_coercion:to_binary(Reason)},
    {Rendered, Req1, Ctx1} = rabbit_mgmt_util:reply(Payload, Req0, Ctx0),
    Req2 = cowboy_req:reply(?HEALTH_CHECK_FAILURE_STATUS, #{}, Rendered, Req1),
    {stop, Req2, Ctx1}.

%% REST callback: the authorization decision is delegated unchanged to
%% rabbit_mgmt_util:is_authorized/2.
is_authorized(Req, Ctx) ->
    rabbit_mgmt_util:is_authorized(Req, Ctx).
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,23 @@ resource_exists(ReqData, Context) ->
to_json(ReqData, Context) ->
case rabbit_nodes:is_single_node_cluster() of
true ->
rabbit_mgmt_util:reply([{status, ok},
{reason, <<"single node cluster">>}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok,
reason => <<"single node cluster">>}, ReqData, Context);
false ->
case rabbit_upgrade_preparation:list_with_minimum_quorum_for_cli() of
[] ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
Qs when length(Qs) > 0 ->
Msg = <<"There are quorum queues that would lose their quorum if the target node is shut down">>,
failure(Msg, Qs, ReqData, Context)
end
end.

failure(Message, Qs, ReqData, Context) ->
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply([{status, failed},
{reason, Message},
{queues, Qs}],
ReqData, Context),
Body = #{status => failed,
reason => Message,
queues => Qs},
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply(Body, ReqData, Context),
{stop, cowboy_req:reply(503, #{}, Response, ReqData1), Context1}.

is_authorized(ReqData, Context) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,21 @@ to_json(ReqData, Context) ->
Msg = <<"No active listener">>,
failure(Msg, Port, [P || #listener{port = P} <- Local], ReqData, Context);
_ ->
rabbit_mgmt_util:reply([{status, ok},
{port, Port}], ReqData, Context)
Body = #{status => ok,
port => Port},
rabbit_mgmt_util:reply(Body, ReqData, Context)
end
catch
error:badarg ->
rabbit_mgmt_util:bad_request(<<"Invalid port">>, ReqData, Context)
end.

failure(Message, Missing, Ports, ReqData, Context) ->
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply([{status, failed},
{reason, Message},
{missing, Missing},
{ports, Ports}],
Body = #{status => failed,
reason => Message,
missing => Missing,
ports => Ports},
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply(Body,
ReqData, Context),
{stop, cowboy_req:reply(503, #{}, Response, ReqData1), Context1}.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,19 @@ to_json(ReqData, Context) ->
Msg = <<"No active listener">>,
failure(Msg, Protocol, [P || #listener{protocol = P} <- Local], ReqData, Context);
_ ->
rabbit_mgmt_util:reply([{status, ok},
{protocol, list_to_binary(Protocol)}], ReqData, Context)
Body = #{status => ok,
protocol => list_to_binary(Protocol)},
rabbit_mgmt_util:reply(Body, ReqData, Context)
end.

failure(Message, Missing, Protocols, ReqData, Context) ->
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply([{status, failed},
{reason, Message},
{missing, list_to_binary(Missing)},
{protocols, Protocols}],
ReqData, Context),
Body = #{
status => failed,
reason => Message,
missing => list_to_binary(Missing),
protocols => Protocols
},
{Response, ReqData1, Context1} = rabbit_mgmt_util:reply(Body, ReqData, Context),
{stop, cowboy_req:reply(503, #{}, Response, ReqData1), Context1}.

is_authorized(ReqData, Context) ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ resource_exists(ReqData, Context) ->
to_json(ReqData, Context) ->
case rabbit_vhost_sup_sup:check() of
[] ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
Vs when length(Vs) > 0 ->
Msg = <<"Some virtual hosts are down">>,
failure(Msg, Vs, ReqData, Context)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ to_json(ReqData, Context) ->
end,
case rabbit_health_check:node(Node, Timeout) of
ok ->
rabbit_mgmt_util:reply([{status, ok}], ReqData, Context);
rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context);
{badrpc, timeout} ->
ErrMsg = rabbit_mgmt_format:print("node ~tp health check timed out", [Node]),
failure(ErrMsg, ReqData, Context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,16 @@ groups() ->
{single_node, [], [
alarms_test,
local_alarms_test,
metadata_store_initialized_test,
metadata_store_initialized_with_data_test,
is_quorum_critical_single_node_test]}
].

all_tests() -> [
health_checks_test,
virtual_hosts_test,
metadata_store_initialized_test,
metadata_store_initialized_with_data_test,
protocol_listener_test,
port_listener_test,
certificate_expiration_test
Expand Down Expand Up @@ -102,6 +106,14 @@ health_checks_test(Config) ->
http_get(Config, "/health/checks/node-is-quorum-critical", ?OK),
passed.

%% Requests the "metadata store initialized" health check endpoint and
%% expects the ?OK status from http_get/3.
metadata_store_initialized_test(Config) ->
    Path = "/health/checks/metadata-store/initialized",
    http_get(Config, Path, ?OK),
    passed.

%% Requests the "metadata store initialized with data" health check endpoint
%% and expects the ?OK status from http_get/3.
metadata_store_initialized_with_data_test(Config) ->
    Path = "/health/checks/metadata-store/initialized/with-data",
    http_get(Config, Path, ?OK),
    passed.

alarms_test(Config) ->
Server = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename),
rabbit_ct_broker_helpers:clear_all_alarms(Config, Server),
Expand Down

0 comments on commit 95fcf32

Please sign in to comment.