Skip to content
This repository has been archived by the owner on Nov 18, 2020. It is now read-only.

Commit

Permalink
Introduce rabbitmq-diagnostics check[_local]_alarms
Browse files Browse the repository at this point in the history
References #292.

(cherry picked from commit f247b4c)
  • Loading branch information
michaelklishin committed Jan 22, 2019
1 parent 8f06d39 commit 3bd0820
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 36 deletions.
14 changes: 14 additions & 0 deletions lib/rabbitmq/cli/core/output.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ defmodule RabbitMQ.CLI.Core.Output do
# A bare :ok carries no payload, so it is passed through untouched.
def format_output(:ok, _formatter, _options), do: :ok

# the command intends to produce no output: a nil payload and the
# :check_passed marker are both reduced to a bare :ok
def format_output({:ok, nil}, _formatter, _options), do: :ok
def format_output({:ok, :check_passed}, _formatter, _options), do: :ok

# Any other successful payload is handed to the formatter module and the
# formatted value is re-wrapped in an :ok tuple.
def format_output({:ok, output}, formatter, options) do
  formatted = formatter.format_output(output, options)
  {:ok, formatted}
end
Expand All @@ -37,6 +44,13 @@ defmodule RabbitMQ.CLI.Core.Output do
printer.print_ok(printer_state)
:ok
end
# the command intends to produce no output: nothing is sent to the printer.
# The printer arguments are deliberately ignored in these clauses, hence the
# underscore prefixes (which also avoid unused-variable compiler warnings).
def print_output_0({:ok, nil}, _printer, _printer_state) do
  :ok
end

# A passed health check is silent as well.
def print_output_0({:ok, :check_passed}, _printer, _printer_state) do
  :ok
end
def print_output_0({:ok, single_value}, printer, printer_state) do
printer.print_output(single_value, printer_state)
:ok
Expand Down
43 changes: 11 additions & 32 deletions lib/rabbitmq/cli/diagnostics/commands/alarms_command.ex
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,17 @@
## Copyright (c) 2007-2019 Pivotal Software, Inc. All rights reserved.

defmodule RabbitMQ.CLI.Diagnostics.Commands.AlarmsCommand do
@moduledoc """
Displays all alarms reported by the target node.
Returns a code of 0 unless there were connectivity and authentication
errors. This command is not meant to be used in health checks.
"""

alias RabbitMQ.CLI.Core.Helpers
import RabbitMQ.CLI.Diagnostics.Helpers, only: [alarm_lines: 2,
local_alarms: 2,
clusterwide_alarms: 2]

@behaviour RabbitMQ.CLI.CommandBehaviour

Expand Down Expand Up @@ -48,6 +58,7 @@ defmodule RabbitMQ.CLI.Diagnostics.Commands.AlarmsCommand do

# JSON output for an empty alarm list: reports the node name together
# with an empty "alarms" collection.
def output([], %{node: node_name, formatter: "json"}) do
  payload = %{
    "result" => "ok",
    "node" => node_name,
    "alarms" => []
  }

  {:ok, payload}
end
def output([], %{node: node_name}) do
Expand Down Expand Up @@ -76,36 +87,4 @@ defmodule RabbitMQ.CLI.Diagnostics.Commands.AlarmsCommand do
end

# Returns the String formatter module.
def formatter() do
  RabbitMQ.CLI.Formatters.String
end


# Builds one human-readable line per alarm, preserving the input order.
#
# `node_name` is only used for the file descriptor alarm, which carries no
# node of its own; resource alarms name the node embedded in the alarm term.
# An alarm of any other shape raises a FunctionClauseError.
defp alarm_lines(alarms, node_name) do
  Enum.map(alarms, fn
    :file_descriptor_limit ->
      "File descriptor limit alarm on node #{node_name}"

    # :resource_limit must be an atom literal: a bare `resource_limit` is a
    # variable binding and would match any tag in that position
    {{:resource_limit, :memory, alarmed_node_name}, _} ->
      "Memory alarm on node #{alarmed_node_name}"

    {{:resource_limit, :disk, alarmed_node_name}, _} ->
      "Free disk space alarm on node #{alarmed_node_name}"
  end)
end

# Keeps only the alarms that belong to `node_name`: the file descriptor
# alarm (local by definition) and resource alarms raised on that node.
defp local_alarms(alarms, node_name) do
  Enum.reject(alarms, fn
    # local by definition, never dropped
    :file_descriptor_limit ->
      false

    {{:resource_limit, _, a_node}, _} ->
      a_node != node_name
  end)
end

# Keeps only the resource alarms raised on nodes other than `node_name`.
# The file descriptor alarm is always dropped.
defp clusterwide_alarms(alarms, node_name) do
  Enum.filter(alarms, fn
    :file_descriptor_limit ->
      false

    {{:resource_limit, _, a_node}, _} ->
      a_node != node_name
  end)
end
end
94 changes: 94 additions & 0 deletions lib/rabbitmq/cli/diagnostics/commands/check_alarms_command.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
## The contents of this file are subject to the Mozilla Public License
## Version 1.1 (the "License"); you may not use this file except in
## compliance with the License. You may obtain a copy of the License
## at http://www.mozilla.org/MPL/
##
## Software distributed under the License is distributed on an "AS IS"
## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
## the License for the specific language governing rights and
## limitations under the License.
##
## The Original Code is RabbitMQ.
##
## The Initial Developer of the Original Code is GoPivotal, Inc.
## Copyright (c) 2007-2019 Pivotal Software, Inc. All rights reserved.

defmodule RabbitMQ.CLI.Diagnostics.Commands.CheckAlarmsCommand do
  @moduledoc """
  Exits with a non-zero code if the target node reports any alarms,
  local or clusterwide.

  This command is meant to be used in health checks.
  """

  alias RabbitMQ.CLI.Core.Helpers
  import RabbitMQ.CLI.Diagnostics.Helpers, only: [alarm_lines: 2,
                                                   local_alarms: 2,
                                                   clusterwide_alarms: 2]

  @behaviour RabbitMQ.CLI.CommandBehaviour

  def switches(), do: [timeout: :integer]
  def aliases(), do: [t: :timeout]

  def merge_defaults(args, opts), do: {args, opts}

  # This command accepts no positional arguments.
  def validate([_ | _], _) do
    {:validation_failure, :too_many_args}
  end
  def validate(_, _), do: :ok
  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning


  # Asks the target node for its alarm list and returns the RPC result as is.
  def run([], %{node: node_name, timeout: timeout}) do
    # Example response when there are alarms:
    #
    # [
    #  file_descriptor_limit,
    #  {{resource_limit,disk,hare@warp10},[]},
    #  {{resource_limit,memory,hare@warp10},[]},
    #  {{resource_limit,disk,rabbit@warp10},[]},
    #  {{resource_limit,memory,rabbit@warp10},[]}
    # ]
    #
    # The topmost file_descriptor_limit alarm is node-local.
    :rabbit_misc.rpc_call(node_name, :rabbit_alarm, :get_alarms, [], timeout)
  end


  # No alarms: the check passes. The clause order matters: the JSON
  # formatter takes precedence over silent mode, which takes precedence
  # over the plain text message.
  def output([], %{formatter: "json"}) do
    {:ok, %{"result" => "ok"}}
  end
  def output([], %{silent: true}) do
    {:ok, :check_passed}
  end
  def output([], %{node: node_name}) do
    {:ok, "Node #{node_name} reported no alarms, local or clusterwide"}
  end
  # Alarms are in effect: the check fails.
  def output(alarms, %{node: node_name, formatter: "json"}) do
    local = local_alarms(alarms, node_name)
    global = clusterwide_alarms(alarms, node_name)

    # NOTE(review): this branch still returns an :ok tuple with
    # "result" => "ok" even though alarms were reported, unlike the
    # non-JSON branches below which return an error. Confirm whether JSON
    # consumers are expected to inspect "local"/"global" themselves.
    {:ok, %{"result"  => "ok",
            "local"   => alarm_lines(local, node_name),
            "global"  => alarm_lines(global, node_name),
            "message" => "Node #{node_name} reported alarms"}}
  end
  def output(_alarms, %{silent: true}) do
    {:error, :check_failed}
  end
  def output(alarms, %{node: node_name}) do
    lines = alarm_lines(alarms, node_name)

    {:error, Enum.join(lines, Helpers.line_separator())}
  end
  use RabbitMQ.CLI.DefaultOutput

  def usage, do: "check_alarms"

  def banner([], %{node: node_name}) do
    "Asking node #{node_name} to report any resource alarms, local or clusterwide ..."
  end

  def formatter(), do: RabbitMQ.CLI.Formatters.String
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
## The contents of this file are subject to the Mozilla Public License
## Version 1.1 (the "License"); you may not use this file except in
## compliance with the License. You may obtain a copy of the License
## at http://www.mozilla.org/MPL/
##
## Software distributed under the License is distributed on an "AS IS"
## basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
## the License for the specific language governing rights and
## limitations under the License.
##
## The Original Code is RabbitMQ.
##
## The Initial Developer of the Original Code is GoPivotal, Inc.
## Copyright (c) 2007-2019 Pivotal Software, Inc. All rights reserved.

defmodule RabbitMQ.CLI.Diagnostics.Commands.CheckLocalAlarmsCommand do
  @moduledoc """
  Exits with a non-zero code if the target node reports any local alarms.

  This command is meant to be used in health checks.
  """

  alias RabbitMQ.CLI.Core.Helpers
  import RabbitMQ.CLI.Diagnostics.Helpers, only: [alarm_lines: 2,
                                                   local_alarms: 2]

  @behaviour RabbitMQ.CLI.CommandBehaviour

  def switches(), do: [timeout: :integer]
  def aliases(), do: [t: :timeout]

  def merge_defaults(args, opts), do: {args, opts}

  # This command accepts no positional arguments.
  def validate([_ | _], _) do
    {:validation_failure, :too_many_args}
  end
  def validate(_, _), do: :ok
  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning


  # Asks the target node for its alarm list and narrows it down to the
  # alarms local to that node. Non-list RPC results are returned as is.
  def run([], %{node: node_name, timeout: timeout}) do
    # Example response when there are alarms:
    #
    # [
    #  file_descriptor_limit,
    #  {{resource_limit,disk,hare@warp10},[]},
    #  {{resource_limit,memory,hare@warp10},[]},
    #  {{resource_limit,disk,rabbit@warp10},[]},
    #  {{resource_limit,memory,rabbit@warp10},[]}
    # ]
    #
    # The topmost file_descriptor_limit alarm is node-local.
    case :rabbit_misc.rpc_call(node_name, :rabbit_alarm, :get_alarms, [], timeout) do
      # an empty list filters down to an empty list, so it needs no
      # clause of its own
      xs when is_list(xs) -> local_alarms(xs, node_name)
      other               -> other
    end
  end


  # No local alarms: the check passes. The clause order matters: the JSON
  # formatter takes precedence over silent mode, which takes precedence
  # over the plain text message.
  def output([], %{formatter: "json"}) do
    {:ok, %{"result" => "ok"}}
  end
  def output([], %{silent: true}) do
    {:ok, :check_passed}
  end
  def output([], %{node: node_name}) do
    # only local alarms are inspected by this command, so nothing can be
    # claimed about clusterwide ones here
    {:ok, "Node #{node_name} reported no local alarms"}
  end
  # Local alarms are in effect: the check fails.
  def output(alarms, %{node: node_name, formatter: "json"}) do
    local = local_alarms(alarms, node_name)

    # NOTE(review): this branch still returns an :ok tuple with
    # "result" => "ok" even though alarms were reported, unlike the
    # non-JSON branches below which return an error. Confirm whether JSON
    # consumers are expected to inspect "local" themselves.
    {:ok, %{"result"  => "ok",
            "local"   => alarm_lines(local, node_name),
            "message" => "Node #{node_name} reported alarms"}}
  end
  def output(_alarms, %{silent: true}) do
    {:error, :check_failed}
  end
  def output(alarms, %{node: node_name}) do
    lines = alarm_lines(alarms, node_name)

    {:error, Enum.join(lines, Helpers.line_separator())}
  end
  use RabbitMQ.CLI.DefaultOutput

  def usage, do: "check_local_alarms"

  def banner([], %{node: node_name}) do
    "Asking node #{node_name} to report any local resource alarms ..."
  end

  def formatter(), do: RabbitMQ.CLI.Formatters.String
end
36 changes: 36 additions & 0 deletions lib/rabbitmq/cli/diagnostics/diagnostics_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,41 @@ defmodule RabbitMQ.CLI.Diagnostics.Helpers do
end
end

#
# Alarms
#

# Builds one human-readable line per alarm, preserving the input order.
# `node_name` is only used for the file descriptor alarm; resource alarms
# name the node embedded in the alarm term.
def alarm_lines(alarms, node_name) do
  Enum.map(alarms, fn
    :file_descriptor_limit ->
      "File descriptor limit alarm on node #{node_name}"

    {{:resource_limit, :memory, alarmed_node_name}, _} ->
      "Memory alarm on node #{alarmed_node_name}"

    {{:resource_limit, :disk, alarmed_node_name}, _} ->
      "Free disk space alarm on node #{alarmed_node_name}"
  end)
end

# Keeps only the alarms that belong to `node_name`: the file descriptor
# alarm (local by definition) and resource alarms raised on that node.
def local_alarms(alarms, node_name) do
  Enum.reject(alarms, fn
    # local by definition, never dropped
    :file_descriptor_limit ->
      false

    {{:resource_limit, _, a_node}, _} ->
      a_node != node_name
  end)
end

# Keeps only the resource alarms raised on nodes other than `node_name`.
# The file descriptor alarm is always dropped.
def clusterwide_alarms(alarms, node_name) do
  Enum.filter(alarms, fn
    :file_descriptor_limit ->
      false

    {{:resource_limit, _, a_node}, _} ->
      a_node != node_name
  end)
end

#
# Implementation
#
Expand All @@ -90,4 +125,5 @@ defmodule RabbitMQ.CLI.Diagnostics.Helpers do
defp protocol_label(:"http/web-stomp"), do: "STOMP over WebSockets"
defp protocol_label(:clustering), do: "inter-node and CLI tool communication"
defp protocol_label(other), do: to_string(other)

end
25 changes: 21 additions & 4 deletions lib/rabbitmqctl.ex
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,21 @@ defmodule RabbitMQCtl do
end
end

# Picks the device command output is written to: standard output when the
# exit code is the "ok" code, standard error for anything else.
defp output_device(exit_code) do
  case exit_code == ExitCodes.exit_ok do
    true  -> :stdio
    false -> :stderr
  end
end

# A nil output means there is nothing to print: exit with the given code.
defp handle_shutdown({:error, exit_code, nil}) do
  exit_program(exit_code)
end
# Prints every output line to the device matching the exit code, then exits.
defp handle_shutdown({:error, exit_code, output}) do
  device = output_device(exit_code)

  # output may be a single value or a (possibly nested) list of lines
  for line <- List.flatten([output]) do
    IO.puts(device, Helpers.string_or_inspect(line))
  end
  exit_program(exit_code)
end
Expand Down Expand Up @@ -352,6 +359,16 @@ defmodule RabbitMQCtl do
{:error, ExitCodes.exit_dataerr(),
"Could not update enabled plugins file at #{path}: the file does not exist (ENOENT)"}
end
# Special case health checks. This makes it easier to change
# output of all health checks at once.
#
# A nil reason is treated the same way: the command intends to produce
# no output, e.g. a return code is sufficient.
defp format_error({:error, reason}, _, _) when reason in [:check_failed, nil] do
  {:error, ExitCodes.exit_unavailable(), nil}
end
# Catch all
defp format_error({:error, err} = result, _, _) do
string_err = Helpers.string_or_inspect(err)
Expand Down Expand Up @@ -387,7 +404,7 @@ defmodule RabbitMQCtl do
## via distribution callback in the command as :cli, :none or {:custom, fun()}.
## :cli - default rabbitmqctl node name
## :none - do not start a distribution (e.g. offline command)
## {:fun, fun} - run a custom fuction to enable distribution.
## {:fun, fun} - run a custom function to enable distribution.
## custom mode is useful for commands which should have specific node name.
## Runs code if distribution is successful, or not needed.
@spec maybe_with_distribution(module(), options(), (() -> command_result())) :: command_result()
Expand Down

0 comments on commit 3bd0820

Please sign in to comment.