Skip to content
This repository has been archived by the owner on Mar 27, 2024. It is now read-only.

Commit

Permalink
docker_engine: collect docker swarm metrics (#213)
Browse files Browse the repository at this point in the history
* add swarm manager metrics

* add swarm manager charts

* collect swarm manager metrics

* check fix

* update readme
  • Loading branch information
ilyam8 authored May 8, 2019
1 parent fdbdff9 commit cca0d2c
Show file tree
Hide file tree
Showing 7 changed files with 826 additions and 32 deletions.
36 changes: 36 additions & 0 deletions modules/docker_engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,42 @@ It produces the following charts:
4. **Health Checks** in events/s
* fails

<br>

If Docker is running in [swarm mode](https://docs.docker.com/engine/swarm/) and the instance is a swarm manager, the following additional charts are added:

1. **Swarm Manager Leader** in bool
* is_leader

2. **Swarm Manager Object Store** in count
* nodes
* services
* tasks
* networks
* secrets
* configs

3. **Swarm Manager Nodes Per State** in count
* ready
* down
* unknown
* disconnected

4. **Swarm Manager Tasks Per State** in count
* running
* failed
* ready
* rejected
* starting
* shutdown
* new
* orphaned
* preparing
* pending
* complete
* remove
* accepted
* assigned

### configuration

Expand Down
71 changes: 71 additions & 0 deletions modules/docker_engine/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var charts = Charts{
Title: "Container Actions",
Units: "actions/s",
Fam: "containers",
Ctx: "docker_engine.engine_daemon_container_actions",
Type: module.Stacked,
Dims: Dims{
{ID: "container_actions_changes", Name: "changes", Algo: module.Incremental},
Expand All @@ -29,6 +30,7 @@ var charts = Charts{
Title: "Containers In Various States",
Units: "count",
Fam: "containers",
Ctx: "docker_engine.engine_daemon_container_states_containers",
Type: module.Stacked,
Dims: Dims{
{ID: "container_states_running", Name: "running"},
Expand All @@ -41,6 +43,7 @@ var charts = Charts{
Title: "Builder Builds Fails By Reason",
Units: "fails/s",
Fam: "builder",
Ctx: "docker_engine.builder_builds_failed_total",
Type: module.Stacked,
Dims: Dims{
{ID: "builder_fails_build_canceled", Name: "build_canceled", Algo: module.Incremental},
Expand All @@ -58,8 +61,76 @@ var charts = Charts{
Title: "Health Checks",
Units: "events/s",
Fam: "health checks",
Ctx: "docker_engine.engine_daemon_health_checks_failed_total",
Dims: Dims{
{ID: "health_checks_failed", Name: "fails", Algo: module.Incremental},
},
},
}

// swarmManagerCharts holds the chart definitions for swarm manager metrics:
// the leader flag, object store counts, nodes per state and tasks per state.
// All of them share the "swarm" family.
var swarmManagerCharts = Charts{
	// Leader flag of this manager.
	{
		ID:    "swarm_manager_leader",
		Title: "Swarm Manager Leader",
		Units: "bool",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_leader",
		Dims: Dims{
			{ID: "swarm_manager_leader", Name: "is_leader"},
		},
	},
	// Number of objects of each kind in the manager's store.
	{
		ID:    "swarm_manager_object_store",
		Title: "Swarm Manager Object Store",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_object_store",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_nodes_total", Name: "nodes"},
			{ID: "swarm_manager_services_total", Name: "services"},
			{ID: "swarm_manager_tasks_total", Name: "tasks"},
			{ID: "swarm_manager_networks_total", Name: "networks"},
			{ID: "swarm_manager_secrets_total", Name: "secrets"},
			{ID: "swarm_manager_configs_total", Name: "configs"},
		},
	},
	// Node counts broken down by state.
	{
		ID:    "swarm_manager_nodes_per_state",
		Title: "Swarm Manager Nodes Per State",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_nodes_per_state",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_nodes_state_ready", Name: "ready"},
			{ID: "swarm_manager_nodes_state_down", Name: "down"},
			{ID: "swarm_manager_nodes_state_unknown", Name: "unknown"},
			{ID: "swarm_manager_nodes_state_disconnected", Name: "disconnected"},
		},
	},
	// Task counts broken down by state.
	{
		ID:    "swarm_manager_tasks_per_state",
		Title: "Swarm Manager Tasks Per State",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_tasks_per_state",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_tasks_state_running", Name: "running"},
			{ID: "swarm_manager_tasks_state_failed", Name: "failed"},
			{ID: "swarm_manager_tasks_state_ready", Name: "ready"},
			{ID: "swarm_manager_tasks_state_rejected", Name: "rejected"},
			{ID: "swarm_manager_tasks_state_starting", Name: "starting"},
			{ID: "swarm_manager_tasks_state_shutdown", Name: "shutdown"},
			{ID: "swarm_manager_tasks_state_new", Name: "new"},
			{ID: "swarm_manager_tasks_state_orphaned", Name: "orphaned"},
			{ID: "swarm_manager_tasks_state_preparing", Name: "preparing"},
			{ID: "swarm_manager_tasks_state_pending", Name: "pending"},
			{ID: "swarm_manager_tasks_state_complete", Name: "complete"},
			{ID: "swarm_manager_tasks_state_remove", Name: "remove"},
			{ID: "swarm_manager_tasks_state_accepted", Name: "accepted"},
			{ID: "swarm_manager_tasks_state_assigned", Name: "assigned"},
		},
	},
}
158 changes: 132 additions & 26 deletions modules/docker_engine/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,178 @@ func (de *DockerEngine) collect() (map[string]int64, error) {

var mx metrics

collectHealthChecks(raw, &mx)
collectContainerActions(raw, &mx)
collectContainerStates(raw, &mx)
collectBuilderBuildsFails(raw, &mx)
collectHealthChecks(&mx, raw)
collectContainerActions(&mx, raw)
collectContainerStates(&mx, raw)
collectBuilderBuildsFails(&mx, raw)

if isSwarmManager(raw) {
de.isSwarmManager = true
mx.SwarmManager = &swarmManager{}
collectSwarmManager(&mx, raw)
}

return stm.ToMap(mx), nil

}

func collectHealthChecks(raw prometheus.Metrics, mx *metrics) {
m := raw.FindByName("engine_daemon_health_checks_failed_total")
mx.HealthChecks.Failed.Set(m.Max())
func collectHealthChecks(mx *metrics, raw prometheus.Metrics) {
mx.HealthChecks.Failed.Set(raw.FindByName("engine_daemon_health_checks_failed_total").Max())
}

func collectContainerActions(raw prometheus.Metrics, mx *metrics) {
func collectContainerActions(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("engine_daemon_container_actions_seconds_count") {
action := metric.Labels.Get("action")
if action == "" {
continue
}
v := metric.Value

switch action {
default:
case "changes":
mx.Container.Actions.Changes.Set(metric.Value)
mx.Container.Actions.Changes.Set(v)
case "commit":
mx.Container.Actions.Commit.Set(metric.Value)
mx.Container.Actions.Commit.Set(v)
case "create":
mx.Container.Actions.Create.Set(metric.Value)
mx.Container.Actions.Create.Set(v)
case "delete":
mx.Container.Actions.Delete.Set(metric.Value)
mx.Container.Actions.Delete.Set(v)
case "start":
mx.Container.Actions.Start.Set(metric.Value)
mx.Container.Actions.Start.Set(v)
}
}
}

func collectContainerStates(raw prometheus.Metrics, mx *metrics) {
func collectContainerStates(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("engine_daemon_container_states_containers") {
state := metric.Labels.Get("state")
if state == "" {
continue
}
v := metric.Value

switch state {
default:
case "paused":
mx.Container.States.Paused.Set(metric.Value)
mx.Container.States.Paused.Set(v)
case "running":
mx.Container.States.Running.Set(metric.Value)
mx.Container.States.Running.Set(v)
case "stopped":
mx.Container.States.Stopped.Set(metric.Value)
mx.Container.States.Stopped.Set(v)
}
}
}

func collectBuilderBuildsFails(raw prometheus.Metrics, ms *metrics) {
func collectBuilderBuildsFails(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("builder_builds_failed_total") {
reason := metric.Labels.Get("reason")
if reason == "" {
continue
}
v := metric.Value

switch reason {
default:
case "build_canceled":
ms.Builder.FailsByReason.BuildCanceled.Set(metric.Value)
mx.Builder.FailsByReason.BuildCanceled.Set(v)
case "build_target_not_reachable_error":
ms.Builder.FailsByReason.BuildTargetNotReachableError.Set(metric.Value)
mx.Builder.FailsByReason.BuildTargetNotReachableError.Set(v)
case "command_not_supported_error":
ms.Builder.FailsByReason.CommandNotSupportedError.Set(metric.Value)
mx.Builder.FailsByReason.CommandNotSupportedError.Set(v)
case "dockerfile_empty_error":
ms.Builder.FailsByReason.DockerfileEmptyError.Set(metric.Value)
mx.Builder.FailsByReason.DockerfileEmptyError.Set(v)
case "dockerfile_syntax_error":
ms.Builder.FailsByReason.DockerfileSyntaxError.Set(metric.Value)
mx.Builder.FailsByReason.DockerfileSyntaxError.Set(v)
case "error_processing_commands_error":
ms.Builder.FailsByReason.ErrorProcessingCommandsError.Set(metric.Value)
mx.Builder.FailsByReason.ErrorProcessingCommandsError.Set(v)
case "missing_onbuild_arguments_error":
ms.Builder.FailsByReason.MissingOnbuildArgumentsError.Set(metric.Value)
mx.Builder.FailsByReason.MissingOnbuildArgumentsError.Set(v)
case "unknown_instruction_error":
ms.Builder.FailsByReason.UnknownInstructionError.Set(metric.Value)
mx.Builder.FailsByReason.UnknownInstructionError.Set(v)
}
}
}

// isSwarmManager reports whether the Max() of the "swarm_node_manager"
// series in raw is exactly 1.
func isSwarmManager(raw prometheus.Metrics) bool {
	managerFlag := raw.FindByName("swarm_node_manager").Max()
	return managerFlag == 1
}

// collectSwarmManager fills mx.SwarmManager from the scraped swarm manager
// series in raw.
//
// The object store totals (configs, networks, secrets, services) and the
// leader flag are taken as the Max() of their respective series. Node and task
// series are processed per "state" label: series with an empty state are
// skipped, recognized states set the matching per-state field, and every
// series with a non-empty state (recognized or not) is added to the
// corresponding Total.
//
// mx.SwarmManager must be non-nil; it is dereferenced without a check.
func collectSwarmManager(mx *metrics, raw prometheus.Metrics) {
	sm := mx.SwarmManager

	sm.Configs.Set(raw.FindByName("swarm_manager_configs_total").Max())
	sm.Networks.Set(raw.FindByName("swarm_manager_networks_total").Max())
	sm.Secrets.Set(raw.FindByName("swarm_manager_secrets_total").Max())
	sm.Services.Set(raw.FindByName("swarm_manager_services_total").Max())
	sm.IsLeader.Set(raw.FindByName("swarm_manager_leader").Max())

	for _, node := range raw.FindByName("swarm_manager_nodes") {
		switch node.Labels.Get("state") {
		case "":
			// Empty state: contributes to neither a state nor the total.
			continue
		case "disconnected":
			sm.Nodes.PerState.Disconnected.Set(node.Value)
		case "down":
			sm.Nodes.PerState.Down.Set(node.Value)
		case "ready":
			sm.Nodes.PerState.Ready.Set(node.Value)
		case "unknown":
			sm.Nodes.PerState.Unknown.Set(node.Value)
		}
		// Unrecognized (non-empty) states still count toward the total.
		sm.Nodes.Total.Add(node.Value)
	}

	for _, task := range raw.FindByName("swarm_manager_tasks_total") {
		switch task.Labels.Get("state") {
		case "":
			// Empty state: contributes to neither a state nor the total.
			continue
		case "accepted":
			sm.Tasks.PerState.Accepted.Set(task.Value)
		case "assigned":
			sm.Tasks.PerState.Assigned.Set(task.Value)
		case "complete":
			sm.Tasks.PerState.Complete.Set(task.Value)
		case "failed":
			sm.Tasks.PerState.Failed.Set(task.Value)
		case "new":
			sm.Tasks.PerState.New.Set(task.Value)
		case "orphaned":
			sm.Tasks.PerState.Orphaned.Set(task.Value)
		case "pending":
			sm.Tasks.PerState.Pending.Set(task.Value)
		case "preparing":
			sm.Tasks.PerState.Preparing.Set(task.Value)
		case "ready":
			sm.Tasks.PerState.Ready.Set(task.Value)
		case "rejected":
			sm.Tasks.PerState.Rejected.Set(task.Value)
		case "remove":
			sm.Tasks.PerState.Remove.Set(task.Value)
		case "running":
			sm.Tasks.PerState.Running.Set(task.Value)
		case "shutdown":
			sm.Tasks.PerState.Shutdown.Set(task.Value)
		case "starting":
			sm.Tasks.PerState.Starting.Set(task.Value)
		}
		// Unrecognized (non-empty) states still count toward the total.
		sm.Tasks.Total.Add(task.Value)
	}
}
21 changes: 16 additions & 5 deletions modules/docker_engine/docker_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ type Config struct {
// DockerEngine is the Docker Engine module.
type DockerEngine struct {
module.Base
Config `yaml:",inline"`
prom prometheus.Prometheus
Config `yaml:",inline"`
prom prometheus.Prometheus
isSwarmManager bool
}

// Cleanup makes cleanup.
Expand Down Expand Up @@ -74,13 +75,23 @@ func (de *DockerEngine) Init() bool {
}

// Check makes check.
func (de DockerEngine) Check() bool {
func (de *DockerEngine) Check() bool {
return len(de.Collect()) > 0
}

// Charts creates Charts.
func (DockerEngine) Charts() *Charts {
return charts.Copy()
func (de DockerEngine) Charts() *Charts {
if !de.isSwarmManager {
return charts.Copy()
}

c := charts.Copy()
err := c.Add(*swarmManagerCharts.Copy()...)
if err != nil {
de.Error(err)
return nil
}
return c
}

// Collect collects metrics.
Expand Down
Loading

0 comments on commit cca0d2c

Please sign in to comment.