Skip to content
This repository has been archived by the owner on Mar 27, 2024. It is now read-only.

docker_engine: collect docker swarm metrics #213

Merged
merged 5 commits into from
May 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions modules/docker_engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,42 @@ It produces the following charts:
4. **Health Checks** in events/s
* fails

<br>

If Docker is running in [swarm mode](https://docs.docker.com/engine/swarm/) and the instance is a swarm manager, the following additional charts are added:

1. **Swarm Manager Leader** in bool
* is_leader

2. **Swarm Manager Object Store** in count
* nodes
* services
* tasks
* networks
* secrets
* configs

3. **Swarm Manager Nodes Per State** in count
* ready
* down
* unknown
* disconnected

4. **Swarm Manager Tasks Per State** in count
* running
* failed
* ready
* rejected
* starting
* shutdown
* new
* orphaned
* preparing
* pending
* complete
* remove
* accepted
* assigned

### configuration

Expand Down
71 changes: 71 additions & 0 deletions modules/docker_engine/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ var charts = Charts{
Title: "Container Actions",
Units: "actions/s",
Fam: "containers",
Ctx: "docker_engine.engine_daemon_container_actions",
Type: module.Stacked,
Dims: Dims{
{ID: "container_actions_changes", Name: "changes", Algo: module.Incremental},
Expand All @@ -29,6 +30,7 @@ var charts = Charts{
Title: "Containers In Various States",
Units: "count",
Fam: "containers",
Ctx: "docker_engine.engine_daemon_container_states_containers",
Type: module.Stacked,
Dims: Dims{
{ID: "container_states_running", Name: "running"},
Expand All @@ -41,6 +43,7 @@ var charts = Charts{
Title: "Builder Builds Fails By Reason",
Units: "fails/s",
Fam: "builder",
Ctx: "docker_engine.builder_builds_failed_total",
Type: module.Stacked,
Dims: Dims{
{ID: "builder_fails_build_canceled", Name: "build_canceled", Algo: module.Incremental},
Expand All @@ -58,8 +61,76 @@ var charts = Charts{
Title: "Health Checks",
Units: "events/s",
Fam: "health checks",
Ctx: "docker_engine.engine_daemon_health_checks_failed_total",
Dims: Dims{
{ID: "health_checks_failed", Name: "fails", Algo: module.Incremental},
},
},
}

// swarmManagerCharts holds the chart definitions that only apply to swarm
// managers: the leader flag, the object store counts, nodes per state and
// tasks per state. Every chart belongs to the "swarm" family.
var swarmManagerCharts = Charts{
	// Leader flag of this manager (the only chart here without an explicit Type).
	{
		ID:    "swarm_manager_leader",
		Title: "Swarm Manager Leader",
		Units: "bool",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_leader",
		Dims: Dims{
			{ID: "swarm_manager_leader", Name: "is_leader"},
		},
	},
	// Number of objects of each kind kept in the manager's store.
	{
		ID:    "swarm_manager_object_store",
		Title: "Swarm Manager Object Store",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_object_store",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_nodes_total", Name: "nodes"},
			{ID: "swarm_manager_services_total", Name: "services"},
			{ID: "swarm_manager_tasks_total", Name: "tasks"},
			{ID: "swarm_manager_networks_total", Name: "networks"},
			{ID: "swarm_manager_secrets_total", Name: "secrets"},
			{ID: "swarm_manager_configs_total", Name: "configs"},
		},
	},
	// Nodes broken down by state.
	{
		ID:    "swarm_manager_nodes_per_state",
		Title: "Swarm Manager Nodes Per State",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_nodes_per_state",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_nodes_state_ready", Name: "ready"},
			{ID: "swarm_manager_nodes_state_down", Name: "down"},
			{ID: "swarm_manager_nodes_state_unknown", Name: "unknown"},
			{ID: "swarm_manager_nodes_state_disconnected", Name: "disconnected"},
		},
	},
	// Tasks broken down by state.
	{
		ID:    "swarm_manager_tasks_per_state",
		Title: "Swarm Manager Tasks Per State",
		Units: "count",
		Fam:   "swarm",
		Ctx:   "docker_engine.swarm_manager_tasks_per_state",
		Type:  module.Stacked,
		Dims: Dims{
			{ID: "swarm_manager_tasks_state_running", Name: "running"},
			{ID: "swarm_manager_tasks_state_failed", Name: "failed"},
			{ID: "swarm_manager_tasks_state_ready", Name: "ready"},
			{ID: "swarm_manager_tasks_state_rejected", Name: "rejected"},
			{ID: "swarm_manager_tasks_state_starting", Name: "starting"},
			{ID: "swarm_manager_tasks_state_shutdown", Name: "shutdown"},
			{ID: "swarm_manager_tasks_state_new", Name: "new"},
			{ID: "swarm_manager_tasks_state_orphaned", Name: "orphaned"},
			{ID: "swarm_manager_tasks_state_preparing", Name: "preparing"},
			{ID: "swarm_manager_tasks_state_pending", Name: "pending"},
			{ID: "swarm_manager_tasks_state_complete", Name: "complete"},
			{ID: "swarm_manager_tasks_state_remove", Name: "remove"},
			{ID: "swarm_manager_tasks_state_accepted", Name: "accepted"},
			{ID: "swarm_manager_tasks_state_assigned", Name: "assigned"},
		},
	},
}
158 changes: 132 additions & 26 deletions modules/docker_engine/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,178 @@ func (de *DockerEngine) collect() (map[string]int64, error) {

var mx metrics

collectHealthChecks(raw, &mx)
collectContainerActions(raw, &mx)
collectContainerStates(raw, &mx)
collectBuilderBuildsFails(raw, &mx)
collectHealthChecks(&mx, raw)
collectContainerActions(&mx, raw)
collectContainerStates(&mx, raw)
collectBuilderBuildsFails(&mx, raw)

if isSwarmManager(raw) {
de.isSwarmManager = true
mx.SwarmManager = &swarmManager{}
collectSwarmManager(&mx, raw)
}

return stm.ToMap(mx), nil

}

func collectHealthChecks(raw prometheus.Metrics, mx *metrics) {
m := raw.FindByName("engine_daemon_health_checks_failed_total")
mx.HealthChecks.Failed.Set(m.Max())
func collectHealthChecks(mx *metrics, raw prometheus.Metrics) {
	// Store Max() over the "engine_daemon_health_checks_failed_total" series
	// as the failed health checks value.
	failed := raw.FindByName("engine_daemon_health_checks_failed_total")
	mx.HealthChecks.Failed.Set(failed.Max())
}

func collectContainerActions(raw prometheus.Metrics, mx *metrics) {
func collectContainerActions(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("engine_daemon_container_actions_seconds_count") {
action := metric.Labels.Get("action")
if action == "" {
continue
}
v := metric.Value

switch action {
default:
case "changes":
mx.Container.Actions.Changes.Set(metric.Value)
mx.Container.Actions.Changes.Set(v)
case "commit":
mx.Container.Actions.Commit.Set(metric.Value)
mx.Container.Actions.Commit.Set(v)
case "create":
mx.Container.Actions.Create.Set(metric.Value)
mx.Container.Actions.Create.Set(v)
case "delete":
mx.Container.Actions.Delete.Set(metric.Value)
mx.Container.Actions.Delete.Set(v)
case "start":
mx.Container.Actions.Start.Set(metric.Value)
mx.Container.Actions.Start.Set(v)
}
}
}

func collectContainerStates(raw prometheus.Metrics, mx *metrics) {
func collectContainerStates(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("engine_daemon_container_states_containers") {
state := metric.Labels.Get("state")
if state == "" {
continue
}
v := metric.Value

switch state {
default:
case "paused":
mx.Container.States.Paused.Set(metric.Value)
mx.Container.States.Paused.Set(v)
case "running":
mx.Container.States.Running.Set(metric.Value)
mx.Container.States.Running.Set(v)
case "stopped":
mx.Container.States.Stopped.Set(metric.Value)
mx.Container.States.Stopped.Set(v)
}
}
}

func collectBuilderBuildsFails(raw prometheus.Metrics, ms *metrics) {
func collectBuilderBuildsFails(mx *metrics, raw prometheus.Metrics) {
for _, metric := range raw.FindByName("builder_builds_failed_total") {
reason := metric.Labels.Get("reason")
if reason == "" {
continue
}
v := metric.Value

switch reason {
default:
case "build_canceled":
ms.Builder.FailsByReason.BuildCanceled.Set(metric.Value)
mx.Builder.FailsByReason.BuildCanceled.Set(v)
case "build_target_not_reachable_error":
ms.Builder.FailsByReason.BuildTargetNotReachableError.Set(metric.Value)
mx.Builder.FailsByReason.BuildTargetNotReachableError.Set(v)
case "command_not_supported_error":
ms.Builder.FailsByReason.CommandNotSupportedError.Set(metric.Value)
mx.Builder.FailsByReason.CommandNotSupportedError.Set(v)
case "dockerfile_empty_error":
ms.Builder.FailsByReason.DockerfileEmptyError.Set(metric.Value)
mx.Builder.FailsByReason.DockerfileEmptyError.Set(v)
case "dockerfile_syntax_error":
ms.Builder.FailsByReason.DockerfileSyntaxError.Set(metric.Value)
mx.Builder.FailsByReason.DockerfileSyntaxError.Set(v)
case "error_processing_commands_error":
ms.Builder.FailsByReason.ErrorProcessingCommandsError.Set(metric.Value)
mx.Builder.FailsByReason.ErrorProcessingCommandsError.Set(v)
case "missing_onbuild_arguments_error":
ms.Builder.FailsByReason.MissingOnbuildArgumentsError.Set(metric.Value)
mx.Builder.FailsByReason.MissingOnbuildArgumentsError.Set(v)
case "unknown_instruction_error":
ms.Builder.FailsByReason.UnknownInstructionError.Set(metric.Value)
mx.Builder.FailsByReason.UnknownInstructionError.Set(v)
}
}
}

// isSwarmManager reports whether the scraped metrics mark this instance as a
// swarm manager, i.e. whether Max() over the "swarm_node_manager" series is 1.
func isSwarmManager(raw prometheus.Metrics) bool {
	managerFlag := raw.FindByName("swarm_node_manager").Max()
	return managerFlag == 1
}

// collectSwarmManager fills mx.SwarmManager from the swarm_manager_* series in raw.
//
// The object store counters and the leader flag are taken as Max() over the
// series with the matching name. Nodes and tasks are read per "state" label:
// each recognised state is stored in its own field, and every sample carrying
// a non-empty state (recognised or not) is added to the respective total.
//
// mx.SwarmManager is dereferenced here, so it must be non-nil on entry.
func collectSwarmManager(mx *metrics, raw prometheus.Metrics) {
	sm := mx.SwarmManager

	// Plain counters: one value per metric name.
	sm.Configs.Set(raw.FindByName("swarm_manager_configs_total").Max())
	sm.Networks.Set(raw.FindByName("swarm_manager_networks_total").Max())
	sm.Secrets.Set(raw.FindByName("swarm_manager_secrets_total").Max())
	sm.Services.Set(raw.FindByName("swarm_manager_services_total").Max())
	sm.IsLeader.Set(raw.FindByName("swarm_manager_leader").Max())

	// Nodes: one sample per state, summed into the total.
	for _, sample := range raw.FindByName("swarm_manager_nodes") {
		count := sample.Value

		switch sample.Labels.Get("state") {
		case "":
			// Samples without a state are ignored entirely, total included.
			continue
		case "ready":
			sm.Nodes.PerState.Ready.Set(count)
		case "down":
			sm.Nodes.PerState.Down.Set(count)
		case "unknown":
			sm.Nodes.PerState.Unknown.Set(count)
		case "disconnected":
			sm.Nodes.PerState.Disconnected.Set(count)
		}
		sm.Nodes.Total.Add(count)
	}

	// Tasks: one sample per state, summed into the total.
	for _, sample := range raw.FindByName("swarm_manager_tasks_total") {
		count := sample.Value

		switch sample.Labels.Get("state") {
		case "":
			// Samples without a state are ignored entirely, total included.
			continue
		case "running":
			sm.Tasks.PerState.Running.Set(count)
		case "failed":
			sm.Tasks.PerState.Failed.Set(count)
		case "ready":
			sm.Tasks.PerState.Ready.Set(count)
		case "rejected":
			sm.Tasks.PerState.Rejected.Set(count)
		case "starting":
			sm.Tasks.PerState.Starting.Set(count)
		case "shutdown":
			sm.Tasks.PerState.Shutdown.Set(count)
		case "new":
			sm.Tasks.PerState.New.Set(count)
		case "orphaned":
			sm.Tasks.PerState.Orphaned.Set(count)
		case "preparing":
			sm.Tasks.PerState.Preparing.Set(count)
		case "pending":
			sm.Tasks.PerState.Pending.Set(count)
		case "complete":
			sm.Tasks.PerState.Complete.Set(count)
		case "remove":
			sm.Tasks.PerState.Remove.Set(count)
		case "accepted":
			sm.Tasks.PerState.Accepted.Set(count)
		case "assigned":
			sm.Tasks.PerState.Assigned.Set(count)
		}
		sm.Tasks.Total.Add(count)
	}
}
21 changes: 16 additions & 5 deletions modules/docker_engine/docker_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ type Config struct {
// DockerEngine is the docker_engine module.
type DockerEngine struct {
module.Base
Config `yaml:",inline"`
prom prometheus.Prometheus
Config `yaml:",inline"`
prom prometheus.Prometheus
isSwarmManager bool
}

// Cleanup makes cleanup.
Expand Down Expand Up @@ -74,13 +75,23 @@ func (de *DockerEngine) Init() bool {
}

// Check makes check.
func (de DockerEngine) Check() bool {
func (de *DockerEngine) Check() bool {
	// The check passes only when a collection run yields at least one metric.
	collected := de.Collect()
	return len(collected) != 0
}

// Charts creates Charts.
func (DockerEngine) Charts() *Charts {
return charts.Copy()
func (de DockerEngine) Charts() *Charts {
	// Always start from a private copy of the base charts.
	base := charts.Copy()
	if !de.isSwarmManager {
		return base
	}

	// Swarm managers additionally get a copy of the swarm manager charts;
	// a failure to add them is logged and reported as nil.
	if err := base.Add(*swarmManagerCharts.Copy()...); err != nil {
		de.Error(err)
		return nil
	}
	return base
}

// Collect collects metrics.
Expand Down
Loading