Skip to content

Add nginx metrics to prometheus #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 30, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
825 changes: 422 additions & 403 deletions Godeps/Godeps.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion controllers/gce/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ type LoadBalancerController struct {
func NewLoadBalancerController(kubeClient client.Interface, clusterManager *ClusterManager, resyncPeriod time.Duration, namespace string) (*LoadBalancerController, error) {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(glog.Infof)
eventBroadcaster.StartRecordingToSink(unversionedcore.EventSinkImpl{
eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{
Interface: kubeClient.Core().Events(""),
})
lbc := LoadBalancerController{
Expand Down
233 changes: 233 additions & 0 deletions controllers/nginx/pkg/cmd/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"path/filepath"

"github.com/golang/glog"

common "github.com/ncabatoff/process-exporter"
"github.com/ncabatoff/process-exporter/proc"
"github.com/prometheus/client_golang/prometheus"
)

type exeMatcher struct {
name string
args []string
}

func (em exeMatcher) MatchAndName(nacl common.NameAndCmdline) (bool, string) {
if len(nacl.Cmdline) == 0 {
return false, ""
}
cmd := filepath.Base(nacl.Cmdline[0])
return em.name == cmd, ""
}

func (n *NGINXController) setupMonitor(args []string) {
pc, err := newProcessCollector(true, exeMatcher{"nginx", args})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why run this in the nginx controller vs in the generic controller and report Status through an interface method? prometheus library seems pretty general purpose, and we could just ask the backend to return a map of like:

map [string]string{
  "num_procs": 10, 
  "read_bytes": 123,
...
}

Which the generic_controller converts into some export format (could be prometheus, could be write directly to some database etc)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because the generic_controller already exposes prometheus metrics for the go process. Using this approach each backend can decide what to export and the comments for each variable (and how to extract the information)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(could be prometheus, could be write directly to some database etc)

can we take this as an improvement and iterate?

if err != nil {
glog.Fatalf("unexpedted error registering nginx collector: %v", err)
}
err = prometheus.Register(pc)
if err != nil {
glog.Warningf("unexpected error registering nginx collector: %v", err)
}
}

var (
numprocsDesc = prometheus.NewDesc(
"nginx_num_procs",
"number of processes",
nil, nil)

cpuSecsDesc = prometheus.NewDesc(
"nginx_cpu_seconds_total",
"Cpu usage in seconds",
nil, nil)

readBytesDesc = prometheus.NewDesc(
"nginx_read_bytes_total",
"number of bytes read",
nil, nil)

writeBytesDesc = prometheus.NewDesc(
"nginx_write_bytes_total",
"number of bytes written",
nil, nil)

memResidentbytesDesc = prometheus.NewDesc(
"nginx_resident_memory_bytes",
"number of bytes of memory in use",
nil, nil)

memVirtualbytesDesc = prometheus.NewDesc(
"nginx_virtual_memory_bytes",
"number of bytes of memory in use",
nil, nil)

startTimeDesc = prometheus.NewDesc(
"nginx_oldest_start_time_seconds",
"start time in seconds since 1970/01/01",
nil, nil)

activeDesc = prometheus.NewDesc(
"nginx_active_connections",
"total number of active connections",
nil, nil)

acceptedDesc = prometheus.NewDesc(
"nginx_accepted_connections",
"total number of accepted client connections",
nil, nil)

handledDesc = prometheus.NewDesc(
"nginx_handled_connections",
"total number of handled connections",
nil, nil)

requestsDesc = prometheus.NewDesc(
"nginx_total_requests",
"total number of client requests",
nil, nil)

readingDesc = prometheus.NewDesc(
"nginx_current_reading_connections",
"current number of connections where nginx is reading the request header",
nil, nil)

writingDesc = prometheus.NewDesc(
"nginx_current_writing_connections",
"current number of connections where nginx is writing the response back to the client",
nil, nil)

waitingDesc = prometheus.NewDesc(
"nginx_current_waiting_connections",
"current number of idle client connections waiting for a request",
nil, nil)
)

type (
scrapeRequest struct {
results chan<- prometheus.Metric
done chan struct{}
}

namedProcessCollector struct {
scrapeChan chan scrapeRequest
*proc.Grouper
fs *proc.FS
}
)

func newProcessCollector(
children bool,
n common.MatchNamer) (*namedProcessCollector, error) {

fs, err := proc.NewFS("/proc")
if err != nil {
return nil, err
}
p := &namedProcessCollector{
scrapeChan: make(chan scrapeRequest),
Grouper: proc.NewGrouper(children, n),
fs: fs,
}
_, err = p.Update(p.fs.AllProcs())
if err != nil {
return nil, err
}

go p.start()

return p, nil
}

// Describe implements prometheus.Collector.
func (p *namedProcessCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- cpuSecsDesc
ch <- numprocsDesc
ch <- readBytesDesc
ch <- writeBytesDesc
ch <- memResidentbytesDesc
ch <- memVirtualbytesDesc
ch <- startTimeDesc
}

// Collect implements prometheus.Collector.
func (p *namedProcessCollector) Collect(ch chan<- prometheus.Metric) {
req := scrapeRequest{results: ch, done: make(chan struct{})}
p.scrapeChan <- req
<-req.done
}

func (p *namedProcessCollector) start() {
for req := range p.scrapeChan {
ch := req.results
p.scrape(ch)
req.done <- struct{}{}
}
}

func (p *namedProcessCollector) scrape(ch chan<- prometheus.Metric) {
s, err := getNginxStatus()
if err != nil {
glog.Warningf("unexpected error obtaining nginx status info: %v", err)
return
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there's a lot of repetition here, though I haven't spent time thinking about a better solution

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be isolated to this backend.
In case of caddy or haproxy there's already an exporter:
https://github.com/miekg/caddy-prometheus
https://github.com/prometheus/haproxy_exporter

ch <- prometheus.MustNewConstMetric(activeDesc,
prometheus.GaugeValue, float64(s.Active))
ch <- prometheus.MustNewConstMetric(acceptedDesc,
prometheus.GaugeValue, float64(s.Accepted))
ch <- prometheus.MustNewConstMetric(handledDesc,
prometheus.GaugeValue, float64(s.Handled))
ch <- prometheus.MustNewConstMetric(requestsDesc,
prometheus.GaugeValue, float64(s.Requests))
ch <- prometheus.MustNewConstMetric(readingDesc,
prometheus.GaugeValue, float64(s.Reading))
ch <- prometheus.MustNewConstMetric(writingDesc,
prometheus.GaugeValue, float64(s.Writing))
ch <- prometheus.MustNewConstMetric(waitingDesc,
prometheus.GaugeValue, float64(s.Waiting))

_, err = p.Update(p.fs.AllProcs())
if err != nil {
glog.Warningf("unexpected error obtaining nginx process info: %v", err)
return
}

for gname, gcounts := range p.Groups() {
glog.Infof("%v", gname)
glog.Infof("%v", gcounts)
ch <- prometheus.MustNewConstMetric(numprocsDesc,
prometheus.GaugeValue, float64(gcounts.Procs))
ch <- prometheus.MustNewConstMetric(memResidentbytesDesc,
prometheus.GaugeValue, float64(gcounts.Memresident))
ch <- prometheus.MustNewConstMetric(memVirtualbytesDesc,
prometheus.GaugeValue, float64(gcounts.Memvirtual))
ch <- prometheus.MustNewConstMetric(startTimeDesc,
prometheus.GaugeValue, float64(gcounts.OldestStartTime.Unix()))
ch <- prometheus.MustNewConstMetric(cpuSecsDesc,
prometheus.CounterValue, gcounts.Cpu)
ch <- prometheus.MustNewConstMetric(readBytesDesc,
prometheus.CounterValue, float64(gcounts.ReadBytes))
ch <- prometheus.MustNewConstMetric(writeBytesDesc,
prometheus.CounterValue, float64(gcounts.WriteBytes))
}
}
68 changes: 58 additions & 10 deletions controllers/nginx/pkg/cmd/controller/nginx.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,30 @@ package main

import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net"
"net/http"
"os"
"os/exec"
"syscall"
"time"

"github.com/golang/glog"

"k8s.io/kubernetes/pkg/api"

"k8s.io/ingress/core/pkg/ingress"
"k8s.io/ingress/core/pkg/ingress/defaults"

"errors"

"k8s.io/ingress/controllers/nginx/pkg/config"
ngx_template "k8s.io/ingress/controllers/nginx/pkg/template"
"k8s.io/ingress/controllers/nginx/pkg/version"
"k8s.io/ingress/core/pkg/ingress"
"k8s.io/ingress/core/pkg/ingress/defaults"
)

const (
ngxHealthPort = 18080
ngxHealthPath = "/healthz"
ngxStatusPath = "/internal_nginx_status"
)

var (
Expand Down Expand Up @@ -78,6 +84,7 @@ Error loading new template : %v
}

n.t = ngxTpl

go n.Start()

return n
Expand All @@ -93,15 +100,56 @@ type NGINXController struct {
// Start start a new NGINX master process running in foreground.
func (n NGINXController) Start() {
glog.Info("starting NGINX process...")

done := make(chan error, 1)
cmd := exec.Command(n.binary, "-c", cfgPath)
n.start(cmd, done)

// if the nginx master process dies the workers continue to process requests,
// passing checks but in case of updates in ingress no updates will be
// reflected in the nginx configuration which can lead to confusion and report
// issues because of this behavior.
// To avoid this issue we restart nginx in case of errors.
for {
err := <-done
if exitError, ok := err.(*exec.ExitError); ok {
waitStatus := exitError.Sys().(syscall.WaitStatus)
glog.Warningf(`
-------------------------------------------------------------------------------
NGINX master process died (%v): %v
-------------------------------------------------------------------------------
`, waitStatus.ExitStatus(), err)
}
cmd.Process.Release()
cmd = exec.Command(n.binary, "-c", cfgPath)
// we wait until the workers are killed
for {
conn, err := net.DialTimeout("tcp", "127.0.0.1:80", 1*time.Second)
if err == nil {
conn.Close()
break
}
time.Sleep(1 * time.Second)
}
// start a new nginx master process
n.start(cmd, done)
}
}

func (n *NGINXController) start(cmd *exec.Cmd, done chan error) {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
glog.Fatalf("nginx error: %v", err)
done <- err
return
}
if err := cmd.Wait(); err != nil {
glog.Errorf("nginx error: %v", err)
}

n.setupMonitor(cmd.Args)

go func() {
done <- cmd.Wait()
}()
}

// Reload checks if the running configuration file is different
Expand Down Expand Up @@ -260,7 +308,7 @@ func (n NGINXController) Name() string {

// Check returns if the nginx healthz endpoint is returning ok (status code 200)
func (n NGINXController) Check(_ *http.Request) error {
res, err := http.Get("http://127.0.0.1:18080/healthz")
res, err := http.Get(fmt.Sprintf("http://localhost:%v%v", ngxHealthPort, ngxHealthPath))
if err != nil {
return err
}
Expand Down
Loading