Skip to content

Commit ac6930b

Browse files
authored
Merge pull request #36 from aledbf/prometheus-nginx
Add nginx metrics to prometheus
2 parents 666cbf5 + f7011d2 commit ac6930b

File tree

117 files changed

+7440
-4828
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+7440
-4828
lines changed

Godeps/Godeps.json

+422-403
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controllers/gce/controller/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ type LoadBalancerController struct {
9090
func NewLoadBalancerController(kubeClient client.Interface, clusterManager *ClusterManager, resyncPeriod time.Duration, namespace string) (*LoadBalancerController, error) {
9191
eventBroadcaster := record.NewBroadcaster()
9292
eventBroadcaster.StartLogging(glog.Infof)
93-
eventBroadcaster.StartRecordingToSink(unversionedcore.EventSinkImpl{
93+
eventBroadcaster.StartRecordingToSink(&unversionedcore.EventSinkImpl{
9494
Interface: kubeClient.Core().Events(""),
9595
})
9696
lbc := LoadBalancerController{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
/*
2+
Copyright 2016 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"path/filepath"
21+
22+
"github.com/golang/glog"
23+
24+
common "github.com/ncabatoff/process-exporter"
25+
"github.com/ncabatoff/process-exporter/proc"
26+
"github.com/prometheus/client_golang/prometheus"
27+
)
28+
29+
type exeMatcher struct {
30+
name string
31+
args []string
32+
}
33+
34+
func (em exeMatcher) MatchAndName(nacl common.NameAndCmdline) (bool, string) {
35+
if len(nacl.Cmdline) == 0 {
36+
return false, ""
37+
}
38+
cmd := filepath.Base(nacl.Cmdline[0])
39+
return em.name == cmd, ""
40+
}
41+
42+
func (n *NGINXController) setupMonitor(args []string) {
43+
pc, err := newProcessCollector(true, exeMatcher{"nginx", args})
44+
if err != nil {
45+
glog.Fatalf("unexpedted error registering nginx collector: %v", err)
46+
}
47+
err = prometheus.Register(pc)
48+
if err != nil {
49+
glog.Warningf("unexpected error registering nginx collector: %v", err)
50+
}
51+
}
52+
53+
var (
54+
numprocsDesc = prometheus.NewDesc(
55+
"nginx_num_procs",
56+
"number of processes",
57+
nil, nil)
58+
59+
cpuSecsDesc = prometheus.NewDesc(
60+
"nginx_cpu_seconds_total",
61+
"Cpu usage in seconds",
62+
nil, nil)
63+
64+
readBytesDesc = prometheus.NewDesc(
65+
"nginx_read_bytes_total",
66+
"number of bytes read",
67+
nil, nil)
68+
69+
writeBytesDesc = prometheus.NewDesc(
70+
"nginx_write_bytes_total",
71+
"number of bytes written",
72+
nil, nil)
73+
74+
memResidentbytesDesc = prometheus.NewDesc(
75+
"nginx_resident_memory_bytes",
76+
"number of bytes of memory in use",
77+
nil, nil)
78+
79+
memVirtualbytesDesc = prometheus.NewDesc(
80+
"nginx_virtual_memory_bytes",
81+
"number of bytes of memory in use",
82+
nil, nil)
83+
84+
startTimeDesc = prometheus.NewDesc(
85+
"nginx_oldest_start_time_seconds",
86+
"start time in seconds since 1970/01/01",
87+
nil, nil)
88+
89+
activeDesc = prometheus.NewDesc(
90+
"nginx_active_connections",
91+
"total number of active connections",
92+
nil, nil)
93+
94+
acceptedDesc = prometheus.NewDesc(
95+
"nginx_accepted_connections",
96+
"total number of accepted client connections",
97+
nil, nil)
98+
99+
handledDesc = prometheus.NewDesc(
100+
"nginx_handled_connections",
101+
"total number of handled connections",
102+
nil, nil)
103+
104+
requestsDesc = prometheus.NewDesc(
105+
"nginx_total_requests",
106+
"total number of client requests",
107+
nil, nil)
108+
109+
readingDesc = prometheus.NewDesc(
110+
"nginx_current_reading_connections",
111+
"current number of connections where nginx is reading the request header",
112+
nil, nil)
113+
114+
writingDesc = prometheus.NewDesc(
115+
"nginx_current_writing_connections",
116+
"current number of connections where nginx is writing the response back to the client",
117+
nil, nil)
118+
119+
waitingDesc = prometheus.NewDesc(
120+
"nginx_current_waiting_connections",
121+
"current number of idle client connections waiting for a request",
122+
nil, nil)
123+
)
124+
125+
type (
126+
scrapeRequest struct {
127+
results chan<- prometheus.Metric
128+
done chan struct{}
129+
}
130+
131+
namedProcessCollector struct {
132+
scrapeChan chan scrapeRequest
133+
*proc.Grouper
134+
fs *proc.FS
135+
}
136+
)
137+
138+
func newProcessCollector(
139+
children bool,
140+
n common.MatchNamer) (*namedProcessCollector, error) {
141+
142+
fs, err := proc.NewFS("/proc")
143+
if err != nil {
144+
return nil, err
145+
}
146+
p := &namedProcessCollector{
147+
scrapeChan: make(chan scrapeRequest),
148+
Grouper: proc.NewGrouper(children, n),
149+
fs: fs,
150+
}
151+
_, err = p.Update(p.fs.AllProcs())
152+
if err != nil {
153+
return nil, err
154+
}
155+
156+
go p.start()
157+
158+
return p, nil
159+
}
160+
161+
// Describe implements prometheus.Collector.
162+
func (p *namedProcessCollector) Describe(ch chan<- *prometheus.Desc) {
163+
ch <- cpuSecsDesc
164+
ch <- numprocsDesc
165+
ch <- readBytesDesc
166+
ch <- writeBytesDesc
167+
ch <- memResidentbytesDesc
168+
ch <- memVirtualbytesDesc
169+
ch <- startTimeDesc
170+
}
171+
172+
// Collect implements prometheus.Collector.
173+
func (p *namedProcessCollector) Collect(ch chan<- prometheus.Metric) {
174+
req := scrapeRequest{results: ch, done: make(chan struct{})}
175+
p.scrapeChan <- req
176+
<-req.done
177+
}
178+
179+
func (p *namedProcessCollector) start() {
180+
for req := range p.scrapeChan {
181+
ch := req.results
182+
p.scrape(ch)
183+
req.done <- struct{}{}
184+
}
185+
}
186+
187+
func (p *namedProcessCollector) scrape(ch chan<- prometheus.Metric) {
188+
s, err := getNginxStatus()
189+
if err != nil {
190+
glog.Warningf("unexpected error obtaining nginx status info: %v", err)
191+
return
192+
}
193+
194+
ch <- prometheus.MustNewConstMetric(activeDesc,
195+
prometheus.GaugeValue, float64(s.Active))
196+
ch <- prometheus.MustNewConstMetric(acceptedDesc,
197+
prometheus.GaugeValue, float64(s.Accepted))
198+
ch <- prometheus.MustNewConstMetric(handledDesc,
199+
prometheus.GaugeValue, float64(s.Handled))
200+
ch <- prometheus.MustNewConstMetric(requestsDesc,
201+
prometheus.GaugeValue, float64(s.Requests))
202+
ch <- prometheus.MustNewConstMetric(readingDesc,
203+
prometheus.GaugeValue, float64(s.Reading))
204+
ch <- prometheus.MustNewConstMetric(writingDesc,
205+
prometheus.GaugeValue, float64(s.Writing))
206+
ch <- prometheus.MustNewConstMetric(waitingDesc,
207+
prometheus.GaugeValue, float64(s.Waiting))
208+
209+
_, err = p.Update(p.fs.AllProcs())
210+
if err != nil {
211+
glog.Warningf("unexpected error obtaining nginx process info: %v", err)
212+
return
213+
}
214+
215+
for gname, gcounts := range p.Groups() {
216+
glog.Infof("%v", gname)
217+
glog.Infof("%v", gcounts)
218+
ch <- prometheus.MustNewConstMetric(numprocsDesc,
219+
prometheus.GaugeValue, float64(gcounts.Procs))
220+
ch <- prometheus.MustNewConstMetric(memResidentbytesDesc,
221+
prometheus.GaugeValue, float64(gcounts.Memresident))
222+
ch <- prometheus.MustNewConstMetric(memVirtualbytesDesc,
223+
prometheus.GaugeValue, float64(gcounts.Memvirtual))
224+
ch <- prometheus.MustNewConstMetric(startTimeDesc,
225+
prometheus.GaugeValue, float64(gcounts.OldestStartTime.Unix()))
226+
ch <- prometheus.MustNewConstMetric(cpuSecsDesc,
227+
prometheus.CounterValue, gcounts.Cpu)
228+
ch <- prometheus.MustNewConstMetric(readBytesDesc,
229+
prometheus.CounterValue, float64(gcounts.ReadBytes))
230+
ch <- prometheus.MustNewConstMetric(writeBytesDesc,
231+
prometheus.CounterValue, float64(gcounts.WriteBytes))
232+
}
233+
}

controllers/nginx/pkg/cmd/controller/nginx.go

+58-10
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,30 @@ package main
1818

1919
import (
2020
"bytes"
21+
"errors"
2122
"fmt"
2223
"io/ioutil"
24+
"net"
2325
"net/http"
2426
"os"
2527
"os/exec"
28+
"syscall"
29+
"time"
2630

2731
"github.com/golang/glog"
28-
2932
"k8s.io/kubernetes/pkg/api"
3033

31-
"k8s.io/ingress/core/pkg/ingress"
32-
"k8s.io/ingress/core/pkg/ingress/defaults"
33-
34-
"errors"
35-
3634
"k8s.io/ingress/controllers/nginx/pkg/config"
3735
ngx_template "k8s.io/ingress/controllers/nginx/pkg/template"
3836
"k8s.io/ingress/controllers/nginx/pkg/version"
37+
"k8s.io/ingress/core/pkg/ingress"
38+
"k8s.io/ingress/core/pkg/ingress/defaults"
39+
)
40+
41+
// Location of the endpoints NGINX exposes for internal use.
const (
	// ngxHealthPort is the local port of the health endpoint queried by Check.
	ngxHealthPort = 18080
	// ngxHealthPath is the path of the health endpoint queried by Check.
	ngxHealthPath = "/healthz"
	// ngxStatusPath is the path of the nginx status page.
	// NOTE(review): presumably read by getNginxStatus - confirm.
	ngxStatusPath = "/internal_nginx_status"
)
4046

4147
var (
@@ -78,6 +84,7 @@ Error loading new template : %v
7884
}
7985

8086
n.t = ngxTpl
87+
8188
go n.Start()
8289

8390
return n
@@ -93,15 +100,56 @@ type NGINXController struct {
93100
// Start start a new NGINX master process running in foreground.
94101
func (n NGINXController) Start() {
95102
glog.Info("starting NGINX process...")
103+
104+
done := make(chan error, 1)
96105
cmd := exec.Command(n.binary, "-c", cfgPath)
106+
n.start(cmd, done)
107+
108+
// if the nginx master process dies the workers continue to process requests,
109+
// passing checks but in case of updates in ingress no updates will be
110+
// reflected in the nginx configuration which can lead to confusion and report
111+
// issues because of this behavior.
112+
// To avoid this issue we restart nginx in case of errors.
113+
for {
114+
err := <-done
115+
if exitError, ok := err.(*exec.ExitError); ok {
116+
waitStatus := exitError.Sys().(syscall.WaitStatus)
117+
glog.Warningf(`
118+
-------------------------------------------------------------------------------
119+
NGINX master process died (%v): %v
120+
-------------------------------------------------------------------------------
121+
`, waitStatus.ExitStatus(), err)
122+
}
123+
cmd.Process.Release()
124+
cmd = exec.Command(n.binary, "-c", cfgPath)
125+
// we wait until the workers are killed
126+
for {
127+
conn, err := net.DialTimeout("tcp", "127.0.0.1:80", 1*time.Second)
128+
if err == nil {
129+
conn.Close()
130+
break
131+
}
132+
time.Sleep(1 * time.Second)
133+
}
134+
// start a new nginx master process
135+
n.start(cmd, done)
136+
}
137+
}
138+
139+
func (n *NGINXController) start(cmd *exec.Cmd, done chan error) {
97140
cmd.Stdout = os.Stdout
98141
cmd.Stderr = os.Stderr
99142
if err := cmd.Start(); err != nil {
100143
glog.Fatalf("nginx error: %v", err)
144+
done <- err
145+
return
101146
}
102-
if err := cmd.Wait(); err != nil {
103-
glog.Errorf("nginx error: %v", err)
104-
}
147+
148+
n.setupMonitor(cmd.Args)
149+
150+
go func() {
151+
done <- cmd.Wait()
152+
}()
105153
}
106154

107155
// Reload checks if the running configuration file is different
@@ -260,7 +308,7 @@ func (n NGINXController) Name() string {
260308

261309
// Check returns if the nginx healthz endpoint is returning ok (status code 200)
262310
func (n NGINXController) Check(_ *http.Request) error {
263-
res, err := http.Get("http://127.0.0.1:18080/healthz")
311+
res, err := http.Get(fmt.Sprintf("http://localhost:%v%v", ngxHealthPort, ngxHealthPath))
264312
if err != nil {
265313
return err
266314
}

0 commit comments

Comments
 (0)