Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(prometheus): extra labels from nginx var for http request metrics #7549

Merged
merged 10 commits into from
Aug 1, 2022
66 changes: 57 additions & 9 deletions apisix/plugins/prometheus/exporter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ local get_services = require("apisix.http.service").services
local get_consumers = require("apisix.consumer").consumers
local get_upstreams = require("apisix.upstream").upstreams
local clear_tab = core.table.clear
local tab_insert_tail = core.table.insert_tail
local get_stream_routes = router.stream_routes
local get_protos = require("apisix.plugins.grpc-transcode.proto").protos
local service_fetch = require("apisix.http.service").get
local latency_details = require("apisix.utils.log-util").latency_details_in_ms
local xrpc = require("apisix.stream.xrpc")
local unpack = unpack


local ngx_capture
Expand All @@ -65,6 +67,28 @@ local function gen_arr(...)
return inner_tab_arr
end

local ngx_var_label_values_tbl = {}

-- Collect the values of the custom labels configured for the metric `name`.
-- ctx:  request context; label values are read from ctx.var
-- name: metric key looked up in the "prometheus" plugin attr's custom_labels
-- Returns the module-level table ngx_var_label_values_tbl, which is cleared and
-- refilled on every call, so the result must be consumed before the next call.
local function ngx_var_label_values(ctx, name)
clear_tab(ngx_var_label_values_tbl)

-- NOTE(review): attr is indexed below without a nil check -- confirm that
-- plugin_attr("prometheus") always returns a table here.
local attr = plugin.plugin_attr("prometheus")
local custom_labels = attr.custom_labels

if custom_labels and custom_labels[name] then
local labels = custom_labels[name]
-- the loop variable `name` shadows the `name` parameter; inside the loop it
-- is the configured label, which is also used as the key into ctx.var
for _, name in ipairs(labels) do
local val = ctx.var[name]
if val == nil then
-- a missing variable is reported as the literal string "nil"
val = "nil"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest using "", which is language-independent.

end
core.table.insert(ngx_var_label_values_tbl, val)
end
end

return ngx_var_label_values_tbl
end


local _M = {}

Expand Down Expand Up @@ -102,6 +126,8 @@ function _M.http_init(prometheus_enabled_in_stream)
metric_prefix = attr.metric_prefix
end

local custom_labels = attr.custom_labels

prometheus = base_prometheus.init("prometheus-metrics", metric_prefix)

metrics.connections = prometheus:gauge("nginx_http_current_connections",
Expand Down Expand Up @@ -136,17 +162,29 @@ function _M.http_init(prometheus_enabled_in_stream)
-- The consumer label indicates the name of consumer corresponds to the
-- request to the route/service, it will be an empty string if there is
-- no consumer in request.
local labels = {"code", "route", "matched_uri", "matched_host", "service", "consumer", "node"}
if custom_labels and custom_labels.http_status then
tab_insert_tail(labels, unpack(custom_labels.http_status))
end
metrics.status = prometheus:counter("http_status",
"HTTP status codes per service in APISIX",
{"code", "route", "matched_uri", "matched_host", "service", "consumer", "node"})
labels)

local labels = {"type", "route", "service", "consumer", "node"}
if custom_labels and custom_labels.http_latency then
tab_insert_tail(labels, unpack(custom_labels.http_latency))
end
metrics.latency = prometheus:histogram("http_latency",
"HTTP request latency in milliseconds per service in APISIX",
{"type", "route", "service", "consumer", "node"}, DEFAULT_BUCKETS)
labels, DEFAULT_BUCKETS)

local labels = {"type", "route", "service", "consumer", "node"}
if custom_labels and custom_labels.bandwidth then
tab_insert_tail(labels, unpack(custom_labels.bandwidth))
end
metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes consumed per service in APISIX",
{"type", "route", "service", "consumer", "node"})
labels)

if prometheus_enabled_in_stream then
init_stream_metrics()
Expand Down Expand Up @@ -208,25 +246,35 @@ function _M.http_log(conf, ctx)

metrics.status:inc(1,
gen_arr(vars.status, route_id, matched_uri, matched_host,
service_id, consumer_name, balancer_ip))
service_id, consumer_name, balancer_ip,
unpack(ngx_var_label_values(ctx, "http_status"))))

local latency, upstream_latency, apisix_latency = latency_details(ctx)
local latency_ngx_var_label_values = ngx_var_label_values(ctx, "http_latency")

metrics.latency:observe(latency,
gen_arr("request", route_id, service_id, consumer_name, balancer_ip))
gen_arr("request", route_id, service_id, consumer_name, balancer_ip,
unpack(latency_ngx_var_label_values)))

if upstream_latency then
metrics.latency:observe(upstream_latency,
gen_arr("upstream", route_id, service_id, consumer_name, balancer_ip))
gen_arr("upstream", route_id, service_id, consumer_name, balancer_ip,
unpack(latency_ngx_var_label_values)))
end

metrics.latency:observe(apisix_latency,
gen_arr("apisix", route_id, service_id, consumer_name, balancer_ip))
gen_arr("apisix", route_id, service_id, consumer_name, balancer_ip,
unpack(latency_ngx_var_label_values)))

local bandwidth_ngx_var_label_values = ngx_var_label_values(ctx, "bandwidth")

metrics.bandwidth:inc(vars.request_length,
gen_arr("ingress", route_id, service_id, consumer_name, balancer_ip))
gen_arr("ingress", route_id, service_id, consumer_name, balancer_ip,
unpack(bandwidth_ngx_var_label_values)))

metrics.bandwidth:inc(vars.bytes_sent,
gen_arr("egress", route_id, service_id, consumer_name, balancer_ip))
gen_arr("egress", route_id, service_id, consumer_name, balancer_ip,
unpack(bandwidth_ngx_var_label_values)))
end


Expand Down
8 changes: 8 additions & 0 deletions conf/config-default.yaml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,14 @@ plugin_attr:
export_addr:
ip: 127.0.0.1
port: 9091
# custom_labels:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be better to use:

metrics:
    xxx:
       extra_labels:
           label_name_xxx: $label_var
     yyy:

This way we can add more options to a metric in the future, and the label name no longer has to be the same as the variable name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And please add a comment to show that the commented configurations are the example but not the default value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The labels in extra_labels should be a list, because the fields of a dictionary-style Lua table are unordered, which could cause the metric's full name to change each time the conf is reloaded.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can provide order on the implementation side, for example sorting them before using them.

# http_status:
# - upstream_addr
# - upstream_status
# http_latency:
# - upstream_addr
# bandwidth:
# - upstream_addr
server-info:
report_ttl: 60 # live time for server info in etcd (unit: second)
dubbo-proxy:
Expand Down
23 changes: 23 additions & 0 deletions docs/en/latest/plugins/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,29 @@ plugin_attr:
export_uri: /apisix/metrics
```

### Specifying `custom_labels`

For HTTP request related metrics, you can specify extra labels; each label name must match the name of an nginx variable, and the variable's value is used as the label value.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use APISIX variable like elsewhere in the doc.


If you specify a label for a nonexistent nginx variable, the label value will be "nil".

Currently, only the metrics below are supported:

* http_status
* http_latency
* bandwidth


Here is a configuration example:

```yaml title="conf/config.yaml"
plugin_attr:
  prometheus:
    custom_labels:
      http_status:
        - upstream_addr
        - upstream_status
```

## API

This Plugin will add the API endpoint `/apisix/prometheus/metrics` or your custom export URI for exposing the metrics.
Expand Down
144 changes: 144 additions & 0 deletions t/plugin/prometheus4.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Compile-time setup: decide how this test file interacts with the harness
# based on the TEST_NGINX_CHECK_LEAK environment variable.
BEGIN {
# When TEST_NGINX_CHECK_LEAK is set, record a skip reason instead of enabling HUP mode.
if ($ENV{TEST_NGINX_CHECK_LEAK}) {
$SkipReason = "unavailable for the hup tests";

} else {
# Otherwise turn on TEST_NGINX_USE_HUP and clear TEST_NGINX_USE_STAP.
$ENV{TEST_NGINX_USE_HUP} = 1;
undef $ENV{TEST_NGINX_USE_STAP};
}
}

use t::APISIX 'no_plan';

# Register a callback that fills in default sections for each test block.
add_block_preprocessor(sub {
my ($block) = @_;

# Neither error_log nor no_error_log given: default to no_error_log "[error]".
if ((!defined $block->error_log) && (!defined $block->no_error_log)) {
$block->set_value("no_error_log", "[error]");
}

# No request section given: default to "GET /t".
if (!defined $block->request) {
$block->set_value("request", "GET /t");
}
});

run_tests;

__DATA__

=== TEST 1: pre-create public API route
--- config
location /t {
content_by_lua_block {

local t = require("lib.test_admin").test
local code = t('/apisix/admin/routes/metrics',
ngx.HTTP_PUT,
[[{
"plugins": {
"public-api": {}
},
"uri": "/apisix/prometheus/metrics"
}]]
)
if code >= 300 then
ngx.status = code
return
end
}
}
--- error_code: 200
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we don't need to specify --- error_code: 200 as it is checked by default




=== TEST 2: set route
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/10',
ngx.HTTP_PUT,
[[{
"plugins": {
"prometheus": {}
},
"upstream": {
"nodes": {
"127.0.0.1:1980": 1
},
"type": "roundrobin"
},
"uri": "/hello"
}]]
)

if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed



=== TEST 3: client request
--- yaml_config
plugin_attr:
prometheus:
custom_labels:
bandwidth:
- upstream_addr
- upstream_status
--- request
GET /hello
--- error_code: 200
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we don't need to specify --- error_code: 200 as it is checked by default




=== TEST 4: fetch the prometheus metric data
--- request
GET /apisix/prometheus/metrics
--- response_body eval
qr/apisix_bandwidth\{type="egress",route="10",service="",consumer="",node="127.0.0.1",upstream_addr="127.0.0.1:1980",upstream_status="200"\} \d+/



=== TEST 5: client request, label with nonexist ngx variable
--- yaml_config
plugin_attr:
prometheus:
custom_labels:
http_status:
- dummy
bandwidth:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need the bandwidth?

- upstream_addr
- upstream_status
--- request
GET /hello
--- error_code: 200



=== TEST 6: fetch the prometheus metric data, with nil label
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is no longer a nil label?

--- request
GET /apisix/prometheus/metrics
--- response_body eval
qr/apisix_http_status\{code="200",route="10",matched_uri="\/hello",matched_host="",service="",consumer="",node="127.0.0.1",dummy="nil"\} \d+/