-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bug: health checker target leak #10636
Comments
Yep. This API exposes the health checker's internal status (including, I think, the metric introduced by this commit):
-- update upstream_status metrics
local stats = control.get_health_checkers()
for _, stat in ipairs(stats) do
for _, node in ipairs(stat.nodes) do
metrics.upstream_status:set((node.status == "healthy") and 1 or 0,
gen_arr(stat.name, node.ip, node.port))
end
end
core.response.set_header("content_type", "text/plain")
return 200, core.table.concat(prometheus:metric_data())
end handler = _M.dump_plugin_metadata,
},
get_health_checkers = _get_health_checkers,
} local function _get_health_checkers()
-- Collect health-checker info into a single table and return it.
-- The results of get_routes(), get_services() and get_upstreams() are each
-- handed to iter_and_add_healthcheck_info together with the shared `infos`
-- table (presumably that helper appends to it; its tail is elided here).
local infos = {}
local routes = get_routes()
iter_and_add_healthcheck_info(infos, routes)
local services = get_services()
iter_and_add_healthcheck_info(infos, services)
local upstreams = get_upstreams()
iter_and_add_healthcheck_info(infos, upstreams)
return infos
end local function iter_and_add_healthcheck_info(infos, values)
-- Walk `values` and look up checker info for every entry that has health
-- checks configured. The remainder of this function is elided in the snippet,
-- so what happens to `info` afterwards is not shown.
if not values then
-- nothing to iterate over
return
end
for _, value in core.config_util.iterate_values(values) do
-- checks may sit directly on the object or on its embedded upstream
local checks = value.value.checks or (value.value.upstream and value.value.upstream.checks)
if checks then
local info = extra_checker_info(value)
... local function extra_checker_info(value)
-- Build the health-checker summary for one config entry: a table holding the
-- entry's key (`name`) and the target list returned by
-- healthcheck.get_target_list (`nodes`).
-- resty.healthcheck is loaded lazily, on first use
if not healthcheck then
healthcheck = require("resty.healthcheck")
end
local name = upstream_mod.get_healthchecker_name(value)
-- second argument is the shared-dict name (called shm_name in the
-- get_target_list definition quoted in this snippet)
local nodes, err = healthcheck.get_target_list(name, "upstream-healthcheck")
if err then
-- the failure is only logged; `nodes` is still returned as-is below
-- NOTE(review): nodes is presumably nil on failure -- confirm that callers
-- iterating over it (e.g. with ipairs) handle that case
core.log.error("healthcheck.get_target_list failed: ", err)
end
return {
name = value.key,
nodes = nodes,
}
end function _M.get_target_list(name, shm_name)
-- Return the target list for checker `name`, read under
-- locking_target_list, with each target's `status` filled in from the shared
-- dict `shm_name`.
-- Raises (via assert) when ngx.shared has no dict called `shm_name`.
-- Part of the body is elided ("....") in the snippet, so the handling of
-- `ok, err` is not shown.
-- ad-hoc table passed as `self` to locking_target_list below
local self = {
name = name,
shm_name = shm_name,
log = checker.log,
}
self.shm = ngx.shared[tostring(shm_name)]
assert(self.shm, ("no shm found by name '%s'"):format(shm_name))
-- shared-dict key names, all derived from SHM_PREFIX and the checker name
self.TARGET_STATE = SHM_PREFIX .. self.name .. ":state"
self.TARGET_COUNTER = SHM_PREFIX .. self.name .. ":counter"
self.TARGET_LIST = SHM_PREFIX .. self.name .. ":target_list"
self.TARGET_LIST_LOCK = SHM_PREFIX .. self.name .. ":target_list_lock"
self.LOG_PREFIX = LOG_PREFIX .. "(" .. self.name .. ") "
local ok, err = locking_target_list(self, function(target_list)
self.targets = target_list
for _, target in ipairs(self.targets) do
local state_key = key_for(self.TARGET_STATE, target.ip, target.port, target.hostname)
-- translate the value stored in the shared dict through INTERNAL_STATES
target.status = INTERNAL_STATES[self.shm:get(state_key)]
-- normalize a falsy hostheader (e.g. false) to nil
if not target.hostheader then
target.hostheader = nil
end
end
return true
end)
....
return self.targets
end |
I have added a new test case for question 2. After testing, I found that the targets of this health checker have a bug.
For the implementation, you can refer to the test case included in the PR below. It has not been fixed yet because the etcd implementation logic is relatively complex, and I am still evaluating the code design. |
duplicated: #10500 EDIT: the above mentioned issue is not a duplicate |
The PR that fixed this bug has been reverted as it significantly increased CPU load. Another way to fix this bug would be to configure metrics expiry: apisix/conf/config-default.yaml Line 608 in ee2a759
|
Current Behavior
When an upstream is updated or deleted, the Prometheus plugin's upstream health status data still contains the corresponding upstream nodes; they are not removed.
Expected Behavior
No response
Error Logs
No response
Steps to Reproduce
Environment
- APISIX version (run `apisix version`): 2.13.1
- Operating system (run `uname -a`): Linux pekshcsitd54867 3.10.0-1160.76.1.el7.x86_64 #1 SMP Wed Aug 10 16:21:17 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
- OpenResty / Nginx version (run `openresty -V` or `nginx -V`): nginx version: openresty/1.21.4.1
- etcd version (run `curl http://127.0.0.1:9090/v1/server_info`): {"boot_time":1702379993,"etcd_version":"3.5.0","id":"aa6db183-43e6-4f8d-a86d-6ed48e76dbaa","hostname":"pekshcsitd54867","version":"2.13.1"}
- LuaRocks version (run `luarocks --version`):

The text was updated successfully, but these errors were encountered: