Skip to content

Commit

Permalink
tests(healthchecks) harden a flaky healthcheck test case
Browse files Browse the repository at this point in the history
The regression test for issue #3304 was flaky because it launches two
Kong nodes and it waited for the second one to be ready by reading the
logs. This is not a reliable way of determining if a node is immediately
ready to proxy a configured route.

Reversing the order of proxy calls in the test made it fail more
consistently, which helped debugging the issue.

This changes the check to verify if the router has been rebuilt,
using a dummy route for triggering the routing rebuild before the
proper test starts. (Thanks @thibaultcha for the idea!)

The changes are also backported to `spec-old-api/`.

From #3454
  • Loading branch information
hishamhm authored and thibaultcha committed May 11, 2018
1 parent fd4db25 commit 0da5722
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 45 deletions.
65 changes: 41 additions & 24 deletions spec-old-api/02-integration/05-proxy/09-balancer_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ end
local add_upstream
local patch_upstream
local get_upstream_health
local get_router_version
local add_target
local add_api
local patch_api
Expand All @@ -318,8 +319,8 @@ do
end
end

local function api_send(method, path, body)
local api_client = helpers.admin_client()
local function api_send(method, path, body, forced_port)
local api_client = helpers.admin_client(nil, forced_port)
local res, err = api_client:send({
method = method,
path = path,
Expand Down Expand Up @@ -353,8 +354,17 @@ do
get_upstream_health = function(upstream_name)
local path = "/upstreams/" .. upstream_name .."/health"
local status, body = api_send("GET", path)
assert.same(200, status)
return body
if status == 200 then
return body
end
end

get_router_version = function(forced_port)
local path = "/cache/api_router:version"
local status, body = api_send("GET", path, nil, forced_port)
if status == 200 then
return body.message
end
end

do
Expand Down Expand Up @@ -419,26 +429,32 @@ local function poll_wait_health(upstream_name, localhost, port, value)
local expire = ngx.now() + hard_timeout
while ngx.now() < expire do
local health = get_upstream_health(upstream_name)
for _, d in ipairs(health.data) do
if d.target == localhost .. ":" .. port and d.health == value then
return
if health then
for _, d in ipairs(health.data) do
if d.target == localhost .. ":" .. port and d.health == value then
return
end
end
end
ngx.sleep(0.01) -- poll-wait
end
end


local function file_contains(filename, searched)
local fd = assert(io.open(filename, "r"))
for line in fd:lines() do
if line:find(searched, 1, true) then
fd:close()
return true
end
end
fd:close()
return false
local function wait_for_router_update(old_rv, localhost, proxy_port, admin_port)
-- add dummy upstream just to rebuild router
local dummy_upstream_name = add_upstream()
local dummy_port = add_target(dummy_upstream_name, localhost)
local dummy_api_host = add_api(dummy_upstream_name)
local dummy_server = http_server(localhost, dummy_port, { math.huge })

helpers.wait_until(function()
client_requests(1, dummy_api_host, "127.0.0.1", proxy_port)
local rv = get_router_version(admin_port)
return rv ~= old_rv
end, 10)

dummy_server:done()
end


Expand All @@ -448,6 +464,7 @@ local localhosts = {
hostname = "localhost",
}


for _, strategy in helpers.each_strategy() do

describe("Ring-balancer #" .. strategy, function()
Expand Down Expand Up @@ -493,24 +510,24 @@ for _, strategy in helpers.each_strategy() do

it("does not perform health checks when disabled (#3304)", function()

local upstream_name = add_upstream({})
local old_rv = get_router_version(9011)

local upstream_name = add_upstream()
local port = add_target(upstream_name, localhost)
local api_host = add_api(upstream_name)

helpers.wait_until(function()
return file_contains("servroot2/logs/error.log", "balancer:targets")
end, 10)
wait_for_router_update(old_rv, localhost, 9010, 9011)

-- server responds, then fails, then responds again
local server = http_server(localhost, port, { 20, 20, 20 })

local seq = {
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
{ port = 9010, oks = 10, fails = 0, last_status = 200 },
{ port = 9000, oks = 0, fails = 10, last_status = 500 },
{ port = 9010, oks = 0, fails = 10, last_status = 500 },
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
{ port = 9010, oks = 0, fails = 10, last_status = 500 },
{ port = 9000, oks = 0, fails = 10, last_status = 500 },
{ port = 9010, oks = 10, fails = 0, last_status = 200 },
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
}
for i, test in ipairs(seq) do
local oks, fails, last_status = client_requests(10, api_host, "127.0.0.1", test.port)
Expand Down
51 changes: 32 additions & 19 deletions spec/02-integration/05-proxy/09-balancer_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ end
local add_upstream
local patch_upstream
local get_upstream_health
local get_router_version
local add_target
local add_api
local patch_api
Expand All @@ -318,8 +319,8 @@ do
end
end

local function api_send(method, path, body)
local api_client = helpers.admin_client()
local function api_send(method, path, body, forced_port)
local api_client = helpers.admin_client(nil, forced_port)
local res, err = api_client:send({
method = method,
path = path,
Expand Down Expand Up @@ -358,6 +359,14 @@ do
end
end

get_router_version = function(forced_port)
local path = "/cache/router:version"
local status, body = api_send("GET", path, nil, forced_port)
if status == 200 then
return body.message
end
end

do
local port = FIRST_PORT
gen_port = function()
Expand Down Expand Up @@ -436,16 +445,20 @@ local function poll_wait_health(upstream_name, localhost, port, value)
end


local function file_contains(filename, searched)
local fd = assert(io.open(filename, "r"))
for line in fd:lines() do
if line:find(searched, 1, true) then
fd:close()
return true
end
end
fd:close()
return false
local function wait_for_router_update(old_rv, localhost, proxy_port, admin_port)
-- add dummy upstream just to rebuild router
local dummy_upstream_name = add_upstream()
local dummy_port = add_target(dummy_upstream_name, localhost)
local dummy_api_host = add_api(dummy_upstream_name)
local dummy_server = http_server(localhost, dummy_port, { math.huge })

helpers.wait_until(function()
client_requests(1, dummy_api_host, "127.0.0.1", proxy_port)
local rv = get_router_version(admin_port)
return rv ~= old_rv
end, 10)

dummy_server:done()
end


Expand Down Expand Up @@ -501,24 +514,24 @@ for _, strategy in helpers.each_strategy() do

it("does not perform health checks when disabled (#3304)", function()

local upstream_name = add_upstream({})
local old_rv = get_router_version(9011)

local upstream_name = add_upstream()
local port = add_target(upstream_name, localhost)
local api_host = add_api(upstream_name)

helpers.wait_until(function()
return file_contains("servroot2/logs/error.log", "balancer:targets")
end, 10)
wait_for_router_update(old_rv, localhost, 9010, 9011)

-- server responds, then fails, then responds again
local server = http_server(localhost, port, { 20, 20, 20 })

local seq = {
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
{ port = 9010, oks = 10, fails = 0, last_status = 200 },
{ port = 9000, oks = 0, fails = 10, last_status = 500 },
{ port = 9010, oks = 0, fails = 10, last_status = 500 },
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
{ port = 9010, oks = 0, fails = 10, last_status = 500 },
{ port = 9000, oks = 0, fails = 10, last_status = 500 },
{ port = 9010, oks = 10, fails = 0, last_status = 200 },
{ port = 9000, oks = 10, fails = 0, last_status = 200 },
}
for i, test in ipairs(seq) do
local oks, fails, last_status = client_requests(10, api_host, "127.0.0.1", test.port)
Expand Down
4 changes: 2 additions & 2 deletions spec/helpers.lua
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ end

--- returns a pre-configured `http_client` for the Kong admin port.
-- @name admin_client
local function admin_client(timeout)
local function admin_client(timeout, forced_port)
local admin_ip, admin_port
for _, entry in ipairs(conf.admin_listeners) do
if entry.ssl == false then
Expand All @@ -404,7 +404,7 @@ local function admin_client(timeout)
end
end
assert(admin_ip, "No http-admin found in the configuration")
return http_client(admin_ip, admin_port, timeout)
return http_client(admin_ip, forced_port or admin_port, timeout)
end

--- returns a pre-configured `http_client` for the Kong admin SSL port.
Expand Down

0 comments on commit 0da5722

Please sign in to comment.