-- kong/spec/02-integration/05-proxy/10-balancer/01-healthchecks_spec.lua
local bu = require "spec.fixtures.balancer_utils"
local cjson = require "cjson"
local helpers = require "spec.helpers"
local utils = require "kong.tools.utils"
local https_server = helpers.https_server
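-- This spec exercises Kong's upstream health checks (passive and active),
-- health status propagation across a two-node cluster, and the balancer's
-- handling of DNS records (SRV chains, multi-address A records), once per
-- database strategy.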
for _, strategy in helpers.each_strategy() do
local bp
local DB_UPDATE_PROPAGATION = strategy == "cassandra" and 0.1 or 0
local DB_UPDATE_FREQUENCY = 0.1 -- same value for every strategy
local proxy_port_1 = 9000
local proxy_port_ssl = 9443
local proxy_port_grpc = 9002
local admin_port_1 = 9001
local default_admin_listen = "127.0.0.1:".. admin_port_1 .. ",[::1]:" .. admin_port_1
local default_proxy_listen = "127.0.0.1:".. proxy_port_1 .. ",[::1]:" .. proxy_port_1 .. ", " ..
"127.0.0.1:".. proxy_port_ssl .. " http2 ssl,[::1]:" .. proxy_port_ssl .. " http2 ssl, " ..
"127.0.0.1:".. proxy_port_grpc .. " http2,[::1]:" .. proxy_port_grpc .. " http2"
describe("Healthcheck #" .. strategy, function()
lazy_setup(function()
bp = bu.get_db_utils_for_dc_and_admin_api(strategy, {
"routes",
"services",
"plugins",
"upstreams",
"targets",
})
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
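-- Mock DNS records used by the tests below: an SRV record that chains to an
-- A record (2-level resolution), names that resolve to two addresses, and an
-- SRV record that rewrites the target port.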
fixtures.dns_mock:SRV {
name = "my.srv.test.com",
target = "a.my.srv.test.com",
port = 80, -- port should fail to connect
}
fixtures.dns_mock:A {
name = "a.my.srv.test.com",
address = "127.0.0.1",
}
fixtures.dns_mock:A {
name = "multiple-ips.test",
address = "127.0.0.1",
}
fixtures.dns_mock:A {
name = "multiple-ips.test",
address = "127.0.0.2",
}
fixtures.dns_mock:SRV {
name = "srv-changes-port.test",
target = "a-changes-port.test",
port = 90, -- port should fail to connect
}
fixtures.dns_mock:A {
name = "a-changes-port.test",
address = "127.0.0.3",
}
fixtures.dns_mock:A {
name = "another.multiple-ips.test",
address = "127.0.0.1",
}
fixtures.dns_mock:A {
name = "another.multiple-ips.test",
address = "127.0.0.2",
}
assert(helpers.start_kong({
database = strategy,
dns_resolver = "127.0.0.1",
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
db_update_frequency = DB_UPDATE_FREQUENCY,
db_update_propagation = DB_UPDATE_PROPAGATION,
}, nil, nil, fixtures))
end)
lazy_teardown(function()
helpers.stop_kong()
end)
it("2-level dns sets the proper health-check", function()
-- The issue: 2-level DNS resolution hits a mismatch between a name
-- in the second level and the IP address that failed.
-- Typically an SRV record pointing to an A record results in an
-- internal balancer Address structure that holds a name rather
-- than an IP. So when Kong reports that IP xyz failed to connect
-- and the healthchecker marks it as down, that IP will not be
-- found in the balancer (since it is only known by name), and hence
-- an error is returned that the target could not be disabled.
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- the following port will not be used, will be overwritten by
-- the mocked SRV record.
bu.add_target(bp, upstream_id, "my.srv.test.com", 80)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
-- we do not set up servers, since we want the connection to get refused
-- Go hit the api with requests, 1x round the balancer
local oks, fails, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(0, oks)
assert.same(bu.SLOTS, fails)
assert.same(503, last_status)
end, 15)
helpers.pwait_until(function ()
local health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.equals("UNHEALTHY", health.data[1].health)
end, 15)
end)
it("a target that resolves to 2 IPs reports health separately", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- "multiple-ips.test" resolves to two A records; both addresses
-- keep the port given here (80).
bu.add_target(bp, upstream_id, "multiple-ips.test", 80)
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
-- we do not set up servers, since we want the connection to get refused
-- Go hit the api with requests
local oks, fails, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(0, oks)
assert.same(bu.SLOTS, fails)
assert.same(503, last_status)
end, 15)
local health
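-- The expected shape of the Admin API health response, inferred from the
-- assertions below (a sketch, not the authoritative schema):
--   { data = { { health = "UNHEALTHY",
--                data = { addresses = {
--                  { ip = "127.0.0.1", port = 80, health = "UNHEALTHY" },
--                  { ip = "127.0.0.2", port = 80, health = "UNHEALTHY" },
--                } } } } }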
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "healthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("HEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("HEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "unhealthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
end)
it("a target that resolves to 2 IPs reports health separately (upstream with hostname set)", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
host_header = "another.multiple-ips.test",
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- "multiple-ips.test" resolves to two A records; both addresses
-- keep the port given here (80).
bu.add_target(bp, upstream_id, "multiple-ips.test", 80)
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
-- we do not set up servers, since we want the connection to get refused
-- Go hit the api with requests, 1x round the balancer
local oks, fails, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(0, oks)
assert.same(bu.SLOTS, fails)
assert.same(503, last_status)
end, 15)
local health
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "healthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("HEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("HEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "unhealthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
end)
it("a target that resolves to an SRV record that changes port", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- the following port will not be used, will be overwritten by
-- the mocked SRV record.
bu.add_target(bp, upstream_id, "srv-changes-port.test", 80)
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
-- we do not set up servers, since we want the connection to get refused
-- Go hit the api with requests, 1x round the balancer
local oks, fails, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(0, oks)
assert.same(bu.SLOTS, fails)
assert.same(503, last_status)
end, 15)
local health
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("a-changes-port.test", health.data[1].data.addresses[1].ip)
assert.same(90, health.data[1].data.addresses[1].port)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
local status = bu.put_target_address_health(upstream_id, "srv-changes-port.test:80", "a-changes-port.test:90", "healthy")
assert.same(204, status)
end, 15)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("a-changes-port.test", health.data[1].data.addresses[1].ip)
assert.same(90, health.data[1].data.addresses[1].port)
assert.equals("HEALTHY", health.data[1].health)
assert.equals("HEALTHY", health.data[1].data.addresses[1].health)
end, 15)
end)
it("a target that has healthchecks disabled", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = 0,
tcp_failures = 0,
timeouts = 0,
},
},
active = {
healthy = {
interval = 0,
},
unhealthy = {
interval = 0,
},
},
}
})
bu.add_target(bp, upstream_id, "multiple-ips.test", 80)
bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
local health = bu.get_upstream_health(upstream_name)
assert.is_truthy(health.data[1].health)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.equals("HEALTHCHECKS_OFF", health.data[1].health)
assert.equals("HEALTHCHECKS_OFF", health.data[1].data.addresses[1].health)
end, 15)
end)
it("an upstream that is removed and readed keeps the health status", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- "multiple-ips.test" resolves to two A records; both addresses
-- keep the port given here (80).
bu.add_target(bp, upstream_id, "multiple-ips.test", 80)
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
-- we do not set up servers, since we want the connection to get refused
-- Go hit the api with requests
local oks, fails, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(0, oks)
assert.same(bu.SLOTS, fails)
assert.same(503, last_status)
end, 15)
local health
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "healthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("HEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("HEALTHY", health.data[1].data.addresses[2].health)
end, 15)
local status = bu.put_target_address_health(upstream_id, "multiple-ips.test:80", "127.0.0.2:80", "unhealthy")
assert.same(204, status)
helpers.pwait_until(function ()
health = bu.get_upstream_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
-- remove the upstream
if strategy ~= "off" then
bu.remove_upstream(bp, upstream_id)
end
-- add the upstream again
bu.begin_testcase_setup_update(strategy, bp)
local new_upstream_name, new_upstream_id = bu.add_upstream(bp, {
name = upstream_name,
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
-- upstreams are different
assert.are_not.equals(upstream_id, new_upstream_id)
-- but new upstream name is the same as before
assert.are.equals(upstream_name, new_upstream_name)
-- also the target is the same
bu.add_target(bp, new_upstream_id, "multiple-ips.test", 80)
bu.add_api(bp, new_upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp)
-- so health must be the same as before
local health
helpers.pwait_until(function ()
health = bu.get_upstream_health(new_upstream_name)
assert.is_truthy(health.data[1].data)
assert.is.table(health)
assert.is.table(health.data)
assert.is.table(health.data[1])
assert.same("127.0.0.1", health.data[1].data.addresses[1].ip)
assert.same("127.0.0.2", health.data[1].data.addresses[2].ip)
assert.equals("UNHEALTHY", health.data[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[1].health)
assert.equals("UNHEALTHY", health.data[1].data.addresses[2].health)
end, 15)
end)
end)
describe("mTLS #" .. strategy, function()
local get_name
do
local n = 0
get_name = function()
n = n + 1
return string.format("name%04d.test", n)
end
end
lazy_setup(function()
bp = bu.get_db_utils_for_dc_and_admin_api(strategy, {
"services",
"routes",
"upstreams",
"targets",
})
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
fixtures.dns_mock:A {
name = "notlocalhost.test",
address = "127.0.0.1",
}
assert(helpers.start_kong({
database = strategy,
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
client_ssl = true,
client_ssl_cert = "spec/fixtures/kong_spec.crt",
client_ssl_cert_key = "spec/fixtures/kong_spec.key",
db_update_frequency = 0.1,
stream_listen = "off",
plugins = "bundled,fail-once-auth",
}, nil, nil, fixtures))
end)
lazy_teardown(function()
helpers.stop_kong()
end)
it("create active health checks -- global certificate", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = "https",
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = 1,
},
}
}
})
bu.add_target(bp, upstream_id, "notlocalhost.test", 15555)
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
local health = bu.get_balancer_health(upstream_name)
assert.is_truthy(health.data)
assert.is.table(health)
assert.is.table(health.data)
end, 15)
bu.poll_wait_health(upstream_id, "notlocalhost.test", "15555", "UNHEALTHY")
end)
it("#db create active health checks -- upstream certificate", function()
local ssl_fixtures = require "spec.fixtures.ssl"
local client = assert(helpers.admin_client())
local res = client:post("/certificates", {
body = {
cert = ssl_fixtures.cert,
key = ssl_fixtures.key,
snis = { get_name(), get_name() },
},
headers = { ["Content-Type"] = "application/json" },
})
local body = assert.res_status(201, res)
local certificate = cjson.decode(body)
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = "https",
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = 1,
},
}
},
client_certificate = certificate,
})
bu.add_target(bp, upstream_id, "notlocalhost.test", 15555)
bu.end_testcase_setup(strategy, bp)
helpers.pwait_until(function ()
local health = bu.get_balancer_health(upstream_name)
assert.is_truthy(health.data)
assert.is.table(health)
assert.is.table(health.data)
end, 15)
bu.poll_wait_health(upstream_id, "notlocalhost.test", "15555", "UNHEALTHY")
end)
end)
describe("Ring-balancer #" .. strategy, function()
lazy_setup(function()
bp = bu.get_db_utils_for_dc_and_admin_api(strategy, {
"services",
"routes",
"upstreams",
"targets",
})
assert(helpers.start_kong({
database = strategy,
dns_resolver = "127.0.0.1",
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
lua_ssl_trusted_certificate = "spec/fixtures/kong_spec.crt",
stream_listen = "off",
db_update_frequency = DB_UPDATE_FREQUENCY,
db_update_propagation = DB_UPDATE_PROPAGATION,
plugins = "bundled,fail-once-auth",
}))
end)
lazy_teardown(function()
helpers.stop_kong()
end)
describe("#healthchecks (#cluster #db)", function()
-- second node ports are Kong test ports + 10
local proxy_port_2 = 9010
local admin_port_2 = 9011
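-- these tests verify that health status changes propagate between two
-- Kong nodes sharing the same database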
lazy_setup(function()
-- start a second Kong instance
helpers.start_kong({
database = strategy,
dns_resolver = "127.0.0.1",
admin_listen = "127.0.0.1:".. admin_port_2 .. ",[::1]:" .. admin_port_2,
proxy_listen = "127.0.0.1:".. proxy_port_2 .. ",[::1]:" .. proxy_port_2,
stream_listen = "off",
prefix = "servroot2",
log_level = "debug",
db_update_frequency = DB_UPDATE_FREQUENCY,
db_update_propagation = DB_UPDATE_PROPAGATION,
})
end)
lazy_teardown(function()
helpers.stop_kong("servroot2")
end)
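-- run every test in each localhost mode provided by bu.localhosts
-- (e.g. IPv4 "127.0.0.1" and IPv6 "[::1]")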
for mode, localhost in pairs(bu.localhosts) do
describe("#" .. mode, function()
-- FIXME for some reason this test fails only on CI
it("#flaky does not perform health checks when disabled (#3304)", function()
bu.begin_testcase_setup(strategy, bp)
local old_rv = bu.get_router_version(admin_port_2)
local upstream_name, upstream_id = bu.add_upstream(bp)
local port = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.wait_for_router_update(bp, old_rv, localhost, proxy_port_1, admin_port_1)
old_rv = bu.get_router_version(admin_port_1)
bu.wait_for_router_update(bp, old_rv, localhost, proxy_port_2, admin_port_2)
bu.end_testcase_setup(strategy, bp)
local server = https_server.new(port, upstream_name)
server:start()
-- server responds, then fails, then responds again
local seq = {
{ healthy = true, port = proxy_port_2, oks = 10, fails = 0, last_status = 200 },
{ healthy = true, port = proxy_port_1, oks = 10, fails = 0, last_status = 200 },
{ healthy = false, port = proxy_port_2, oks = 0, fails = 10, last_status = 500 },
{ healthy = false, port = proxy_port_1, oks = 0, fails = 10, last_status = 500 },
{ healthy = true, port = proxy_port_2, oks = 10, fails = 0, last_status = 200 },
{ healthy = true, port = proxy_port_1, oks = 10, fails = 0, last_status = 200 },
}
for i, test in ipairs(seq) do
if test.healthy then
bu.direct_request(localhost, port, "/healthy")
else
bu.direct_request(localhost, port, "/unhealthy")
end
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port, "HEALTHCHECKS_OFF")
else
bu.poll_wait_health(upstream_id, localhost, port, "HEALTHCHECKS_OFF")
end
local oks, fails, last_status = bu.client_requests(10, api_host, localhost, test.port)
assert.same(test.oks, oks, localhost .. " iteration " .. tostring(i))
assert.same(test.fails, fails, localhost .. " iteration " .. tostring(i))
assert.same(test.last_status, last_status, localhost .. " iteration " .. tostring(i))
end
-- collect server results
local count = server:shutdown()
assert.same(40, count.ok)
assert.same(20, count.fail)
end)
it("#flaky propagates posted health info", function()
bu.begin_testcase_setup(strategy, bp)
local old_rv = bu.get_router_version(admin_port_2)
local _, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config({})
})
local port = bu.add_target(bp, upstream_id, localhost)
bu.wait_for_router_update(bp, old_rv, localhost, proxy_port_2, admin_port_2)
bu.end_testcase_setup(strategy, bp)
local health1 = bu.get_upstream_health(upstream_id, admin_port_1)
local health2 = bu.get_upstream_health(upstream_id, admin_port_2)
assert.same("HEALTHY", health1.data[1].health)
assert.same("HEALTHY", health2.data[1].health)
if mode == "ipv6" then
-- TODO /upstreams does not understand shortened IPv6 addresses
bu.post_target_endpoint(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port, "unhealthy")
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port, "UNHEALTHY", admin_port_1)
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port, "UNHEALTHY", admin_port_2)
else
bu.post_target_endpoint(upstream_id, localhost, port, "unhealthy")
bu.poll_wait_health(upstream_id, localhost, port, "UNHEALTHY", admin_port_1)
bu.poll_wait_health(upstream_id, localhost, port, "UNHEALTHY", admin_port_2)
end
end)
end)
describe("#" .. mode, function()
for _, consistency in ipairs(bu.consistencies) do
describe("Upstream entities #" .. consistency, function()
-- Regression test for a missing invalidation in 0.12rc1
it("created via the API are functional", function()
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp)
local target_port = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp, consistency)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
local server = https_server.new(target_port, localhost)
server:start()
local oks, fails, last_status = bu.client_requests(1, api_host)
assert.same(200, last_status)
assert.same(1, oks)
assert.same(0, fails)
local count = server:shutdown()
assert.same(1, count.ok)
assert.same(0, count.fail)
end)
it("created via the API are functional #grpc", function()
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp)
bu.add_target(bp, upstream_id, localhost, 15002)
local api_host = bu.add_api(bp, upstream_name, {
service_protocol = "grpc",
route_protocol = "grpc",
})
bu.end_testcase_setup(strategy, bp, consistency)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
local grpc_client = helpers.proxy_client_grpc()
local ok, resp = grpc_client({
service = "hello.HelloService.SayHello",
opts = {
["-authority"] = api_host,
}
})
assert.Truthy(ok)
assert.Truthy(resp)
end)
it("properly set the host header", function()
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, { host_header = "localhost" })
local target_port = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp, consistency)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
local server = https_server.new(target_port, "localhost", "http", true)
server:start()
local oks, fails, last_status = bu.client_requests(5, api_host)
assert.same(200, last_status)
assert.same(5, oks)
assert.same(0, fails)
local count = server:shutdown()
assert.same(5, count.ok)
assert.same(0, count.fail)
end)
it("fail with wrong host header", function()
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, { host_header = "localhost" })
local target_port = bu.add_target(bp, upstream_id, "localhost")
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 100, })
bu.end_testcase_setup(strategy, bp, consistency)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
local server = https_server.new(target_port, "127.0.0.1", "http", true)
server:start()
local oks, fails, last_status = bu.client_requests(5, api_host)
assert.same(400, last_status)
assert.same(0, oks)
assert.same(5, fails)
-- count.ok and count.fail must be 0, as the server should not serve any request
local count = server:shutdown()
assert.same(0, count.ok)
assert.same(0, count.fail)
end)
-- #db == disabled for database=off, because it tests
-- for a PATCH operation
it("#db can have their config partially updated", function()
bu.begin_testcase_setup(strategy, bp)
local _, upstream_id = bu.add_upstream(bp)
bu.end_testcase_setup(strategy, bp, consistency)
bu.begin_testcase_setup_update(strategy, bp)
bu.patch_upstream(upstream_id, {
healthchecks = {
active = {
http_path = "/status",
healthy = {
interval = 0,
successes = 1,
},
unhealthy = {
interval = 0,
http_failures = 1,
},
}
}
})
bu.end_testcase_setup(strategy, bp, consistency)
local updated = {
active = {
type = "http",
concurrency = 10,
healthy = {
http_statuses = { 200, 302 },
interval = 0,
successes = 1
},
http_path = "/status",
https_sni = cjson.null,
https_verify_certificate = true,
headers = cjson.null,
timeout = 1,
unhealthy = {
http_failures = 1,
http_statuses = { 429, 404, 500, 501, 502, 503, 504, 505 },
interval = 0,
tcp_failures = 0,
timeouts = 0
}
},
passive = {
type = "http",
healthy = {
http_statuses = { 200, 201, 202, 203, 204, 205, 206, 207, 208, 226,
300, 301, 302, 303, 304, 305, 306, 307, 308 },
successes = 0
},
unhealthy = {
http_failures = 0,
http_statuses = { 429, 500, 503 },
tcp_failures = 0,
timeouts = 0
}
},
threshold = 0
}
local upstream_data = bu.get_upstream(upstream_id)
assert.same(updated, upstream_data.healthchecks)
end)
-- #db == disabled for database=off, because it tests
-- for a PATCH operation.
-- TODO produce an equivalent test when upstreams are preserved
-- (not rebuilt) across declarative config updates.
it("#db can be renamed without producing stale cache", function()
-- create two upstreams, each with a target pointing to a server
bu.begin_testcase_setup(strategy, bp)
local upstreams = {}
for i = 1, 2 do
upstreams[i] = {}
upstreams[i].name = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {}
})
upstreams[i].port = bu.add_target(bp, upstreams[i].name, localhost)
upstreams[i].api_host = bu.add_api(bp, upstreams[i].name)
end
bu.end_testcase_setup(strategy, bp, consistency)
-- start two servers
local server1 = https_server.new(upstreams[1].port, localhost)
local server2 = https_server.new(upstreams[2].port, localhost)
server1:start()
server2:start()
-- rename upstream 2
local new_name = upstreams[2].name .. "_new"
bu.patch_upstream(upstreams[2].name, {
name = new_name,
})
-- rename upstream 1 to upstream 2's original name
bu.patch_upstream(upstreams[1].name, {
name = upstreams[2].name,
})
helpers.wait_for_all_config_update()
-- hit a request through upstream 1 using the new name
local oks, fails, last_status = bu.client_requests(1, upstreams[2].api_host)
assert.same(200, last_status)
assert.same(1, oks)
assert.same(0, fails)
-- rename upstream 2
bu.patch_upstream(new_name, {
name = upstreams[1].name,
})
helpers.wait_for_all_config_update()
-- a single request to upstream 2 just to make server 2 shutdown
bu.client_requests(1, upstreams[1].api_host)
-- collect results
local count1 = server1:shutdown()
local count2 = server2:shutdown()
assert.same({1, 0}, { count1.ok, count1.fail })
assert.same({1, 0}, { count2.ok, count2.fail })
end)
-- #db == disabled for database=off, because it tests
-- for a PATCH operation.
-- TODO produce an equivalent test when upstreams are preserved
-- (not rebuilt) across declarative config updates.
-- FIXME with eventual consistency it sometimes takes a long time to stop
-- the original health checker; this may or may not be a bug.
it("#db do not leave a stale healthchecker when renamed", function()
if consistency ~= "eventual" then
bu.begin_testcase_setup(strategy, bp)
-- create an upstream
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = 1,
},
}
}
})
local port = bu.add_target(bp, upstream_id, localhost)
local _, service_id = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp, consistency)
-- rename upstream
local new_name = upstream_id .. "_new"
bu.patch_upstream(upstream_id, {
name = new_name
})
-- reconfigure healthchecks
bu.patch_upstream(new_name, {
healthchecks = {
active = {
http_path = "/status",
healthy = {
interval = 0,
successes = 1,
},
unhealthy = {
interval = 0,
http_failures = 1,
},
}
}
})
helpers.wait_for_all_config_update()
-- start server
local server1 = https_server.new(port, localhost)
server1:start()
-- give time for healthchecker to (not!) run
ngx.sleep(bu.HEALTHCHECK_INTERVAL * 3)
bu.begin_testcase_setup_update(strategy, bp)
bu.patch_api(bp, service_id, "http://" .. new_name)
bu.end_testcase_setup(strategy, bp, consistency)
-- collect results
local count = server1:shutdown()
assert.same({0, 0}, { count.ok, count.fail })
assert.truthy(count.status_total < 2)
end
end)
end)
end
describe("#healthchecks", function()
local stream_it = (mode == "ipv6" or strategy == "off") and pending or it
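-- stream tests are skipped (pending) in ipv6 mode and with database=off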
it("do not count Kong-generated errors as failures", function()
bu.begin_testcase_setup(strategy, bp)
-- configure healthchecks with a 1-error threshold
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
healthy = {
successes = 1,
},
unhealthy = {
http_statuses = { 401, 500 },
http_failures = 1,
tcp_failures = 1,
timeouts = 1,
},
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host, service_id = bu.add_api(bp, upstream_name, { connect_timeout = 50, })
-- add a plugin that rejects the first request with a Kong-generated 401
local plugin_id = utils.uuid()
bp.plugins:insert({
id = plugin_id,
service = { id = service_id },
name = "fail-once-auth",
})
bu.end_testcase_setup(strategy, bp)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
-- start servers, they won't be affected by the 401 error
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- run request: fails with 401, but doesn't hit the 1-error threshold
local oks, fails, last_status = bu.client_requests(1, api_host)
assert.same(0, oks)
assert.same(1, fails)
assert.same(401, last_status)
oks, fails, last_status = bu.client_requests(bu.SLOTS * 2, api_host)
assert.same(200, last_status)
assert.truthy(oks > 0)
assert.same(0, fails)
-- collect server results
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- both servers were fully operational
assert.truthy(count1.ok > 0)
assert.truthy(count2.ok > 0)
assert.same(0, count1.fail)
assert.same(0, count2.fail)
end)
-- FIXME it seems these tests are actually failing
it("#flaky perform passive health checks", function()
for nfails = 1, 3 do
bu.begin_testcase_setup(strategy, bp)
-- configure healthchecks
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = nfails,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
local requests = bu.SLOTS * 2 -- go round the balancer twice
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
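-- expected arithmetic: in the first round of bu.SLOTS requests both
-- servers answer half each, so server2 ends with requests / 4 oks plus
-- `nfails` failures before being marked down; server1 picks up the rest.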
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- Go hit them with our test requests
local client_oks1, client_fails1 = bu.client_requests(bu.SLOTS, api_host)
bu.direct_request(localhost, port2, "/unhealthy")
local client_oks2, client_fails2 = bu.client_requests(bu.SLOTS, api_host)
local client_oks = client_oks1 + client_oks2
local client_fails = client_fails1 + client_fails2
-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- verify
assert.are.equal(requests - server2_oks - nfails, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(nfails, count2.fail)
assert.are.equal(requests - nfails, client_oks)
assert.are.equal(nfails, client_fails)
end
end)
it("threshold for health checks", function()
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
fixtures.dns_mock:A {
name = "health-threshold.test",
address = "127.0.0.1",
}
fixtures.dns_mock:A {
name = "health-threshold.test",
address = "127.0.0.2",
}
fixtures.dns_mock:A {
name = "health-threshold.test",
address = "127.0.0.3",
}
fixtures.dns_mock:A {
name = "health-threshold.test",
address = "127.0.0.4",
}
-- restart Kong
bu.begin_testcase_setup_update(strategy, bp)
helpers.restart_kong({
database = strategy,
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
lua_ssl_trusted_certificate = "spec/fixtures/kong_spec.crt",
db_update_frequency = 0.1,
stream_listen = "off",
plugins = "bundled,fail-once-auth",
}, nil, fixtures)
bu.end_testcase_setup(strategy, bp)
local health_threshold = { 0, 25, 75, 99, 100 }
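-- each pass drops one of the four equally-weighted addresses (25% each),
-- checking the upstream's aggregate health against the configured threshold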
for i = 1, 5 do
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
},
threshold = health_threshold[i],
}
})
bu.add_target(bp, upstream_id, "health-threshold.test", 80, { weight = 25 })
bu.end_testcase_setup(strategy, bp)
-- 100% healthy
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.1:80", "healthy")
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.2:80", "healthy")
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.3:80", "healthy")
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.4:80", "healthy")
local health
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert(health.data)
assert.equal(100, health.data.details.weight.available)
assert.is.table(health)
assert.is.table(health.data)
assert.same({
available = 100,
unavailable = 0,
total = 100,
}, health.data.details.weight)
end, 15)
if health_threshold[i] < 100 then
assert.equals("HEALTHY", health.data.health)
else
assert.equals("UNHEALTHY", health.data.health)
end
-- 75% healthy
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.1:80", "unhealthy")
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert.same({
available = 75,
unavailable = 25,
total = 100,
}, health.data.details.weight)
end, 15)
if health_threshold[i] < 75 then
assert.equals("HEALTHY", health.data.health)
else
assert.equals("UNHEALTHY", health.data.health)
end
-- 50% healthy
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.2:80", "unhealthy")
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert.same({
available = 50,
unavailable = 50,
total = 100,
}, health.data.details.weight)
end, 15)
if health_threshold[i] < 50 then
assert.equals("HEALTHY", health.data.health)
else
assert.equals("UNHEALTHY", health.data.health)
end
-- 25% healthy
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.3:80", "unhealthy")
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert.same({
available = 25,
unavailable = 75,
total = 100,
}, health.data.details.weight)
end, 15)
if health_threshold[i] < 25 then
assert.equals("HEALTHY", health.data.health)
else
assert.equals("UNHEALTHY", health.data.health)
end
-- 0% healthy
bu.put_target_address_health(upstream_id, "health-threshold.test:80", "127.0.0.4:80", "unhealthy")
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert.same({
available = 0,
unavailable = 100,
total = 100,
}, health.data.details.weight)
end, 15)
assert.equals("UNHEALTHY", health.data.health)
end
end)
stream_it("#stream and http modules do not duplicate active health checks", function()
local port1 = bu.gen_port()
local server1 = https_server.new(port1, localhost)
server1:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local _, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
http_path = "/status",
healthy = {
-- using this interval to get the same results when using
-- worker_consistency "strict" or "eventual"
interval = bu.CONSISTENCY_FREQ,
successes = 1,
},
unhealthy = {
interval = bu.CONSISTENCY_FREQ,
http_failures = 1,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.end_testcase_setup(strategy, bp)
-- collect server results; hitcount
local count1 = server1:shutdown()
assert(count1.status_total < 3)
end)
it("#flaky perform active health checks -- up then down", function()
for nfails = 1, 3 do
local requests = bu.SLOTS * 2 -- go round the balancer twice
local port1 = bu.gen_port()
local port2 = bu.gen_port()
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
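-- expected arithmetic: in phase 1 both servers split server2_oks * 2
-- requests evenly; after server2 is marked UNHEALTHY, server1 takes the
-- remaining requests, ending with requests - server2_oks oks in total.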
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nfails,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.add_target(bp, upstream_id, localhost, port2)
local api_host = bu.add_api(bp, upstream_name, { connect_timeout = 50, })
bu.end_testcase_setup(strategy, bp)
-- Phase 1: server1 and server2 take requests
local client_oks, client_fails = bu.client_requests(server2_oks * 2, api_host)
-- Phase 2: server2 goes unhealthy
bu.direct_request(localhost, port2, "/unhealthy")
-- Give time for healthchecker to detect
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "UNHEALTHY")
else
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
end
-- Phase 3: server1 takes all requests
do
local p3oks, p3fails = bu.client_requests(requests - (server2_oks * 2), api_host)
client_oks = client_oks + p3oks
client_fails = client_fails + p3fails
end
-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- verify
assert.are.equal(requests - server2_oks, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(0, count2.fail)
assert.are.equal(requests, client_oks)
assert.are.equal(0, client_fails)
end
end)
it("perform active health checks with upstream hostname #flaky", function()
for nfails = 1, 3 do
local requests = bu.SLOTS * 2 -- go round the balancer twice
local port1 = bu.gen_port()
local port2 = bu.gen_port()
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server2_oks = math.floor(requests / 4)
local server1 = https_server.new(port1, "localhost", "http", true)
local server2 = https_server.new(port2, "localhost", "http", true)
server1:start()
server2:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
host_header = "localhost",
healthchecks = bu.healthchecks_config {
active = {
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nfails,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.add_target(bp, upstream_id, localhost, port2)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
helpers.wait_for_all_config_update()
-- Phase 1: server1 and server2 take requests
local client_oks, client_fails = bu.client_requests(server2_oks * 2, api_host)
-- Phase 2: server2 goes unhealthy
bu.direct_request("localhost", port2, "/unhealthy")
-- Give time for healthchecker to detect
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
-- Phase 3: server1 takes all requests
do
local p3oks, p3fails = bu.client_requests(requests - (server2_oks * 2), api_host)
client_oks = client_oks + p3oks
client_fails = client_fails + p3fails
end
-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- verify
assert.are.equal(requests - server2_oks, count1.ok)
assert.are.equal(server2_oks, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(0, count2.fail)
assert.are.equal(requests, client_oks)
assert.are.equal(0, client_fails)
end
end)
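-- repeat the active-check recovery tests with both http and https probes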
for _, protocol in ipairs({"http", "https"}) do
-- TODO this test is marked as flaky because add_upstream sometimes
-- fails with a "connection reset by peer" error, which seems completely
-- unrelated to the functionality being tested.
it("perform active health checks -- automatic recovery #flaky #" .. protocol, function()
for _, nchecks in ipairs({1,3}) do
local port1 = bu.gen_port()
local port2 = bu.gen_port()
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1 = https_server.new(port1, localhost, protocol, false)
local server2 = https_server.new(port2, localhost, protocol, false)
server1:start()
server2:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = protocol,
http_path = "/status",
https_verify_certificate = false,
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = nchecks,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nchecks,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.add_target(bp, upstream_id, localhost, port2)
local api_host = bu.add_api(bp, upstream_name, {
service_protocol = protocol
})
bu.end_testcase_setup(strategy, bp)
-- ensure it's healthy at the beginning of the test
bu.direct_request(localhost, port1, "/healthy", protocol)
bu.direct_request(localhost, port2, "/healthy", protocol)
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port1, "HEALTHY")
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "HEALTHY")
else
bu.poll_wait_health(upstream_id, localhost, port1, "HEALTHY")
bu.poll_wait_health(upstream_id, localhost, port2, "HEALTHY")
end
-- 1) server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- server2 goes unhealthy
bu.direct_request(localhost, port2, "/unhealthy", protocol)
-- Wait until healthchecker detects
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "UNHEALTHY")
else
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
end
-- 2) server1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- server2 goes healthy again
bu.direct_request(localhost, port2, "/healthy", protocol)
-- Give time for healthchecker to detect
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "HEALTHY")
else
bu.poll_wait_health(upstream_id, localhost, port2, "HEALTHY")
end
-- 3) server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- verify
assert.are.equal(bu.SLOTS * 2, count1.ok)
assert.are.equal(bu.SLOTS, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(0, count2.fail)
assert.are.equal(bu.SLOTS * 3, oks)
assert.are.equal(0, fails)
end
end)
-- FIXME this test is flaky in CI only
it("#flaky perform active health checks on a target that resolves to multiple addresses -- automatic recovery #" .. protocol, function()
local hosts = {}
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
for i = 1, 3 do
hosts[i] = {
hostname = bu.gen_multi_host(),
port1 = bu.gen_port(),
port2 = bu.gen_port(),
}
fixtures.dns_mock:SRV {
name = hosts[i].hostname,
target = localhost,
port = hosts[i].port1,
}
fixtures.dns_mock:SRV {
name = hosts[i].hostname,
target = localhost,
port = hosts[i].port2,
}
end
-- restart Kong
bu.begin_testcase_setup_update(strategy, bp)
helpers.restart_kong({
database = strategy,
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
lua_ssl_trusted_certificate = "spec/fixtures/kong_spec.crt",
db_update_frequency = 0.1,
stream_listen = "off",
plugins = "bundled,fail-once-auth",
}, nil, fixtures)
bu.end_testcase_setup(strategy, bp)
for _, nchecks in ipairs({1,3}) do
local port1 = hosts[nchecks].port1
local port2 = hosts[nchecks].port2
local hostname = hosts[nchecks].hostname
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1 = https_server.new(port1, hostname, protocol)
local server2 = https_server.new(port2, hostname, protocol)
server1:start()
server2:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = protocol,
http_path = "/status",
https_verify_certificate = (protocol == "https" and hostname == "localhost"),
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = nchecks,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nchecks,
},
}
}
})
bu.add_target(bp, upstream_id, hostname, port1) -- port gets overridden at DNS resolution
local api_host = bu.add_api(bp, upstream_name, {
service_protocol = protocol
})
bu.end_testcase_setup(strategy, bp)
-- 1) server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- server2 goes unhealthy
bu.direct_request(localhost, port2, "/unhealthy", protocol, hostname)
-- Wait until healthchecker detects
bu.poll_wait_address_health(upstream_id, hostname, port1, localhost, port2, "UNHEALTHY")
-- 2) server1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- server2 goes healthy again
bu.direct_request(localhost, port2, "/healthy", protocol, hostname)
-- Give time for healthchecker to detect
bu.poll_wait_address_health(upstream_id, hostname, port1, localhost, port2, "HEALTHY")
-- 3) server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results; hitcount
local count1 = server1:shutdown()
local count2 = server2:shutdown()
-- verify
assert.are.equal(bu.SLOTS * 2, count1.ok)
assert.are.equal(bu.SLOTS, count2.ok)
assert.are.equal(0, count1.fail)
assert.are.equal(0, count2.fail)
assert.are.equal(bu.SLOTS * 3, oks)
assert.are.equal(0, fails)
end
end)
-- FIXME this test is flaky in CI only
it("#flaky perform active health checks on targets that resolve to the same IP -- automatic recovery #" .. protocol, function()
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
fixtures.dns_mock:A {
name = "target1.test",
address = "127.0.0.1",
}
fixtures.dns_mock:A {
name = "target2.test",
address = "127.0.0.1",
}
-- restart Kong
bu.begin_testcase_setup_update(strategy, bp)
helpers.restart_kong({
database = strategy,
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
lua_ssl_trusted_certificate = "spec/fixtures/kong_spec.crt",
db_update_frequency = 0.1,
stream_listen = "off",
plugins = "bundled,fail-once-auth",
}, nil, fixtures)
bu.end_testcase_setup(strategy, bp)
for _, nchecks in ipairs({1,3}) do
local port1 = bu.gen_port()
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1 = https_server.new(port1, {"target1.test", "target2.test"}, protocol)
server1:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = protocol,
http_path = "/status",
https_verify_certificate = false,
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = nchecks,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nchecks,
},
}
}
})
bu.add_target(bp, upstream_id, "target1.test", port1)
bu.add_target(bp, upstream_id, "target2.test", port1)
local api_host = bu.add_api(bp, upstream_name, {
service_protocol = protocol
})
bu.end_testcase_setup(strategy, bp)
-- 1) target1 and target2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- target2 goes unhealthy
bu.direct_request(localhost, port1, "/unhealthy", protocol, "target2.test")
-- Wait until healthchecker detects
bu.poll_wait_health(upstream_id, "target2.test", port1, "UNHEALTHY")
-- 2) target1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- target2 goes healthy again
bu.direct_request(localhost, port1, "/healthy", protocol, "target2.test")
-- Give time for healthchecker to detect
bu.poll_wait_health(upstream_id, "target2.test", port1, "HEALTHY")
-- 3) server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results; hitcount
local results = server1:shutdown()
-- verify
assert.are.equal(bu.SLOTS * 2, results["target1.test"].ok)
assert.are.equal(bu.SLOTS, results["target2.test"].ok)
assert.are.equal(0, results["target1.test"].fail)
assert.are.equal(0, results["target1.test"].fail)
assert.are.equal(bu.SLOTS * 3, oks)
assert.are.equal(0, fails)
end
end)
end
it("#flaky #db perform active health checks -- automatic recovery #stream", function()
local port1 = bu.gen_port()
local port2 = bu.gen_port()
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1 = helpers.tcp_server(port1, {
requests = 1000,
prefix = "1 ",
})
local server2 = helpers.tcp_server(port2, {
requests = 1000,
prefix = "2 ",
})
ngx.sleep(0.1)
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
type = "tcp",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
tcp_failures = 1,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.add_target(bp, upstream_id, localhost, port2)
local _, service_id, route_id = bu.add_api(bp, upstream_name, {
read_timeout = 500,
write_timeout = 500,
route_protocol = "tcp",
})
bu.end_testcase_setup(strategy, bp)
finally(function()
helpers.kill_tcp_server(port1)
helpers.kill_tcp_server(port2)
server1:join()
server2:join()
bp.routes:remove({ id = route_id })
bp.services:remove({ id = service_id })
end)
ngx.sleep(0.5)
-- 1) server1 and server2 take requests
local ok1, ok2 = bu.tcp_client_requests(bu.SLOTS * 2, localhost, 9100)
assert.same(bu.SLOTS, ok1)
assert.same(bu.SLOTS, ok2)
-- server2 goes unhealthy
helpers.kill_tcp_server(port2)
server2:join()
-- Wait until healthchecker detects
-- We cannot use bu.poll_wait_health because health endpoints
-- are not currently available for stream routes.
ngx.sleep(strategy == "cassandra" and 2 or 1)
-- 2) server1 takes all requests
ok1, ok2 = bu.tcp_client_requests(bu.SLOTS * 2, localhost, 9100)
assert.same(bu.SLOTS * 2, ok1)
assert.same(0, ok2)
-- server2 goes healthy again
server2 = helpers.tcp_server(port2, {
requests = 1000,
prefix = "2 ",
})
-- Give time for healthchecker to detect
-- Again, we cannot use bu.poll_wait_health because health endpoints
-- are not currently available for stream routes.
ngx.sleep(strategy == "cassandra" and 2 or 1)
-- 3) server1 and server2 take requests again
ok1, ok2 = bu.tcp_client_requests(bu.SLOTS * 2, localhost, 9100)
assert.same(bu.SLOTS, ok1)
assert.same(bu.SLOTS, ok2)
end)
-- FIXME this test may be reporting a real failure
it("#flaky perform active health checks -- can detect before any proxy traffic", function()
local nfails = 2
local requests = bu.SLOTS * 2 -- go round the balancer twice
local port1 = bu.gen_port()
local port2 = bu.gen_port()
-- setup target servers:
-- server1 will respond all requests
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
active = {
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = nfails,
tcp_failures = nfails,
},
}
}
})
bu.add_target(bp, upstream_id, localhost, port1)
bu.add_target(bp, upstream_id, localhost, port2)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
-- server2 goes unhealthy before the first request
bu.direct_request(localhost, port2, "/unhealthy")
-- restart Kong
bu.begin_testcase_setup_update(strategy, bp)
helpers.restart_kong({
database = strategy,
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
lua_ssl_trusted_certificate = "spec/fixtures/kong_spec.crt",
db_update_frequency = 0.1,
stream_listen = "off",
plugins = "bundled,fail-once-auth",
})
bu.end_testcase_setup(strategy, bp)
-- Give time for healthchecker to detect
if mode == "ipv6" then
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "UNHEALTHY")
else
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
end
-- server1 takes all requests
local client_oks, client_fails = bu.client_requests(requests, api_host)
-- collect server results; hitcount
local results1 = server1:shutdown()
local results2 = server2:shutdown()
-- verify
assert.are.equal(requests, results1.ok)
assert.are.equal(0, results2.ok)
assert.are.equal(0, results1.fail)
assert.are.equal(0, results2.fail)
assert.are.equal(requests, client_oks)
assert.are.equal(0, client_fails)
end)
it("#flaky perform passive health checks -- manual recovery", function()
for nfails = 1, 3 do
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = nfails,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1_oks = bu.SLOTS * 2 - nfails
local server2_oks = bu.SLOTS
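-- expected tallies for the three rounds of SLOTS requests each:
-- rounds 1 and 3 split evenly, while in round 2 server2 fails
-- `nfails` requests before being marked down and server1 serves
-- the rest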
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- 1) server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
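-- flip server2 into failure mode so its proxied responses start
-- failing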
bu.direct_request(localhost, port2, "/unhealthy")
-- 2) server1 takes all requests once server2 produces
-- `nfails` failures
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- server2 is healthy again
bu.direct_request(localhost, port2, "/healthy")
-- manually bring it back using the endpoint
if mode == "ipv6" then
-- TODO /upstreams does not understand shortened IPv6 addresses
bu.post_target_endpoint(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "healthy")
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "HEALTHY")
else
bu.post_target_endpoint(upstream_id, localhost, port2, "healthy")
bu.poll_wait_health(upstream_id, localhost, port2, "HEALTHY")
end
-- 3) server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results (hit counts)
local results1 = server1:shutdown()
local results2 = server2:shutdown()
-- verify
assert.are.equal(server1_oks, results1.ok)
assert.are.equal(server2_oks, results2.ok)
assert.are.equal(0, results1.fail)
assert.are.equal(nfails, results2.fail)
assert.are.equal(bu.SLOTS * 3 - nfails, oks)
assert.are.equal(nfails, fails)
end
end)
it("perform passive health checks -- manual shutdown", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = 1,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2, target2 = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
-- setup target servers:
-- server2 will only respond for part of the test,
-- then server1 will take over.
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
-- 1) server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
-- manually bring it down using the endpoint
if mode == "ipv6" then
-- TODO /upstreams does not understand shortened IPv6 addresses
bu.put_target_address_health(upstream_id, target2.id, "[0000:0000:0000:0000:0000:0000:0000:0001]:".. port2, "unhealthy")
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "UNHEALTHY")
else
bu.put_target_address_health(upstream_id, target2.id, localhost .. ":" .. port2, "unhealthy")
bu.poll_wait_health(upstream_id, localhost, port2, "UNHEALTHY")
end
-- 2) server1 takes all requests
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- manually bring it back using the endpoint
if mode == "ipv6" then
-- TODO /upstreams does not understand shortened IPv6 addresses
bu.post_target_endpoint(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "healthy")
bu.poll_wait_health(upstream_id, "[0000:0000:0000:0000:0000:0000:0000:0001]", port2, "HEALTHY")
else
bu.put_target_address_health(upstream_id, target2.id, localhost .. ":" .. port2, "healthy")
bu.poll_wait_health(upstream_id, localhost, port2, "HEALTHY")
end
-- 3) server1 and server2 take requests again
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results (hit counts)
local results1 = server1:shutdown()
local results2 = server2:shutdown()
-- verify
assert.are.equal(bu.SLOTS * 2, results1.ok)
assert.are.equal(bu.SLOTS, results2.ok)
assert.are.equal(0, results1.fail)
assert.are.equal(0, results2.fail)
assert.are.equal(bu.SLOTS * 3, oks)
assert.are.equal(0, fails)
end)
it("perform passive health checks -- connection #timeouts", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
timeouts = 1,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local api_host = bu.add_api(bp, upstream_name, {
read_timeout = 2000, -- even on a slow CI, 2 seconds should be enough for one request
write_timeout = 2000,
})
bu.end_testcase_setup(strategy, bp)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
-- setup target servers:
-- server2 will only respond for half of the test,
-- then time out on the following request;
-- server1 will then take over.
local server1_oks = bu.SLOTS * 1.5
local server2_oks = bu.SLOTS / 2
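-- expected tallies: round 1 splits its SLOTS requests evenly; in
-- round 2 server2's single timed-out request is retried on server1,
-- so server1 ends up with SLOTS * 1.5 and the client sees no failures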
local server1 = https_server.new(port1, localhost)
local server2 = https_server.new(port2, localhost)
server1:start()
server2:start()
ngx.sleep(bu.CONSISTENCY_FREQ) -- wait for proxy state consistency timer
-- 1) server1 and server2 take requests
local oks, fails = bu.client_requests(bu.SLOTS, api_host)
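-- make server2 time out on its next proxied request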
bu.direct_request(localhost, port2, "/timeout")
-- 2) server1 takes all requests once server2 times out
-- (even though server2 would respond 200 again on the
-- request after the timeout)
do
local o, f = bu.client_requests(bu.SLOTS, api_host)
oks = oks + o
fails = fails + f
end
-- collect server results (hit counts)
local results1 = server1:shutdown()
local results2 = server2:shutdown()
-- verify
assert.are.equal(server1_oks, results1.ok)
assert.are.equal(server2_oks, results2.ok)
assert.are.equal(0, results1.fail)
assert.are.equal(1, results2.fail)
assert.are.equal(bu.SLOTS * 2, oks)
assert.are.equal(0, fails)
end)
stream_it("#flaky perform passive health checks -- #stream connection failure", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
tcp_failures = 1,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local port2 = bu.add_target(bp, upstream_id, localhost)
local _, service_id, route_id = bu.add_api(bp, upstream_name, {
read_timeout = 50,
write_timeout = 50,
route_protocol = "tcp",
})
bu.end_testcase_setup(strategy, bp)
finally(function()
bp.routes:remove({ id = route_id })
bp.services:remove({ id = service_id })
end)
-- setup target servers:
-- server2 will only respond for half of the test, then shut down;
-- server1 will then take over.
local server1_oks = bu.SLOTS * 1.5
local server2_oks = bu.SLOTS / 2
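-- server2's mock serves only SLOTS / 2 requests before exiting; the
-- first refused connection trips the passive check (tcp_failures = 1)
-- and server1 absorbs the remainder without client-visible failures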
local server1 = helpers.tcp_server(port1, {
requests = server1_oks,
prefix = "1 ",
})
local server2 = helpers.tcp_server(port2, {
requests = server2_oks,
prefix = "2 ",
})
ngx.sleep(strategy == "cassandra" and 2 or 1)
-- server1 and server2 take requests
-- server1 takes all requests once server2 fails
local ok1, ok2, fails = bu.tcp_client_requests(bu.SLOTS * 2, localhost, 9100)
-- finish up TCP server threads
server1:join()
server2:join()
-- verify
assert.are.equal(server1_oks, ok1)
assert.are.equal(server2_oks, ok2)
assert.are.equal(0, fails)
end)
-- #db == disabled for database=off, because healthcheckers
-- are currently reset when a new configuration is loaded
-- TODO enable this test when upstreams are preserved (not rebuilt)
-- across declarative config updates.
-- TODO marked as flaky because it only fails in CI
it("#flaky #db perform passive health checks -- send #timeouts", function()
-- configure healthchecks
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
healthchecks = bu.healthchecks_config {
passive = {
unhealthy = {
http_failures = 0,
timeouts = 1,
tcp_failures = 0,
}
}
}
})
local port1 = bu.add_target(bp, upstream_id, localhost)
local api_host, service_id = bu.add_api(bp, upstream_name, {
read_timeout = 10,
retries = 0,
})
bu.end_testcase_setup(strategy, bp)
local server1 = https_server.new(port1, localhost)
server1:start()
bu.direct_request(localhost, port1, "/timeout")
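-- with retries = 0 the timed-out request is not retried, so the
-- client gets a 504 from the proxy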
local _, _, last_status = bu.client_requests(1, api_host)
local results1 = server1:shutdown()
assert.same(504, last_status)
assert.same(0, results1.ok)
assert.same(1, results1.fail)
bu.begin_testcase_setup_update(strategy, bp)
bu.patch_api(bp, service_id, nil, 60000)
local port2 = bu.add_target(bp, upstream_id, localhost)
bu.end_testcase_setup(strategy, bp)
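-- bump the service timeout and add a second, healthy target so that
-- requests can succeed again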
local server2 = https_server.new(port2, localhost)
server2:start()
_, _, last_status = bu.client_requests(bu.SLOTS, api_host)
assert.same(200, last_status)
local results2 = server2:shutdown()
assert.same(bu.SLOTS, results2.ok)
assert.same(0, results2.fail)
end)
end)
end)
end
end)
end)
describe("Consistent-hashing #" .. strategy, function()
local a_dns_entry_name = "consistent.hashing.test"
lazy_setup(function()
bp = bu.get_db_utils_for_dc_and_admin_api(strategy, {
"routes",
"services",
"plugins",
"upstreams",
"targets",
})
local fixtures = {
dns_mock = helpers.dns_mock.new()
}
fixtures.dns_mock:A {
name = a_dns_entry_name,
address = "127.0.0.1",
}
assert(helpers.start_kong({
database = strategy,
dns_resolver = "127.0.0.1",
admin_listen = default_admin_listen,
proxy_listen = default_proxy_listen,
nginx_conf = "spec/fixtures/custom_nginx.template",
db_update_frequency = DB_UPDATE_FREQUENCY,
db_update_propagation = DB_UPDATE_PROPAGATION,
}, nil, nil, fixtures))
end)
lazy_teardown(function()
helpers.stop_kong()
end)
it("passive healthcheck", function()
local total_requests = 9
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
hash_on = "header",
hash_on_header = "hashme",
healthchecks = bu.healthchecks_config {
passive = {
type = "http",
healthy = {
successes = 1,
},
unhealthy = {
http_failures = 1,
},
}
}
})
local port1 = bu.add_target(bp, upstream_id, a_dns_entry_name)
local port2 = bu.add_target(bp, upstream_id, a_dns_entry_name)
local port3 = bu.add_target(bp, upstream_id, a_dns_entry_name)
local api_host = bu.add_api(bp, upstream_name)
bu.end_testcase_setup(strategy, bp)
if strategy ~= "off" then
helpers.wait_for_all_config_update()
end
local server1 = https_server.new(port1, a_dns_entry_name)
local server2 = https_server.new(port2, a_dns_entry_name)
local server3 = https_server.new(port3, a_dns_entry_name)
server1:start()
server2:start()
server3:start()
bu.client_requests(total_requests, {
["Host"] = api_host,
["hashme"] = "just a value",
})
local count1 = server1:shutdown()
local count2 = server2:shutdown()
local count3 = server3:shutdown()
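-- with a constant hash input, consistent-hashing should pin all
-- requests to a single server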
assert(count1.total == 0 or count1.total == total_requests, "each server should receive either 0 or all hits")
assert(count2.total == 0 or count2.total == total_requests, "each server should receive either 0 or all hits")
assert(count3.total == 0 or count3.total == total_requests, "each server should receive either 0 or all hits")
assert.False(count1.total == count2.total and count2.total == count3.total)
local health
helpers.pwait_until(function ()
health = bu.get_balancer_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
assert.is_equal(health.data.health, "HEALTHY")
end, 15)
-- restart the servers, but not the one which received the previous requests
if count1.total == 0 then
server1 = https_server.new(port1, a_dns_entry_name)
server1:start()
else
server1 = nil
end
if count2.total == 0 then
server2 = https_server.new(port2, a_dns_entry_name)
server2:start()
else
server2 = nil
end
if count3.total == 0 then
server3 = https_server.new(port3, a_dns_entry_name)
server3:start()
else
server3 = nil
end
bu.client_requests(total_requests, {
["Host"] = api_host,
["hashme"] = "just a value",
})
if server1 ~= nil then
server1:shutdown()
end
if server2 ~= nil then
server2:shutdown()
end
if server3 ~= nil then
server3:shutdown()
end
helpers.pwait_until(function ()
-- get updated health details
health = bu.get_balancer_health(upstream_name)
assert.is.table(health)
assert.is.table(health.data)
end, 15)
-- the server that received the requests in the first round
-- should now be unhealthy
for _, host in ipairs(health.data.details.hosts) do
if count1.total ~= 0 and host.port == port1 then
assert.is_false(host.addresses[1].healthy)
break
elseif count2.total ~= 0 and host.port == port2 then
assert.is_false(host.addresses[1].healthy)
break
elseif count3.total ~= 0 and host.port == port3 then
assert.is_false(host.addresses[1].healthy)
break
end
end
-- the upstream as a whole should still be healthy
assert.is_equal(health.data.health, "HEALTHY")
end)
-- FIXME this test fails on CI but should be ok
it("#flaky active healthcheck", function()
bu.begin_testcase_setup(strategy, bp)
local upstream_name, upstream_id = bu.add_upstream(bp, {
hash_on = "header",
hash_on_header = "hashme",
healthchecks = bu.healthchecks_config {
active = {
type = "http",
http_path = "/status",
healthy = {
interval = bu.HEALTHCHECK_INTERVAL,
successes = 1,
},
unhealthy = {
interval = bu.HEALTHCHECK_INTERVAL,
http_failures = 1,
},
}
}
})
local port1 = bu.add_target(bp, upstream_id, "localhost")
local port2 = bu.add_target(bp, upstream_id, "localhost")
local port3 = bu.add_target(bp, upstream_id, "localhost")
bu.add_api(bp, upstream_name)
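-- note: the returned api_host is unused; this test sends no proxy
-- traffic, only the active healthchecker hits the servers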
bu.end_testcase_setup(strategy, bp)
local server1 = https_server.new(port1, "localhost")
local server2 = https_server.new(port2, "localhost")
local server3 = https_server.new(port3, "localhost")
server1:start()
server2:start()
server3:start()
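-- let a few active probe rounds run before sampling balancer health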
ngx.sleep(bu.HEALTHCHECK_INTERVAL * 3)
-- get all healthy servers
local all_healthy = bu.get_balancer_health(upstream_name)
-- tell server3 to be unhappy
bu.direct_request("localhost", port3, "/unhealthy")
-- wait for the active health checks to run
ngx.sleep(bu.HEALTHCHECK_INTERVAL * 3)
-- get updated health details
local not_so_healthy = bu.get_balancer_health(upstream_name)
local count1 = server1:shutdown()
local count2 = server2:shutdown()
local count3 = server3:shutdown()
assert(count1.status_ok > 0, "server1 should receive active health checks")
assert(count1.status_fail == 0, "server1 should not fail on active health checks")
assert(count2.status_ok > 0, "server2 should receive active health checks")
assert(count2.status_fail == 0, "server2 should not fail on active health checks")
assert(count3.status_ok > 0, "server3 should receive active health checks")
assert(count3.status_fail > 0, "server3 should fail on active health checks")
assert.is.table(all_healthy)
assert.is.table(all_healthy.data)
assert.is.table(not_so_healthy)
assert.is.table(not_so_healthy.data)
-- all servers should be healthy on first run
for _, host in ipairs(all_healthy.data.details.hosts) do
assert.is_true(host.addresses[1].healthy)
end
-- and the upstream should be healthy
assert.is_equal(all_healthy.data.health, "HEALTHY")
-- servers on ports 1 and 2 should be healthy; the one on port 3 should be unhealthy
for _, host in ipairs(not_so_healthy.data.details.hosts) do
if host.port == port1 then
assert.is_true(host.addresses[1].healthy)
elseif host.port == port2 then
assert.is_true(host.addresses[1].healthy)
elseif host.port == port3 then
assert.is_false(host.addresses[1].healthy)
end
end
-- the upstream as a whole should still be healthy
assert.is_equal(not_so_healthy.data.health, "HEALTHY")
end)
end)
end