Skip to content

Commit

Permalink
feat(balancer) use lua-resty-dns-client healthThreshold attrib (#5206)
Browse files Browse the repository at this point in the history
added unit test healthchecks.threshold param validation
  • Loading branch information
locao authored Nov 11, 2019
1 parent ab8d7bf commit e3a0e0d
Show file tree
Hide file tree
Showing 8 changed files with 262 additions and 22 deletions.
45 changes: 36 additions & 9 deletions autodoc/data/admin-api.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1052,15 +1052,15 @@ return {
GET = {
title = [[Show Upstream health for node]],
description = [[
Displays the health status for all Targets of a given Upstream, according to
the perspective of a specific Kong node. Note that, being node-specific
information, making this same request to different nodes of the Kong cluster
may produce different results. For example, one specific node of the Kong
cluster may be experiencing network issues, causing it to fail to connect to
some Targets: these Targets will be marked as unhealthy by that node
(directing traffic from this node to other Targets that it can successfully
reach), but healthy to all others Kong nodes (which have no problems using that
Target).
Displays the health status for all Targets of a given Upstream, or for
the whole Upstream, according to the perspective of a specific Kong node.
Note that, being node-specific information, making this same request
to different nodes of the Kong cluster may produce different results.
For example, one specific node of the Kong cluster may be experiencing
network issues, causing it to fail to connect to some Targets: these
Targets will be marked as unhealthy by that node (directing traffic from
this node to other Targets that it can successfully reach), but healthy
to all other Kong nodes (which have no problems using that Target).
The `data` field of the response contains an array of Target objects.
The health for each Target is returned in its `health` field:
Expand All @@ -1078,6 +1078,11 @@ return {
(circuit breakers) or [manually](#set-target-as-unhealthy),
its status is displayed as `UNHEALTHY`. The load balancer is not directing
any traffic to this Target via this Upstream.
When the request query parameter `balancer_health` is set to `1`, the
`data` field of the response refers to the whole Upstream, and its `health`
attribute is defined by the state of all of the Upstream's Targets, according
to the [health checker's threshold][healthchecks.threshold] field.
]],
endpoint = [[
<div class="endpoint get">/upstreams/{name or id}/health/</div>
Expand All @@ -1086,6 +1091,11 @@ return {
---:| ---
`name or id`<br>**required** | The unique identifier **or** the name of the Upstream for which to display Target health.
]],
request_query = [[
Attributes | Description
---:| ---
`balancer_health`<br>*optional* | If set to 1, Kong will return the health status of the whole Upstream.
]],
response = [[
```
HTTP 200 OK
Expand Down Expand Up @@ -1115,6 +1125,22 @@ return {
]
}
```
If `balancer_health=1`:
```
HTTP 200 OK
```
```json
{
"data": {
"health": "HEALTHY",
"id": "07131005-ba30-4204-a29f-0927d53257b4"
},
"next": null,
"node_id": "cbb297c0-14a9-46bc-ad91-1d0ef9b42df9"
}
```
]],
},

Expand Down Expand Up @@ -1153,6 +1179,7 @@ return {
["healthchecks.passive.unhealthy.tcp_failures"] = { description = [[Number of TCP failures in proxied traffic to consider a target unhealthy, as observed by passive health checks.]] },
["healthchecks.passive.unhealthy.timeouts"] = { description = [[Number of timeouts in proxied traffic to consider a target unhealthy, as observed by passive health checks.]] },
["healthchecks.passive.unhealthy.http_failures"] = { description = [[Number of HTTP failures in proxied traffic (as defined by `healthchecks.passive.unhealthy.http_statuses`) to consider a target unhealthy, as observed by passive health checks.]] },
["healthchecks.threshold"] = { description = [[The minimum percentage of the upstream's targets' weight that must be available for the whole upstream to be considered healthy.]] },
tags = {
description = [[
An optional set of strings associated with the Upstream, for grouping and filtering.
Expand Down
20 changes: 15 additions & 5 deletions kong/api/routes/upstreams.lua
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,21 @@ return {
return kong.response.exit(404, { message = "Not found" })
end

local node_id, err = kong.node.get_id()
if err then
kong.log.err("failed to get node id: ", err)
end

if tostring(self.params.balancer_health) == "1" then
local upstream_pk = db.upstreams.schema:extract_pk_values(upstream)
local balancer_health = db.targets:get_balancer_health(upstream_pk)
return kong.response.exit(200, {
data = balancer_health,
next = null,
node_id = node_id,
})
end

self.params.targets = db.upstreams.schema:extract_pk_values(upstream)
local targets_with_health, _, err_t, offset =
endpoints.page_collection(self, db, db.targets.schema, "page_for_upstream_with_health")
Expand All @@ -71,11 +86,6 @@ return {
self.params.upstreams,
escape_uri(offset)) or null

local node_id, err = kong.node.get_id()
if err then
kong.log.err("failed getting node id: ", err)
end

return kong.response.exit(200, {
data = targets_with_health,
offset = offset,
Expand Down
10 changes: 10 additions & 0 deletions kong/db/dao/targets.lua
Original file line number Diff line number Diff line change
Expand Up @@ -364,4 +364,14 @@ function _TARGETS:post_health(upstream_pk, target, address, is_healthy)
end


--- Fetch the overall health of the balancer that serves an upstream.
-- Errors reported by the balancer module are logged at error level and are
-- not propagated; the first value it returned is handed back either way.
-- @param upstream_pk primary key table of the upstream (only `id` is read)
-- @return the health info produced by `balancer.get_balancer_health`
function _TARGETS:get_balancer_health(upstream_pk)
  local info, failure = balancer.get_balancer_health(upstream_pk.id)

  if failure then
    ngx.log(ngx.ERR, "failed getting upstream health: ", failure)
  end

  return info
end


return _TARGETS
8 changes: 8 additions & 0 deletions kong/db/schema/entities/upstreams.lua
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ local check_verify_certificate = Schema.define {
}


-- Schema for the `healthchecks.threshold` field of an upstream: a number
-- (fractions allowed) in the range 0..100, defaulting to 0. It is appended
-- to the healthchecks fields as `threshold` further down in this file.
local health_threshold = Schema.define {
type = "number",
default = 0,
between = { 0, 100 },
}


local NO_DEFAULT = {}


Expand Down Expand Up @@ -145,6 +152,7 @@ end


local healthchecks_fields, healthchecks_defaults = gen_fields(healthchecks_config)
healthchecks_fields[#healthchecks_fields+1] = { ["threshold"] = health_threshold }


local r = {
Expand Down
58 changes: 53 additions & 5 deletions kong/runloop/balancer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,13 @@ do
end

creating[upstream.id] = true
local health_threshold = upstream.healthchecks and
upstream.healthchecks.threshold or nil

local balancer, err = balancer_types[upstream.algorithm].new({
wheelSize = upstream.slots, -- will be ignored by least-connections
dns = dns_client,
healthThreshold = health_threshold,
})

if not balancer then
Expand Down Expand Up @@ -984,6 +987,19 @@ local function unsubscribe_from_healthcheck_events(callback)
end


--------------------------------------------------------------------------------
-- Tell whether any health check (active or passive) is enabled for an upstream.
-- A check counts as enabled when its interval or failure counter is non-zero.
-- @param upstream the upstream entity, or `nil`.
-- @return `true` if at least one health check setting is non-zero, `false`
-- otherwise (including when `upstream` is `nil`).
local function is_upstream_using_healthcheck(upstream)
  if upstream == nil then
    return false
  end

  local checks = upstream.healthchecks

  -- evaluated lazily, left to right: active settings first, then passive ones
  return checks.active.healthy.interval ~= 0
      or checks.active.unhealthy.interval ~= 0
      or checks.passive.unhealthy.tcp_failures ~= 0
      or checks.passive.unhealthy.timeouts ~= 0
      or checks.passive.unhealthy.http_failures ~= 0
end


--------------------------------------------------------------------------------
-- Get healthcheck information for an upstream.
-- @param upstream_id the id of the upstream.
Expand All @@ -998,11 +1014,7 @@ local function get_upstream_health(upstream_id)
return nil, "upstream not found"
end

local using_hc = upstream.healthchecks.active.healthy.interval ~= 0
or upstream.healthchecks.active.unhealthy.interval ~= 0
or upstream.healthchecks.passive.unhealthy.tcp_failures ~= 0
or upstream.healthchecks.passive.unhealthy.timeouts ~= 0
or upstream.healthchecks.passive.unhealthy.http_failures ~= 0
local using_hc = is_upstream_using_healthcheck(upstream)

local balancer = balancers[upstream_id]
if not balancer then
Expand Down Expand Up @@ -1036,6 +1048,41 @@ local function get_upstream_health(upstream_id)
end


--------------------------------------------------------------------------------
-- Report the aggregated health of the balancer attached to an upstream.
-- @param upstream_id the id of the upstream.
-- @return table `{ health = <string>, id = upstream_id }`, where `health` is
-- `"HEALTHCHECKS_OFF"` when no health check is enabled for the upstream, and
-- otherwise `"HEALTHY"` or `"UNHEALTHY"` according to the balancer's status;
-- or `nil` plus an error message when the upstream, its balancer or its
-- healthchecker cannot be found.
local function get_balancer_health(upstream_id)
  local upstream = get_upstream_by_id(upstream_id)
  if not upstream then
    return nil, "upstream not found"
  end

  local balancer = balancers[upstream_id]
  if not balancer then
    return nil, "balancer not found"
  end

  local result = {
    health = "HEALTHCHECKS_OFF",
    id = upstream_id,
  }

  -- without health checks there is nothing to ask the balancer about
  if not is_upstream_using_healthcheck(upstream) then
    return result
  end

  if not healthcheckers[balancer] then
    return nil, "healthchecker not found"
  end

  if balancer:getStatus().healthy then
    result.health = "HEALTHY"
  else
    result.health = "UNHEALTHY"
  end

  return result
end


--------------------------------------------------------------------------------
-- for unit-testing purposes only
local function _get_healthchecker(balancer)
Expand All @@ -1062,6 +1109,7 @@ return {
unsubscribe_from_healthcheck_events = unsubscribe_from_healthcheck_events,
get_upstream_health = get_upstream_health,
get_upstream_by_id = get_upstream_by_id,
get_balancer_health = get_balancer_health,

-- ones below are exported for test purposes only
_create_balancer = create_balancer,
Expand Down
17 changes: 17 additions & 0 deletions spec/01-unit/01-db/01-schema/09-upstreams_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,11 @@ describe("load upstreams", function()
local status_code = "value should be between 100 and 999"
local integer = "expected an integer"
local boolean = "expected a boolean"
local number = "expected a number"
local invalid_host = "invalid value: "
local invalid_host_port = "must not have a port"
local invalid_ip = "must not be an IP"
local threshold = "value should be between 0 and 100"
local tests = {
{{ active = { timeout = -1 }}, seconds },
{{ active = { timeout = 1e+42 }}, seconds },
Expand Down Expand Up @@ -309,6 +311,10 @@ describe("load upstreams", function()
{{ active = { healthy = { http_statuses = { 1000 }}}}, status_code },
{{ active = { healthy = { http_statuses = { 111.314 }}}}, integer },
{{ active = { healthy = { successes = 0.5 }}}, integer },
{{ active = { unhealthy = { timeouts = 1 }}, threshold = -1}, threshold },
{{ active = { unhealthy = { timeouts = 1 }}, threshold = 101}, threshold },
{{ active = { unhealthy = { timeouts = 1 }}, threshold = "50"}, number },
{{ active = { unhealthy = { timeouts = 1 }}, threshold = true}, number },
--{{ active = { healthy = { successes = 0 }}}, "must be an integer" },
{{ active = { healthy = { successes = -1 }}}, zero_integer },
{{ active = { unhealthy = { interval = -1 }}}, seconds },
Expand Down Expand Up @@ -348,6 +354,11 @@ describe("load upstreams", function()
{{ passive = { unhealthy = { http_failures = 0.5 }}}, integer },
--{{ passive = { unhealthy = { http_failures = 0 }}}, integer },
{{ passive = { unhealthy = { http_failures = -1 }}}, zero_integer },
{{ passive = { unhealthy = { timeouts = 1 }}, threshold = -1}, threshold },
{{ passive = { unhealthy = { timeouts = 1 }}, threshold = 101}, threshold },
{{ passive = { unhealthy = { timeouts = 1 }}, threshold = "50"}, number },
{{ passive = { unhealthy = { timeouts = 1 }}, threshold = true}, number },

--]]
}

Expand Down Expand Up @@ -386,12 +397,18 @@ describe("load upstreams", function()
{ active = { unhealthy = { tcp_failures = 3 }}},
{ active = { unhealthy = { timeouts = 9 }}},
{ active = { unhealthy = { http_failures = 2 }}},
{ active = { unhealthy = { http_failures = 2 }}, threshold = 0},
{ active = { unhealthy = { http_failures = 2 }}, threshold = 50.50},
{ active = { unhealthy = { http_failures = 2 }}, threshold = 100},
{ passive = { healthy = { http_statuses = { 200, 201 } }}},
{ passive = { healthy = { successes = 2 }}},
{ passive = { unhealthy = { http_statuses = { 400, 500 } }}},
{ passive = { unhealthy = { tcp_failures = 8 }}},
{ passive = { unhealthy = { timeouts = 1 }}},
{ passive = { unhealthy = { http_failures = 2 }}},
{ passive = { unhealthy = { http_failures = 2 }}, threshold = 0},
{ passive = { unhealthy = { http_failures = 2 }}, threshold = 50.50},
{ passive = { unhealthy = { http_failures = 2 }}, threshold = 100},
}
for _, test in ipairs(tests) do
local entity = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,8 @@ describe("declarative config: flatten", function()
tcp_failures = 0,
timeouts = 0
}
}
},
threshold = 0
},
host_header = null,
id = "UUID",
Expand Down Expand Up @@ -1408,7 +1409,8 @@ describe("declarative config: flatten", function()
tcp_failures = 0,
timeouts = 0
}
}
},
threshold = 0
},
host_header = null,
id = "UUID",
Expand Down
Loading

0 comments on commit e3a0e0d

Please sign in to comment.