Skip to content

refactor: add healthcheck manager to decouple upstream #12426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 52 commits into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
72634ba
feat: add healthcheck manager to decouple upstream
Revolyssup Jul 14, 2025
2e114fb
fix
Revolyssup Jul 14, 2025
588ea22
fix
Revolyssup Jul 15, 2025
275d179
fix
Revolyssup Jul 15, 2025
6a169a3
fix
Revolyssup Jul 15, 2025
40f5a79
handle both resource paths
Revolyssup Jul 15, 2025
40b0f36
pass test
Revolyssup Jul 15, 2025
4d3736a
fix tests
Revolyssup Jul 15, 2025
be15b46
fix tests
Revolyssup Jul 15, 2025
ef9ded6
fix nil check
Revolyssup Jul 15, 2025
f2485ff
fix
Revolyssup Jul 15, 2025
d65748b
add sleep
Revolyssup Jul 15, 2025
342674b
fix tests
Revolyssup Jul 16, 2025
5dd2f5a
fix lint
Revolyssup Jul 16, 2025
a0e0f3d
fix tests
Revolyssup Jul 16, 2025
af9a26c
fix tests
Revolyssup Jul 16, 2025
562105a
fix lint
Revolyssup Jul 16, 2025
c3fbdf1
fix tests
Revolyssup Jul 16, 2025
e551bbc
fix lint
Revolyssup Jul 16, 2025
9f19c2c
fix logic
Revolyssup Jul 16, 2025
45bba02
fix tests
Revolyssup Jul 16, 2025
d962d76
fix lint
Revolyssup Jul 16, 2025
8c3a884
fix test
Revolyssup Jul 16, 2025
c717105
refactor
Revolyssup Jul 17, 2025
b724093
reset timer to 1 s
Revolyssup Jul 17, 2025
6404912
fix concurrent timers
Revolyssup Jul 17, 2025
413fdb2
use resource_version and key explicitly
Revolyssup Jul 17, 2025
a4ee844
remove using .parent
Revolyssup Jul 17, 2025
31198de
remove log
Revolyssup Jul 17, 2025
0efb277
fix tests
Revolyssup Jul 17, 2025
2588a40
dont run timer while worker exiting
Revolyssup Jul 17, 2025
c110c51
fix lint and tests
Revolyssup Jul 17, 2025
3de4f23
fix rr-balance
Revolyssup Jul 17, 2025
68796dc
add sleep in healthcheck-stop-checker
Revolyssup Jul 17, 2025
3b94bd9
apply suggestion
Revolyssup Jul 18, 2025
4621bed
fix lint
Revolyssup Jul 18, 2025
47a04c6
add lint
Revolyssup Jul 18, 2025
cc363da
fix lint
Revolyssup Jul 18, 2025
9288707
fix stop-checker
Revolyssup Jul 18, 2025
98a65e5
change warn to info
Revolyssup Jul 21, 2025
66fc383
apply suggestions
Revolyssup Jul 21, 2025
9a56f59
fix lint
Revolyssup Jul 21, 2025
f11e391
fix CI
Revolyssup Jul 21, 2025
30f12a5
put timer in local
Revolyssup Jul 21, 2025
17b7397
apply copilot suggestion
Revolyssup Jul 22, 2025
15979b3
apply suggestions
Revolyssup Jul 23, 2025
e725ad5
skip creating checker if up_conf.checks nil
Revolyssup Jul 23, 2025
da27e15
Merge branch 'master' of github.com:apache/apisix into revolyssup/ref…
Revolyssup Jul 31, 2025
4359182
apply suggestions
Revolyssup Jul 31, 2025
1dca2f1
fix lint
Revolyssup Jul 31, 2025
90e993b
fix
Revolyssup Jul 31, 2025
0a4e23f
apply suggestions
Revolyssup Jul 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions apisix/balancer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ local balancer = require("ngx.balancer")
local core = require("apisix.core")
local priority_balancer = require("apisix.balancer.priority")
local apisix_upstream = require("apisix.upstream")
local healthcheck_manager = require("apisix.healthcheck_manager")
local ipairs = ipairs
local is_http = ngx.config.subsystem == "http"
local enable_keepalive = balancer.enable_keepalive and is_http
Expand All @@ -28,7 +29,6 @@ local set_timeouts = balancer.set_timeouts
local ngx_now = ngx.now
local str_byte = string.byte


local module_name = "balancer"
local pickers = {}

Expand Down Expand Up @@ -75,7 +75,8 @@ local function fetch_health_nodes(upstream, checker)
local port = upstream.checks and upstream.checks.active and upstream.checks.active.port
local up_nodes = core.table.new(0, #nodes)
for _, node in ipairs(nodes) do
local ok, err = checker:get_target_status(node.host, port or node.port, host)
local ok, err = healthcheck_manager.fetch_node_status(checker,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

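We can cache `healthcheck_manager.fetch_node_status` in a short local variable at the top of the file (e.g. `local fetch_node_status = healthcheck_manager.fetch_node_status`) so the table lookup is not repeated on every loop iteration.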

Copy link
Contributor Author

@Revolyssup Revolyssup Jul 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean using the lrucache with the checker as the key?

node.host, port or node.port, host)
if ok then
up_nodes = transform_node(up_nodes, node)
elseif err then
Expand Down
16 changes: 3 additions & 13 deletions apisix/control/v1.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ local plugin = require("apisix.plugin")
local get_routes = require("apisix.router").http_routes
local get_services = require("apisix.http.service").services
local upstream_mod = require("apisix.upstream")
local healthcheck_manager = require("apisix.healthcheck_manager")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might make sense to put it in the same directory as the batch processor manager. The root directory isn't the right place for it.

local get_upstreams = upstream_mod.upstreams
local collectgarbage = collectgarbage
local ipairs = ipairs
Expand Down Expand Up @@ -66,14 +67,13 @@ function _M.schema()
return 200, schema
end


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a reminder: please try to avoid pointless formatting changes. This kind of noise makes the diff harder to review.

local healthcheck
local function extra_checker_info(value)
if not healthcheck then
healthcheck = require("resty.healthcheck")
end

local name = upstream_mod.get_healthchecker_name(value)
local name = healthcheck_manager.get_healthchecker_name(value.value)
local nodes, err = healthcheck.get_target_list(name, "upstream-healthcheck")
if err then
core.log.error("healthcheck.get_target_list failed: ", err)
Expand Down Expand Up @@ -214,7 +214,6 @@ local function iter_and_find_healthcheck_info(values, src_type, src_id)
if not checks then
return nil, str_format("no checker for %s[%s]", src_type, src_id)
end

local info = extra_checker_info(value)
info.type = get_checker_type(checks)
return info
Expand Down Expand Up @@ -249,7 +248,6 @@ function _M.get_health_checker()
if not info then
return 404, {error_msg = err}
end

local out, err = try_render_html({stats={info}})
if out then
core.response.set_header("Content-Type", "text/html")
Expand All @@ -266,9 +264,6 @@ local function iter_add_get_routes_info(values, route_id)
local infos = {}
for _, route in core.config_util.iterate_values(values) do
local new_route = core.table.deepcopy(route)
if new_route.value.upstream and new_route.value.upstream.parent then
new_route.value.upstream.parent = nil
end
-- remove healthcheck info
new_route.checker = nil
new_route.checker_idx = nil
Expand Down Expand Up @@ -312,9 +307,6 @@ local function iter_add_get_upstream_info(values, upstream_id)
for _, upstream in core.config_util.iterate_values(values) do
local new_upstream = core.table.deepcopy(upstream)
core.table.insert(infos, new_upstream)
if new_upstream.value and new_upstream.value.parent then
new_upstream.value.parent = nil
end
-- check the upstream id
if upstream_id and upstream.value.id == upstream_id then
return new_upstream
Expand All @@ -332,6 +324,7 @@ function _M.dump_all_upstreams_info()
return 200, infos
end


function _M.dump_upstream_info()
local upstreams = get_upstreams()
local uri_segs = core.utils.split_uri(ngx_var.uri)
Expand All @@ -354,9 +347,6 @@ local function iter_add_get_services_info(values, svc_id)
local infos = {}
for _, svc in core.config_util.iterate_values(values) do
local new_svc = core.table.deepcopy(svc)
if new_svc.value.upstream and new_svc.value.upstream.parent then
new_svc.value.upstream.parent = nil
end
-- remove healthcheck info
new_svc.checker = nil
new_svc.checker_idx = nil
Expand Down
Loading
Loading