Skip to content

Commit 59629c0

Browse files
committed
H2O: Improve worker thread load balancing
Use a simple eBPF program that assigns incoming connections to worker threads in a mostly round-robin fashion. Signed-off-by: Anton Kirilov <antonvkirilov@proton.me>
1 parent 4d6146b commit 59629c0

File tree

7 files changed

+177
-28
lines changed

7 files changed

+177
-28
lines changed

frameworks/C/h2o/CMakeLists.txt

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
cmake_minimum_required(VERSION 3.18.0)
22
project(h2o-app)
3+
find_library(BPF_LIB bpf REQUIRED)
34
find_library(CRYPTO_LIB crypto REQUIRED)
45
find_library(H2O_LIB h2o-evloop REQUIRED)
56
find_library(MUSTACHE_C_LIB mustache_c REQUIRED)
@@ -8,27 +9,57 @@ find_library(PQ_LIB pq REQUIRED)
89
find_library(SSL_LIB ssl REQUIRED)
910
find_library(YAJL_LIB yajl REQUIRED)
1011
find_library(Z_LIB z REQUIRED)
12+
find_path(ASM_INCLUDE asm/types.h REQUIRED)
13+
find_path(BPF_INCLUDE bpf/libbpf.h REQUIRED)
1114
find_path(H2O_INCLUDE h2o.h REQUIRED)
1215
find_path(MUSTACHE_C_INCLUDE mustache.h REQUIRED)
1316
find_path(NUMA_INCLUDE numaif.h REQUIRED)
1417
find_path(OPENSSL_INCLUDE openssl/ssl.h REQUIRED)
1518
find_path(PQ_INCLUDE postgresql/libpq-fe.h REQUIRED)
1619
find_path(YAJL_INCLUDE yajl/yajl_gen.h REQUIRED)
17-
include_directories(src ${H2O_INCLUDE} ${MUSTACHE_C_INCLUDE} ${NUMA_INCLUDE} ${OPENSSL_INCLUDE})
18-
include_directories(${PQ_INCLUDE} ${YAJL_INCLUDE})
20+
find_program(BPFTOOL_BIN bpftool REQUIRED)
21+
find_program(CLANG_BIN clang REQUIRED)
22+
include_directories(src ${CMAKE_BINARY_DIR}/generated-headers ${BPF_INCLUDE} ${H2O_INCLUDE})
23+
include_directories(${MUSTACHE_C_INCLUDE} ${NUMA_INCLUDE} ${OPENSSL_INCLUDE} ${PQ_INCLUDE})
24+
include_directories(${YAJL_INCLUDE})
1925
set(CMAKE_C_STANDARD 11)
2026
set(CMAKE_C_STANDARD_REQUIRED ON)
2127
add_compile_definitions(H2O_USE_LIBUV=0)
2228
set(COMMON_OPTIONS -flto=auto -pthread)
23-
add_compile_options(-pedantic -Wall -Wextra ${COMMON_OPTIONS})
29+
set(WARNING_OPTIONS -pedantic -Wall -Wextra)
30+
add_compile_options(${COMMON_OPTIONS} ${WARNING_OPTIONS})
2431
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -D_FORTIFY_SOURCE=2")
2532
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
2633
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3")
27-
file(GLOB_RECURSE SOURCES "src/*.c")
28-
add_executable(${PROJECT_NAME} ${SOURCES})
34+
# Build the eBPF socket load balancer and wrap it in a C skeleton header.
#
# Steps:
#   1. Create the output directory. The skeleton is written through a shell
#      redirect, which fails if the directory is missing; not every generator
#      creates the parent directory of an OUTPUT on its own.
#   2. Compile src/bpf/socket_load_balancer.c for the BPF target with clang.
#   3. Let bpftool emit the skeleton for the object file; its output is
#      redirected into the generated header that the application includes.
#
# The intermediate object file is listed in BYPRODUCTS so that the build tool
# knows about it (e.g. for cleaning).
add_custom_command(
  OUTPUT ${CMAKE_BINARY_DIR}/generated-headers/socket_load_balancer.h
  BYPRODUCTS ${CMAKE_BINARY_DIR}/socket_load_balancer.o
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/generated-headers
  COMMAND ${CLANG_BIN}
          -c
          -DNDEBUG
          -g
          -I ${ASM_INCLUDE}
          -I ${BPF_INCLUDE}
          -mcpu=v3
          -o ${CMAKE_BINARY_DIR}/socket_load_balancer.o
          -O3
          -std=gnu11
          -target bpf
          ${WARNING_OPTIONS}
          ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/socket_load_balancer.c
  COMMAND ${BPFTOOL_BIN} gen skeleton ${CMAKE_BINARY_DIR}/socket_load_balancer.o >
          ${CMAKE_BINARY_DIR}/generated-headers/socket_load_balancer.h
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/socket_load_balancer.c
  COMMENT "Generating the eBPF skeleton header socket_load_balancer.h"
  VERBATIM)
53+
# Target that drives generation of the eBPF skeleton header; the executable
# depends on it below so that the header exists before any source including
# "socket_load_balancer.h" is compiled.
add_custom_target(
  generated_headers
  DEPENDS ${CMAKE_BINARY_DIR}/generated-headers/socket_load_balancer.h)

# Application sources: everything under src/handlers/ (recursively) plus the
# files directly in src/. The top-level glob is deliberately non-recursive so
# that src/bpf/socket_load_balancer.c, which the custom command compiles for
# the BPF target, is not also compiled into the executable.
# CONFIGURE_DEPENDS re-evaluates the globs at build time, so added or removed
# source files are picked up without a manual re-configure.
file(GLOB_RECURSE HANDLER_SOURCES CONFIGURE_DEPENDS "src/handlers/*.c")
file(GLOB SOURCES CONFIGURE_DEPENDS "src/*.c")
add_executable(${PROJECT_NAME} ${HANDLER_SOURCES} ${SOURCES})
add_dependencies(${PROJECT_NAME} generated_headers)
2960
target_link_libraries(${PROJECT_NAME} ${COMMON_OPTIONS})
30-
target_link_libraries(${PROJECT_NAME} ${H2O_LIB} m ${MUSTACHE_C_LIB} ${NUMA_LIB} ${PQ_LIB})
31-
target_link_libraries(${PROJECT_NAME} ${SSL_LIB} ${CRYPTO_LIB} ${YAJL_LIB} ${Z_LIB})
61+
target_link_libraries(${PROJECT_NAME} ${BPF_LIB} ${H2O_LIB} m ${MUSTACHE_C_LIB} ${NUMA_LIB})
62+
target_link_libraries(${PROJECT_NAME} ${PQ_LIB} ${SSL_LIB} ${CRYPTO_LIB} ${YAJL_LIB} ${Z_LIB})
3263
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin)
3364
file(GLOB TEMPLATES "template/*")
3465
install(FILES ${TEMPLATES} DESTINATION share/${PROJECT_NAME}/template)

frameworks/C/h2o/README.md

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
1-
# h2o
1+
# h2o-app
22

33
This is a framework implementation using the [H2O](https://h2o.examp1e.net) HTTP server. It
44
builds directly on top of `libh2o` instead of running the standalone server.
55

66
## Requirements
77

8-
[CMake](https://cmake.org), [H2O](https://h2o.examp1e.net), [libpq](https://www.postgresql.org),
9-
[mustache-c](https://github.com/x86-64/mustache-c), [numactl](https://github.com/numactl/numactl),
10-
[OpenSSL](https://www.openssl.org), [YAJL](https://lloyd.github.io/yajl)
8+
[bpftool](https://bpftool.dev/),
9+
[Clang](https://clang.llvm.org/),
10+
[CMake](https://cmake.org/),
11+
[GNU C Library](https://www.gnu.org/software/libc),
12+
[H2O](https://h2o.examp1e.net/),
13+
[libbpf](https://github.com/libbpf/libbpf),
14+
[libpq](https://www.postgresql.org/),
15+
[Linux](https://kernel.org/),
16+
[mustache-c](https://github.com/x86-64/mustache-c),
17+
[numactl](https://github.com/numactl/numactl),
18+
[OpenSSL](https://www.openssl.org/),
19+
[YAJL](https://lloyd.github.io/yajl)
1120

1221
## Test implementations
1322

@@ -30,4 +39,4 @@ options respectively.
3039

3140
## Contact
3241

33-
Anton Kirilov <antonvkirilov@gmail.com>
42+
Anton Kirilov <antonvkirilov@proton.me>

frameworks/C/h2o/h2o.dockerfile

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,19 @@ FROM "ubuntu:${UBUNTU_VERSION}" AS compile
66

77
RUN echo "[timing] Installing system packages: $(date)"
88
ARG DEBIAN_FRONTEND=noninteractive
9-
RUN apt-get -yqq update && \
10-
apt-get -yqq install \
9+
RUN apt-get install \
10+
--no-install-recommends \
11+
-qqUy \
1112
autoconf \
13+
automake \
1214
bison \
15+
bpftool \
1316
clang \
1417
cmake \
1518
curl \
1619
flex \
20+
gcc \
21+
libbpf-dev \
1722
libbrotli-dev \
1823
libcap-dev \
1924
libnuma-dev \
@@ -40,9 +45,7 @@ RUN curl -LSs "https://github.com/h2o/h2o/archive/${H2O_VERSION}.tar.gz" | \
4045
tar --strip-components=1 -xz && \
4146
cmake \
4247
-B build \
43-
-DCMAKE_AR=/usr/bin/gcc-ar \
4448
-DCMAKE_C_FLAGS="-flto=auto -march=native -mtune=native" \
45-
-DCMAKE_RANLIB=/usr/bin/gcc-ranlib \
4649
-DWITH_MRUBY=on \
4750
-G Ninja \
4851
-S . && \
@@ -79,8 +82,10 @@ FROM "ubuntu:${UBUNTU_VERSION}"
7982

8083
RUN echo "[timing] Installing final system packages: $(date)"
8184
ARG DEBIAN_FRONTEND=noninteractive
82-
RUN apt-get -yqq update && \
83-
apt-get -yqq install \
85+
RUN apt-get install \
86+
--no-install-recommends \
87+
-qqUy \
88+
libbpf1 \
8489
libnuma1 \
8590
libpq5 \
8691
liburing2 \
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
Copyright (c) 2025 Anton Valentinov Kirilov
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
5+
associated documentation files (the "Software"), to deal in the Software without restriction,
6+
including without limitation the rights to use, copy, modify, merge, publish, distribute,
7+
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
8+
furnished to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all copies or
11+
substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
14+
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
16+
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
17+
OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
18+
*/
19+
20+
// TODO: Switch to the standard atomics (<stdatomic.h>) after
21+
// the system header file mess gets sorted for eBPF.
22+
#include <stdbool.h>
23+
#include <stddef.h>
24+
#include <linux/bpf.h>
25+
#include <bpf/bpf_helpers.h>
26+
27+
// We need a finite number of iterations to keep the eBPF verifier happy.
28+
#define MAX_ITERATIONS 42
29+
30+
static size_t thread_idx;
31+
size_t thread_num = 1;
32+
33+
// Socket program that picks the index of the worker that should receive the
// incoming connection, advancing a shared counter in round-robin order.
//
// Returns the counter value that this invocation managed to claim, or
// thread_num when every compare-and-swap attempt lost the race.
// NOTE(review): thread_num is outside the range [0, thread_num), so presumably
// the kernel then falls back to its default reuseport socket selection -
// confirm against the SO_ATTACH_REUSEPORT_EBPF documentation.
SEC("socket") int socket_load_balancer(void *skb)
{
	(void) skb;

	// TODO: Use __atomic_load_n() after LLVM starts supporting it for eBPF.
	size_t current = *(const volatile size_t *) &thread_idx;
	// Result reported when no attempt succeeds.
	const int fallback = thread_num;

	__atomic_thread_fence(__ATOMIC_RELAXED);

	// The attempt count is bounded by MAX_ITERATIONS.
	for (size_t attempt = 0; attempt < MAX_ITERATIONS; attempt++) {
		const size_t next = (current + 1) % thread_num;
		// On failure the builtin stores the value it observed into "current",
		// so the following attempt starts from fresh data.
		const bool claimed = __atomic_compare_exchange_n(&thread_idx,
		                                                 &current,
		                                                 next,
		                                                 false,
		                                                 __ATOMIC_RELAXED,
		                                                 __ATOMIC_RELAXED);

		if (claimed)
			return current;
	}

	return fallback;
}

frameworks/C/h2o/src/event_loop.c

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,16 @@
4646

4747
static void accept_connection(h2o_socket_t *listener, const char *err);
4848
static void accept_http_connection(h2o_socket_t *listener, const char *err);
49-
static int get_listener_socket(const char *bind_address, uint16_t port);
49+
static int get_listener_socket(bool is_main_thread,
50+
int bpf_fd,
51+
const char *bind_address,
52+
uint16_t port);
5053
static void on_close_connection(void *data);
5154
static void process_messages(h2o_multithread_receiver_t *receiver, h2o_linklist_t *messages);
5255
static void shutdown_server(h2o_socket_t *listener, const char *err);
53-
static void start_accept_polling(const config_t *config,
56+
static void start_accept_polling(bool is_main_thread,
57+
int bpf_fd,
58+
const config_t *config,
5459
h2o_socket_cb accept_cb,
5560
bool is_https,
5661
event_loop_t *loop);
@@ -98,7 +103,10 @@ static void accept_http_connection(h2o_socket_t *listener, const char *err)
98103
ctx->event_loop.h2o_accept_ctx.ssl_ctx = ssl_ctx;
99104
}
100105

101-
static int get_listener_socket(const char *bind_address, uint16_t port)
106+
static int get_listener_socket(bool is_main_thread,
107+
int bpf_fd,
108+
const char *bind_address,
109+
uint16_t port)
102110
{
103111
int ret = -1;
104112
char buf[16];
@@ -148,6 +156,15 @@ static int get_listener_socket(const char *bind_address, uint16_t port)
148156
LOCAL_CHECK_ERRNO(setsockopt, s, IPPROTO_TCP, TCP_FASTOPEN, &option, sizeof(option));
149157
LOCAL_CHECK_ERRNO(bind, s, iter->ai_addr, iter->ai_addrlen);
150158
LOCAL_CHECK_ERRNO(listen, s, INT_MAX);
159+
160+
if (is_main_thread && bpf_fd >= 0)
161+
LOCAL_CHECK_ERRNO(setsockopt,
162+
s,
163+
SOL_SOCKET,
164+
SO_ATTACH_REUSEPORT_EBPF,
165+
&bpf_fd,
166+
sizeof(bpf_fd));
167+
151168
ret = s;
152169
break;
153170

@@ -256,16 +273,17 @@ static void shutdown_server(h2o_socket_t *listener, const char *err)
256273
}
257274
}
258275

259-
static void start_accept_polling(const config_t *config,
276+
static void start_accept_polling(bool is_main_thread,
277+
int bpf_fd,
278+
const config_t *config,
260279
h2o_socket_cb accept_cb,
261280
bool is_https,
262281
event_loop_t *loop)
263282
{
264-
const int listener_sd = get_listener_socket(config->bind_address,
283+
const int listener_sd = get_listener_socket(is_main_thread,
284+
bpf_fd,
285+
config->bind_address,
265286
is_https ? config->https_port : config->port);
266-
// Let all the threads race to call accept() on the socket; since the latter is
267-
// non-blocking, that will virtually act as load balancing, and SO_REUSEPORT
268-
// will make it efficient.
269287
h2o_socket_t * const h2o_socket = h2o_evloop_socket_create(loop->h2o_ctx.loop,
270288
listener_sd,
271289
H2O_SOCKET_FLAG_DONT_READ);
@@ -345,13 +363,18 @@ void initialize_event_loop(bool is_main_thread,
345363

346364
if (global_data->ssl_ctx) {
347365
loop->h2o_accept_ctx.ssl_ctx = global_data->ssl_ctx;
348-
start_accept_polling(config, accept_connection, true, loop);
366+
start_accept_polling(is_main_thread,
367+
global_data->bpf_fd,
368+
config,
369+
accept_connection,
370+
true,
371+
loop);
349372
// Assume that the majority of the connections use HTTPS,
350373
// so HTTP can take a few extra operations.
351374
accept_cb = accept_http_connection;
352375
}
353376

354-
start_accept_polling(config, accept_cb, false, loop);
377+
start_accept_polling(is_main_thread, global_data->bpf_fd, config, accept_cb, false, loop);
355378
h2o_multithread_register_receiver(loop->h2o_ctx.queue,
356379
h2o_receiver,
357380
process_messages);

frameworks/C/h2o/src/global_data.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "handlers/request_handler_data.h"
3131

3232
struct global_thread_data_t;
33+
struct socket_load_balancer;
3334
struct thread_context_t;
3435

3536
typedef struct config_t {
@@ -55,8 +56,10 @@ typedef struct {
5556
h2o_logger_t *file_logger;
5657
struct global_thread_data_t *global_thread_data;
5758
h2o_socket_t *signals;
59+
struct socket_load_balancer *socket_load_balancer;
5860
SSL_CTX *ssl_ctx;
5961
size_t memory_alignment;
62+
int bpf_fd;
6063
int signal_fd;
6164
h2o_buffer_prototype_t buffer_prototype;
6265
h2o_globalconf_t h2o_config;

frameworks/C/h2o/src/main.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <stdlib.h>
2727
#include <string.h>
2828
#include <unistd.h>
29+
#include <bpf/bpf.h>
30+
#include <bpf/libbpf.h>
2931
#include <h2o/serverutil.h>
3032
#include <sys/resource.h>
3133
#include <sys/signalfd.h>
@@ -38,6 +40,7 @@
3840
#include "global_data.h"
3941
#include "list.h"
4042
#include "request_handler.h"
43+
#include "socket_load_balancer.h"
4144
#include "thread.h"
4245
#include "tls.h"
4346
#include "utility.h"
@@ -81,6 +84,8 @@ static void free_global_data(global_data_t *global_data)
8184
if (global_data->file_logger)
8285
global_data->file_logger->dispose(global_data->file_logger);
8386

87+
close(global_data->bpf_fd);
88+
socket_load_balancer__destroy(global_data->socket_load_balancer);
8489
cleanup_request_handlers(&global_data->request_handler_data);
8590
h2o_config_dispose(&global_data->h2o_config);
8691

@@ -93,6 +98,7 @@ static int initialize_global_data(const config_t *config, global_data_t *global_
9398
sigset_t signals;
9499

95100
memset(global_data, 0, sizeof(*global_data));
101+
global_data->bpf_fd = -1;
96102
global_data->buffer_prototype._initial_buf.capacity = H2O_SOCKET_INITIAL_INPUT_BUFFER_SIZE;
97103
global_data->memory_alignment = get_maximum_cache_line_size();
98104
CHECK_ERRNO(sigemptyset, &signals);
@@ -134,6 +140,20 @@ static int initialize_global_data(const config_t *config, global_data_t *global_
134140
global_data->file_logger = h2o_access_log_register(pathconf, log_handle);
135141
}
136142

143+
global_data->socket_load_balancer = socket_load_balancer__open();
144+
145+
if (global_data->socket_load_balancer) {
146+
global_data->socket_load_balancer->data->thread_num = config->thread_num;
147+
148+
if (socket_load_balancer__load(global_data->socket_load_balancer)) {
149+
socket_load_balancer__destroy(global_data->socket_load_balancer);
150+
global_data->socket_load_balancer = NULL;
151+
}
152+
else
153+
global_data->bpf_fd =
154+
bpf_program__fd(global_data->socket_load_balancer->progs.socket_load_balancer);
155+
}
156+
137157
global_data->global_thread_data = initialize_global_thread_data(config, global_data);
138158

139159
if (global_data->global_thread_data) {

0 commit comments

Comments
 (0)