Skip to content

Commit 3ce4c20

Browse files
Fix socket cpu migration
Socket CPU migration can lead to two problems: performance degradation and response reordering, which leads to broken HTTP1. Previously we used RSS and RPS to prevent it, but there were several problems in our scripts: - we excluded loopback interfaces from setup, because we didn't take the response reordering problem into account. - we didn't take into account that some interfaces have a suffix like @if14, which we should remove from the device name in our scripts. - we didn't try to set up combined RSS queues, only RX queues, but there are a lot of cases when a network interface has only combined queues. - we didn't take into account overflow when calculating 1 << x, when x is greater than or equal to 64. - we didn't take into account overflow when writing a value greater than (1 << 32) - 1 to rps_cpus when setting up RPS. - we didn't set up RPS for a network interface if RSS setup failed. - we didn't ban IRQs for irqbalance for each network device immediately. If there are a lot of devices, there is a big race between setting RSS for the first device and banning IRQs for it. This race is enough for the irqbalance daemon to change our settings. This patch fixes all these problems. Closes #2075
1 parent 5bafc9d commit 3ce4c20

File tree

2 files changed

+148
-45
lines changed

2 files changed

+148
-45
lines changed

scripts/tempesta.sh

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
# Tempesta FW service script.
44
#
55
# Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
6-
# Copyright (C) 2015-2022 Tempesta Technologies, Inc.
6+
# Copyright (C) 2015-2024 Tempesta Technologies, Inc.
77
#
88
# This program is free software; you can redistribute it and/or modify it
99
# under the terms of the GNU General Public License as published by
@@ -46,12 +46,12 @@ tls_mod=tempesta_tls
4646
tdb_mod=tempesta_db
4747
tfw_mod=tempesta_fw
4848
declare -r LONG_OPTS="help,load,unload,start,stop,restart,reload"
49-
50-
# Exclude loopback interface since it needn't any tuning here: it hasn't RSS
51-
# while RPS just add unnecessary overhead for it (traffic redistribution, IPIs
52-
# introduction etc.).
53-
declare devs=$(ip addr show up | grep -P '^[0-9]+' | grep -Pv '\bLOOPBACK\b' \
54-
| awk '{ sub(/:/, "", $2); print $2}')
49+
# We should set up network queues for all existing network interfaces
50+
# to prevent socket CPU migration, which leads to response reordering
51+
# and broken HTTP1. Some network interfaces have a suffix
52+
# like @if14, and we should remove it from the device name.
53+
declare devs=$(ip addr show up | grep -P '^[0-9]+' \
54+
| awk '{ sub(/:/, "", $2); split($2,a,"@"); print a[1] }')
5555

5656
usage()
5757
{

scripts/tfw_lib.sh

Lines changed: 141 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# Common utilities for Tempesta scripts
22
#
3-
# Copyright (C) 2016-2022 Tempesta Technologies, Inc.
3+
# Copyright (C) 2016-2024 Tempesta Technologies, Inc.
44
#
55
# This program is free software; you can redistribute it and/or modify it
66
# under the terms of the GNU General Public License as published by
@@ -54,7 +54,6 @@ irqbalance_ban_irqs()
5454
args_str=""
5555
sysd_conf_var="ExecStart"
5656

57-
echo "...ban IRQs for irqbalance..."
5857
for irq in ${IRQS_GLOB_LIST[@]}; do
5958
args_str=$args_str"--banirq=$irq "
6059
done
@@ -92,19 +91,83 @@ irqbalance_ban_irqs()
9291
systemctl restart irqbalance.service >/dev/null
9392
}
9493

94+
# This function prepares a CPU mask for RSS and RPS.
95+
# It takes into account that we can't calculate
96+
# a value which is greater than (1 << 63) and can't
97+
# write to rps_cpus/smp_affinity a value which
98+
# is greater than (1 << 32) - 1 (we need to write it
99+
# using commas).
100+
make_cpu_mask()
101+
{
102+
delta=$1
103+
mask32=$2
104+
val=$3
105+
cond=$4
106+
res=0
107+
108+
# If delta is less than 32, we can directly calculate
109+
# and write this value. For RPS it will looks like
110+
# fff..... and for RSS it will looks like 1000....
111+
if [[ $delta -lt 32 ]]; then
112+
res=`printf %x $(( $((1 << delta )) - val))`
113+
else
114+
res="$mask32"
115+
fi
116+
delta=$(( delta - 32 ))
117+
118+
# For RPS cond is 1, because for delta 0 (1 << delta) - val is zero
119+
# and we don't need to write the most significant zero bits.
120+
while [[ $delta -ge $cond ]]
121+
do
122+
if [[ $delta -lt 32 ]]; then
123+
res=`printf %x $(( $((1 << delta )) - val))`",$res"
124+
else
125+
res="$mask32,$res"
126+
fi
127+
delta=$(( delta - 32 ))
128+
done
129+
130+
echo "$res"
131+
}
132+
133+
make_cpu_rss_mask()
134+
{
135+
delta=$1
136+
# Enable only one CPU (1 << delta)
137+
mask32="00000000"
138+
# Used to calculate (1 << delta) - $val to
139+
# implement only one common function for RPS and RSS
140+
val=0
141+
cond=0
142+
143+
echo `make_cpu_mask $delta $mask32 $val $cond`
144+
}
145+
146+
make_cpu_rps_mask()
147+
{
148+
delta=$1
149+
# Enable all CPUs with index less than delta: (1 << delta) - 1
150+
mask32="ffffffff"
151+
# Used to calculate (1 << delta) - $val to
152+
# implement only one common function for RPS and RSS
153+
val=1
154+
cond=1
155+
156+
echo `make_cpu_mask $delta $mask32 $val $cond`
157+
}
158+
95159
distribute_queues()
96160
{
97161
dev=$1
98162
RXQ_MAX=$2
163+
type=$3
99164

100-
echo "...set rx channels to $RXQ_MAX, please wait..."
101165
# Set maximum number of available channels for better
102166
# packets hashing.
103-
res=$(ethtool -L $dev rx $RXQ_MAX 2>&1)
167+
res=$(ethtool -L $dev $type $RXQ_MAX 2>&1)
104168
if [ $? -ne 0 -a -z "$(echo $res | grep -P '^rx unmodified, ignoring')" ]
105169
then
106-
printf "Error: cannot set new queues count for %s:\n %s\n" \
107-
$dev "$res"
170+
echo "Error: cannot set new queues count for $dev: $res"
108171
return
109172
fi
110173

@@ -135,21 +198,64 @@ distribute_queues()
135198
for i in ${irqs[@]}; do
136199
# Wrap around CPU mask if number of queues is
137200
# larger than CPUS_N.
138-
if [ $(calc "$i - $irq0") -gt $CPUS_N ]; then
201+
if [[ $(calc "$i - $irq0") -gt $CPUS_N ]]; then
139202
irq0=$i;
140203
fi
141-
perl -le '
142-
my $a = 1 << ('$i' - '$irq0');
143-
if ($a <= 0x80000000) {
144-
printf("%x\n", $a)
145-
} else {
146-
$a = $a / 0x100000000;
147-
printf("%x,00000000\n", $a)
148-
}
149-
' > /proc/irq/$i/smp_affinity
204+
delta=$(( i - irq0 ))
205+
mask=`make_cpu_rss_mask $delta`
206+
echo "$mask" > "/proc/irq/$i/smp_affinity"
150207
done
151208

152-
IRQS_GLOB_LIST+=(${irqs[@]})
209+
IRQS_GLOB_LIST=(${irqs[@]})
210+
if [ ${#IRQS_GLOB_LIST[@]} -ne 0 -a -f $SYSD_IRQB_PATH \
211+
-a -f $IRQB_CONF_PATH ]; then
212+
systemctl status irqbalance.service >/dev/null
213+
[ $? -ne 0 ] || irqbalance_ban_irqs
214+
fi
215+
216+
echo "0"
217+
}
218+
219+
distribute_rx_queues()
220+
{
221+
dev=$1
222+
min_queues=$2
223+
error="Rx queues for $dev not found"
224+
225+
queues_str=$(ethtool -l $dev 2>/dev/null \
226+
| grep -m 1 RX | sed -e 's/RX\:\s*//')
227+
queues=$(printf '%d' "$queues_str" 2>/dev/null)
228+
if [ -n "$queues" -a ${queues:-0} -gt $min_queues ]; then
229+
# Switch off RPS for multi-queued interfaces.
230+
for rx in $TFW_NETDEV_PATH/$dev/queues/rx-*; do
231+
echo 0 > $rx/rps_cpus
232+
done
233+
234+
error=`distribute_queues $dev $queues "rx"`
235+
fi
236+
237+
echo "$error"
238+
}
239+
240+
distribute_combined_queues()
241+
{
242+
dev=$1
243+
min_queues=$2
244+
error="Combined queues for $dev not found"
245+
246+
queues_str=$(ethtool -l $dev 2>/dev/null \
247+
| grep -m 1 Combined | sed -e 's/Combined\:\s*//')
248+
queues=$(printf '%d' "$queues_str" 2>/dev/null)
249+
if [ -n "$queues" -a ${queues:-0} -gt $min_queues ]; then
250+
# Switch off RPS for multi-queued interfaces.
251+
for rx in $TFW_NETDEV_PATH/$dev/queues/rx-*; do
252+
echo 0 > $rx/rps_cpus
253+
done
254+
255+
error=`distribute_queues $dev $queues "combined"`
256+
fi
257+
258+
echo "$error"
153259
}
154260

155261
# Enable RSS for networking interfaces. Enable RPS for those devices which
@@ -158,33 +264,30 @@ tfw_set_net_queues()
158264
{
159265
devs=$1
160266
min_queues=$(calc "$CPUS_N / 2")
161-
cpu_mask=$(perl -le 'printf("%x", (1 << '$CPUS_N') - 1)')
267+
cpu_mask=`make_cpu_rps_mask $CPUS_N`
162268

269+
# Iterate over all existing devices and set up RSS or,
270+
# if it is not supported, RPS for each device.
163271
for dev in $devs; do
164-
queues_str=$(ethtool -l $dev 2>/dev/null \
165-
| grep -m 1 RX | sed -e 's/RX\:\s*//')
166-
queues=$(printf '%d' "$queues_str" 2>/dev/null)
167-
if [ -n "$queues" -a ${queues:-0} -gt $min_queues ]; then
168-
# Switch off RPS for multi-queued interfaces.
169-
for rx in $TFW_NETDEV_PATH/$dev/queues/rx-*; do
170-
echo 0 > $rx/rps_cpus
171-
done
172-
173-
echo "...distribute $dev queues"
174-
distribute_queues $dev $queues
175-
else
272+
echo "...distribute $dev rx queues"
273+
# First try to setup RX queues.
274+
error=`distribute_rx_queues $dev $min_queues`
275+
if [[ "$error" != "0" ]]; then
276+
echo "$error"
277+
echo "...distribute $dev combined queues"
278+
# If RX queues setup fails try to setup
279+
# combined queues.
280+
error=`distribute_combined_queues $dev $min_queues`
281+
fi
282+
if [[ "$error" != "0" ]]; then
283+
echo "$error"
176284
echo "...enable RPS on $dev"
177-
for rx in $TFW_NETDEV_PATH/$dev/queues/rx-*; do
178-
echo $cpu_mask > $rx/rps_cpus
285+
for rx_queue in $TFW_NETDEV_PATH/$dev/queues/rx-*
286+
do
287+
echo $cpu_mask > $rx_queue/rps_cpus
179288
done
180289
fi
181290
done
182-
183-
if [ ${#IRQS_GLOB_LIST[@]} -ne 0 -a -f $SYSD_IRQB_PATH \
184-
-a -f $IRQB_CONF_PATH ]; then
185-
systemctl status irqbalance.service >/dev/null
186-
[ $? -ne 0 ] || irqbalance_ban_irqs
187-
fi
188291
}
189292

190293
tfw_irqbalance_revert()

0 commit comments

Comments
 (0)