Commit 2c97b77
Collapse skbs in socket write queue
- According to our investigation, collapsing skbs in the socket write queue significantly increases performance for small responses.
- Fix `tcp_can_coalesce_send_queue_head`: the Tempesta FW checks must be made before checking `len <= skb->len`.
- Also check the `SKBTX_SHARED_FRAG` flag, because collapsing skbs with different values of this flag is prohibited.
1 parent a72e107 commit 2c97b77
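
In essence, the commit replaces the old inline checks with a single predicate: two adjacent skbs on a socket owned by Tempesta FW may be collapsed only when their TLS record type, skb->mark and SKBTX_SHARED_FRAG flag all match, and this predicate is now evaluated before the `len <= skb->len` test. A condensed sketch of the check as introduced by the hunks below (`skb_tfw_tls_type()` and `SOCK_TEMPESTA` are Tempesta FW extensions, not mainline kernel symbols):

#ifdef CONFIG_SECURITY_TEMPESTA
/* Sketch: adjacent skbs may be coalesced only if their TLS record type,
 * mark and SKBTX_SHARED_FRAG flag agree; mixing them is prohibited.
 */
static bool tfw_tcp_skb_can_collapse(struct sock *sk, struct sk_buff *skb,
				     struct sk_buff *next)
{
	BUG_ON(!sock_flag(sk, SOCK_TEMPESTA));

	if (!tcp_skb_is_last(sk, skb)
	    && (skb_tfw_tls_type(skb) != skb_tfw_tls_type(next)
		|| skb->mark != next->mark
		|| (skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG)
		   != (skb_shinfo(next)->tx_flags & SKBTX_SHARED_FRAG)))
		return false;
	return true;
}
#endif

The same predicate is reused by the new tfw_tcp_grow_skb()/tfw_coalesce_send_queue() helpers called from tcp_write_xmit() to pull payload from the next skb in the write queue into the current one.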

File tree

1 file changed: +124 -22 lines changed

linux-5.10.35.patch

Lines changed: 124 additions & 22 deletions
@@ -2550,7 +2550,7 @@ index f0f67b25c..58fbfb071 100644
 return NULL;
 }
 diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
-index f99494637..042f9e38d 100644
+index f99494637..522c265a5 100644
 --- a/net/ipv4/tcp_output.c
 +++ b/net/ipv4/tcp_output.c
 @@ -39,6 +39,9 @@
@@ -2744,27 +2744,108 @@ index f99494637..042f9e38d 100644
 
 /* Correct the sequence numbers. */
 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
-@@ -2303,6 +2349,14 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+@@ -2292,24 +2338,186 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
+}
+}
 
-if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
-return false;
++#ifdef CONFIG_SECURITY_TEMPESTA
++
++static bool tfw_tcp_skb_can_collapse(struct sock *sk, struct sk_buff *skb,
++ struct sk_buff *next)
++{
++ BUG_ON(!sock_flag(sk, SOCK_TEMPESTA));
++
++ if (!tcp_skb_is_last(sk, skb)
++ && ((skb_tfw_tls_type(skb) != skb_tfw_tls_type(next))
++ || (skb->mark != next->mark)
++ || (((skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG)
++ != (skb_shinfo(next)->tx_flags & SKBTX_SHARED_FRAG)))))
++ return false;
++ return true;
++}
++
++#endif
++
+static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+{
+struct sk_buff *skb, *next;
+
+skb = tcp_send_head(sk);
+tcp_for_write_queue_from_safe(skb, next, sk) {
 +#ifdef CONFIG_SECURITY_TEMPESTA
 + /* Do not coalesce tempesta skbs with tls type or set mark. */
-+ if ((next != ((struct sk_buff *)&(sk)->sk_write_queue))
-+ && ((skb_tfw_tls_type(skb) != skb_tfw_tls_type(next))
-+ || (sock_flag(sk, SOCK_TEMPESTA)
-+ && (skb->mark != next->mark))))
++ if (sock_flag(sk, SOCK_TEMPESTA)
++ && !tfw_tcp_skb_can_collapse(sk, skb, next))
 + return false;
 +#endif
+if (len <= skb->len)
+break;
 
+if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
+return false;
+-
 len -= skb->len;
 }
-@@ -2310,6 +2364,76 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
+
 return true;
 }
 
 +#ifdef CONFIG_SECURITY_TEMPESTA
 +
++/* First skb in the write queue is smaller than ideal packet size.
++ * Check if we can move payload from the second skb in the queue.
++ */
++static unsigned int tfw_tcp_grow_skb(struct sock *sk, struct sk_buff *skb,
++ unsigned int amount)
++{
++ struct sk_buff *next = skb->next;
++ unsigned int nlen;
++ bool stolen;
++ int delta;
++
++ if (tcp_skb_is_last(sk, skb))
++ return 0;
++
++ if (!tfw_tcp_skb_can_collapse(sk, skb, next))
++ return 0;
++
++ if (!tcp_skb_can_collapse(skb, next))
++ return 0;
++
++ nlen = next->len;
++ BUG_ON(!nlen);
++
++ if (amount < nlen)
++ return 0;
++
++ if (amount > nlen
++ && (unlikely(TCP_SKB_CB(next)->eor) || tcp_has_tx_tstamp(next)))
++ return 0;
++
++ if (!skb_try_coalesce(skb, next, &stolen, &delta))
++ return 0;
++
++ TCP_SKB_CB(skb)->end_seq += nlen;
++ TCP_SKB_CB(next)->seq += nlen;
++
++ if (TCP_SKB_CB(next)->tcp_flags &TCPHDR_FIN)
++ TCP_SKB_CB(skb)->end_seq++;
++ /* We've eaten all the data from this skb.
++ * Throw it away. */
++ TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next)->tcp_flags;
++ /* If this is the last SKB we copy and eor is set
++ * we need to propagate it to the new skb.
++ */
++ TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next)->eor;
++ tcp_skb_collapse_tstamp(skb, next);
++ tcp_unlink_write_queue(next, sk);
++ sk_wmem_queued_add(sk, delta - next->truesize);
++ sk_mem_charge(sk, delta - next->truesize);
++ kfree_skb_partial(next, stolen);
++
++ return nlen;
++}
++
 +/**
 + * The next funtion is called from places: from `tcp_write_xmit`
 + * (a usual case) and from `tcp_write_wakeup`. In other places where
@@ -2831,12 +2912,27 @@ index f99494637..042f9e38d 100644
 + max_size -= TLS_MAX_OVERHEAD; \
 +} while(0)
 +
++static void tfw_coalesce_send_queue(struct sock *sk, struct sk_buff *skb,
++ unsigned int amount, unsigned int mss_now)
++{
++ unsigned int nlen;
++
++ if (!sock_flag(sk, SOCK_TEMPESTA))
++ return;
++
++ amount = amount > skb->len ? amount - skb->len : 0;
++ while (amount && (nlen = tfw_tcp_grow_skb(sk, skb, amount)))
++ amount -= nlen;
++
++ tcp_set_skb_tso_segs(skb, mss_now);
++}
++
 +#endif
 +
 /* Create a new MTU probe if we are ready.
 * MTU probe is regularly attempting to increase the path MTU by
 * deliberately sending larger packets. This discovers routing
-@@ -2330,6 +2454,9 @@ static int tcp_mtu_probe(struct sock *sk)
+@@ -2330,6 +2538,9 @@ static int tcp_mtu_probe(struct sock *sk)
 int copy, len;
 int mss_now;
 int interval;
@@ -2846,15 +2942,15 @@ index f99494637..042f9e38d 100644
 
 /* Not currently probing/verifying,
 * not in recovery,
-@@ -2382,6 +2509,7 @@ static int tcp_mtu_probe(struct sock *sk)
+@@ -2382,6 +2593,7 @@ static int tcp_mtu_probe(struct sock *sk)
 return 0;
 }
 
 + TFW_ADJUST_TLS_OVERHEAD(probe_size);
 if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
 return -1;
 
-@@ -2402,6 +2530,10 @@ static int tcp_mtu_probe(struct sock *sk)
+@@ -2402,6 +2614,10 @@ static int tcp_mtu_probe(struct sock *sk)
 nskb->csum = 0;
 nskb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2865,7 +2961,7 @@ index f99494637..042f9e38d 100644
 tcp_insert_write_queue_before(nskb, skb, sk);
 tcp_highest_sack_replace(sk, skb, nskb);
 
-@@ -2440,6 +2572,24 @@ static int tcp_mtu_probe(struct sock *sk)
+@@ -2440,6 +2656,24 @@ static int tcp_mtu_probe(struct sock *sk)
 }
 tcp_init_tso_segs(nskb, nskb->len);
 
@@ -2890,7 +2986,7 @@ index f99494637..042f9e38d 100644
 /* We're ready to send. If this fails, the probe will
 * be resegmented into mss-sized pieces by tcp_write_xmit().
 */
-@@ -2666,7 +2816,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+@@ -2666,11 +2900,23 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 cwnd_quota,
 max_segs),
 nonagle);
@@ -2909,7 +3005,13 @@ index f99494637..042f9e38d 100644
 if (skb->len > limit &&
 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 break;
-@@ -2681,7 +2841,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+
++ tfw_coalesce_send_queue(sk, skb, limit, mss_now);
++
+if (tcp_small_queue_check(sk, skb, 0))
+break;
+
+@@ -2681,7 +2927,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 */
 if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
 break;
@@ -2924,15 +3026,15 @@ index f99494637..042f9e38d 100644
 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 break;
 
-@@ -2866,6 +3032,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+@@ -2866,6 +3118,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 sk_gfp_mask(sk, GFP_ATOMIC)))
 tcp_check_probe_timer(sk);
 }
 +EXPORT_SYMBOL(__tcp_push_pending_frames);
 
 /* Send _single_ skb sitting at the send head. This function requires
 * true push pending frames to setup probe timer etc.
-@@ -3183,7 +3350,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
+@@ -3183,7 +3436,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 cur_mss, GFP_ATOMIC))
 return -ENOMEM; /* We'll try again later. */
 } else {
@@ -2941,31 +3043,31 @@ index f99494637..042f9e38d 100644
 return -ENOMEM;
 
 diff = tcp_skb_pcount(skb);
-@@ -3374,6 +3541,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
+@@ -3374,6 +3627,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 mem_cgroup_charge_skmem(sk->sk_memcg, amt);
 }
 +EXPORT_SYMBOL(sk_forced_mem_schedule);
 
 /* Send a FIN. The caller locks the socket for us.
 * We should try to send a FIN packet really hard, but eventually give up.
-@@ -3421,6 +3589,7 @@ void tcp_send_fin(struct sock *sk)
+@@ -3421,6 +3675,7 @@ void tcp_send_fin(struct sock *sk)
 }
 __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
 }
 +EXPORT_SYMBOL(tcp_send_fin);
 
 /* We get here when a process closes a file descriptor (either due to
 * an explicit close() or as a byproduct of exit()'ing) and there
-@@ -3454,6 +3623,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
+@@ -3454,6 +3709,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 */
 trace_tcp_send_reset(sk, NULL);
 }
 +EXPORT_SYMBOL(tcp_send_active_reset);
 
 /* Send a crossed SYN-ACK during socket establishment.
 * WARNING: This routine must only be called when we have already sent
-@@ -4044,6 +4214,17 @@ int tcp_write_wakeup(struct sock *sk, int mib)
+@@ -4044,6 +4300,17 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
 skb->len > mss) {
 seg_size = min(seg_size, mss);
@@ -2983,7 +3085,7 @@ index f99494637..042f9e38d 100644
 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
 if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
 skb, seg_size, mss, GFP_ATOMIC))
-@@ -4052,6 +4233,15 @@ int tcp_write_wakeup(struct sock *sk, int mib)
+@@ -4052,6 +4319,15 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 tcp_set_skb_tso_segs(skb, mss);
 
 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
