3030
3131#define TFW_MAX_CLOSED_STREAMS 5
3232
33+ static struct kmem_cache * stream_sched_cache ;
34+
35+ static int
36+ tfw_h2_stream_sched_cache_create (void )
37+ {
38+ stream_sched_cache = kmem_cache_create ("tfw_stream_sched_cache" ,
39+ sizeof (TfwStreamSchedEntry ),
40+ 0 , 0 , NULL );
41+ if (!stream_sched_cache )
42+ return - ENOMEM ;
43+
44+ return 0 ;
45+ }
46+
47+ static void
48+ tfw_h2_stream_sched_cache_destroy (void )
49+ {
50+ kmem_cache_destroy (stream_sched_cache );
51+ }
52+
3353/**
3454 * Usually a client first sends a SETTINGS frame to a server, so:
3555 * - we don't have many streams to iterate over in this function
4060static void
4161tfw_h2_apply_wnd_sz_change (TfwH2Ctx * ctx , long int delta )
4262{
43- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
63+ TfwH2Conn * conn = ctx -> conn ;
4464 TfwStream * stream , * next ;
4565
4666 /*
@@ -71,7 +91,7 @@ static void
7191tfw_h2_apply_settings_entry (TfwH2Ctx * ctx , unsigned short id ,
7292 unsigned int val )
7393{
74- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
94+ TfwH2Conn * conn = ctx -> conn ;
7595 TfwSettings * dest = & ctx -> rsettings ;
7696 long int delta ;
7797
@@ -120,7 +140,7 @@ tfw_h2_apply_settings_entry(TfwH2Ctx *ctx, unsigned short id,
120140int
121141tfw_h2_check_settings_entry (TfwH2Ctx * ctx , unsigned short id , unsigned int val )
122142{
123- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
143+ TfwH2Conn * conn = ctx -> conn ;
124144
125145 assert_spin_locked (& ((TfwConn * )conn )-> sk -> sk_lock .slock );
126146
@@ -163,7 +183,7 @@ tfw_h2_check_settings_entry(TfwH2Ctx *ctx, unsigned short id, unsigned int val)
163183void
164184tfw_h2_save_settings_entry (TfwH2Ctx * ctx , unsigned short id , unsigned int val )
165185{
166- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
186+ TfwH2Conn * conn = ctx -> conn ;
167187
168188 assert_spin_locked (& ((TfwConn * )conn )-> sk -> sk_lock .slock );
169189
@@ -178,7 +198,7 @@ tfw_h2_save_settings_entry(TfwH2Ctx *ctx, unsigned short id, unsigned int val)
178198void
179199tfw_h2_apply_new_settings (TfwH2Ctx * ctx )
180200{
181- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
201+ TfwH2Conn * conn = ctx -> conn ;
182202 unsigned int id ;
183203
184204 assert_spin_locked (& ((TfwConn * )conn )-> sk -> sk_lock .slock );
@@ -195,24 +215,85 @@ tfw_h2_apply_new_settings(TfwH2Ctx *ctx)
/**
 * Module-level HTTP/2 initialization: set up both slab caches.
 *
 * On failure of the second cache the first one is torn down again, so
 * the function either fully succeeds or leaves no resources behind.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int
tfw_h2_init(void)
{
	int r;

	r = tfw_h2_stream_cache_create();
	if (unlikely(r))
		return r;

	r = tfw_h2_stream_sched_cache_create();
	if (unlikely(r))
		goto err_sched_cache;

	return 0;

err_sched_cache:
	tfw_h2_stream_cache_destroy();
	return r;
}
200232
/**
 * Module-level HTTP/2 cleanup: destroy the caches created in
 * tfw_h2_init(), in reverse order of their creation.
 */
void
tfw_h2_cleanup(void)
{
	tfw_h2_stream_sched_cache_destroy();
	tfw_h2_stream_cache_destroy();
}
206239
240+ static inline void
241+ tfw_h2_context_init_sched_entry_list (TfwH2Ctx * ctx , TfwStreamSchedList * list )
242+ {
243+ int i ;
244+
245+ #define LIST_SZ_DFLT \
246+ (PAGE_SIZE - sizeof(TfwH2Ctx)) / sizeof(TfwStreamSchedEntry)
247+
248+ for (i = LIST_SZ_DFLT - 1 ; i >= 0 ; i -- ) {
249+ list -> entries [i ].owner = NULL ;
250+ list -> entries [i ].next_free = ctx -> sched .free_list ;
251+ ctx -> sched .free_list = & list -> entries [i ];
252+ }
253+
254+ #undef LIST_SZ_DFLT
255+ }
256+
257+ TfwH2Ctx *
258+ tfw_h2_context_alloc (void )
259+ {
260+ struct page * page ;
261+
262+ /*
263+ * Tempesta FW allocates the whole page for http2 context.
264+ * and uses extra memory for streams schedulers for better
265+ * memory locality. Tempesta FW preallocate only streams
266+ * schedulers not the whole streams, because size of
267+ * TfwStream structure is very big. There is no sence
268+ * to preallocate only two streams (count of streams that
269+ * can fit into the remaining memory on the page) and we
270+ * can loose too much memory if we preallocate 10 -15
271+ * streams.
272+ */
273+ page = alloc_page (GFP_ATOMIC );
274+ if (!page )
275+ return NULL ;
276+ return page_address (page );
277+ }
278+
279+ void
280+ tfw_h2_context_free (TfwH2Ctx * ctx )
281+ {
282+ free_page ((unsigned long )ctx );
283+ }
284+
207285int
208- tfw_h2_context_init (TfwH2Ctx * ctx )
286+ tfw_h2_context_init (TfwH2Ctx * ctx , TfwH2Conn * conn )
209287{
210288 TfwStreamQueue * closed_streams = & ctx -> closed_streams ;
211289 TfwStreamQueue * idle_streams = & ctx -> idle_streams ;
212290 TfwSettings * lset = & ctx -> lsettings ;
213291 TfwSettings * rset = & ctx -> rsettings ;
292+ TfwStreamSchedList * list =
293+ (TfwStreamSchedList * )((char * )ctx + sizeof (TfwH2Ctx ));
214294
215- bzero_fast (ctx , sizeof (* ctx ));
295+ BUG_ON (!conn || conn -> h2 != ctx );
296+ bzero_fast (ctx , sizeof (* ctx ) + sizeof (* list ));
216297
217298 ctx -> state = HTTP2_RECV_CLI_START_SEQ ;
218299 ctx -> loc_wnd = DEF_WND_SIZE ;
@@ -223,6 +304,7 @@ tfw_h2_context_init(TfwH2Ctx *ctx)
223304 INIT_LIST_HEAD (& idle_streams -> list );
224305
225306 tfw_h2_init_stream_sched (& ctx -> sched );
307+ tfw_h2_context_init_sched_entry_list (ctx , list );
226308
227309 lset -> hdr_tbl_sz = rset -> hdr_tbl_sz = HPACK_TABLE_DEF_SIZE ;
228310 lset -> push = rset -> push = 1 ;
@@ -235,6 +317,7 @@ tfw_h2_context_init(TfwH2Ctx *ctx)
235317
236318 lset -> wnd_sz = DEF_WND_SIZE ;
237319 rset -> wnd_sz = DEF_WND_SIZE ;
320+ ctx -> conn = conn ;
238321
239322 return tfw_hpack_init (& ctx -> hpack , HPACK_TABLE_DEF_SIZE );
240323}
@@ -251,11 +334,65 @@ tfw_h2_context_clear(TfwH2Ctx *ctx)
251334 tfw_hpack_clean (& ctx -> hpack );
252335}
253336
337+
338+ TfwStreamSchedEntry *
339+ tfw_h2_alloc_stream_sched_entry (TfwH2Ctx * ctx )
340+ {
341+ TfwStreamSchedEntry * entry ;
342+
343+ if (unlikely (!ctx -> sched .free_list )) {
344+ /*
345+ * If count of preallocate streams schedulers (56) is
346+ * exceeded use standart kernel allocator. There is no
347+ * sense to allocate the whole page for the new schedulers
348+ * or use special cache for this purpose, because it is a
349+ * very rare case (browsers usually open not more then
350+ * 15 - 20 streams in parallel even if there are much
351+ * more resourses to request). TfwStreamSchedEntry is
352+ * small (64 bytes), so use special cache for allocation.
353+ */
354+ entry = kmem_cache_alloc (stream_sched_cache ,
355+ GFP_ATOMIC | __GFP_ZERO );
356+ } else {
357+ entry = ctx -> sched .free_list ;
358+ BUG_ON (entry -> owner );
359+ ctx -> sched .free_list = entry -> next_free ;
360+ }
361+
362+ return entry ;
363+ }
364+
365+ static inline bool
366+ tfw_h2_stream_sched_is_dflt (TfwH2Ctx * ctx , TfwStreamSchedEntry * entry )
367+ {
368+ char * begin = (char * )ctx + sizeof (TfwH2Ctx );
369+ char * end = (char * )ctx + PAGE_SIZE ;
370+ char * p = (char * )entry ;
371+
372+ return p >= begin && p < end ;
373+ }
374+
375+ void
376+ tfw_h2_free_stream_sched_entry (TfwH2Ctx * ctx , TfwStreamSchedEntry * entry )
377+ {
378+ if (entry -> owner ) {
379+ entry -> owner -> sched = NULL ;
380+ entry -> owner = NULL ;
381+ }
382+
383+ if (likely (tfw_h2_stream_sched_is_dflt (ctx , entry ))) {
384+ entry -> next_free = ctx -> sched .free_list ;
385+ ctx -> sched .free_list = entry ;
386+ } else {
387+ kmem_cache_free (stream_sched_cache , entry );
388+ }
389+ }
390+
254391void
255392tfw_h2_conn_terminate_close (TfwH2Ctx * ctx , TfwH2Err err_code , bool close ,
256393 bool attack )
257394{
258- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
395+ TfwH2Conn * conn = ctx -> conn ;
259396
260397 if (tfw_h2_send_goaway (ctx , err_code , attack ) && close )
261398 tfw_connection_close ((TfwConn * )conn , true);
291428tfw_h2_conn_streams_cleanup (TfwH2Ctx * ctx )
292429{
293430 TfwStream * cur , * next ;
294- TfwH2Conn * conn = container_of ( ctx , TfwH2Conn , h2 ) ;
431+ TfwH2Conn * conn = ctx -> conn ;
295432 TfwStreamSched * sched = & ctx -> sched ;
296433
297434 WARN_ON_ONCE (((TfwConn * )conn )-> stream .msg );
@@ -307,7 +444,7 @@ tfw_h2_conn_streams_cleanup(TfwH2Ctx *ctx)
307444 * No further actions regarding streams dependencies/prio
308445 * is required at this stage.
309446 */
310- tfw_h2_delete_stream (cur );
447+ tfw_h2_delete_stream (ctx , cur );
311448 -- ctx -> streams_num ;
312449 }
313450 sched -> streams = RB_ROOT ;
0 commit comments