1010from cocalc_api import Hub , Project
1111
1212from psycopg2 import pool as pg_pool
13+ from typing import Callable , TypeVar , Any
1314
# Database configuration examples, defined once so they are not repeated (DRY).
# Example PGHOST given as a filesystem path to a socket location.
PGHOST_SOCKET_EXAMPLE: str = "/path/to/cocalc-data/socket"
# Example PGHOST given as a network host name.
PGHOST_NETWORK_EXAMPLE: str = "localhost"
1718
T = TypeVar('T')


def retry_with_backoff(
    func: Callable[[], T],
    max_retries: int = 3,
    retry_delay: int = 5,
    error_condition: Callable[[RuntimeError], bool] = lambda e: "timeout" in str(e).lower(),
) -> T:
    """
    Retry a function call, waiting a fixed delay between attempts.

    Despite the name, the delay is constant (``retry_delay`` seconds before
    every retry); it does not grow between attempts.

    This helper is useful for operations that may timeout on first attempt due to
    cold starts (e.g., kernel launches).

    Args:
        func: Zero-argument callable that performs the operation.
        max_retries: Maximum number of attempts, including the first one
            (default: 3). Must be at least 1.
        retry_delay: Delay in seconds before each retry (default: 5).
        error_condition: Predicate deciding whether a RuntimeError should
            trigger a retry. Defaults to checking for "timeout" in the error
            message (case-insensitive).

    Returns:
        The result of the first successful call to ``func``.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (no attempt could be made).
        RuntimeError: Re-raised from ``func`` when the error does not satisfy
            ``error_condition`` or when the final attempt fails.
        Any other exception raised by ``func`` propagates immediately; only
        RuntimeError is considered for retrying.
    """
    # Local import keeps this helper self-contained.
    import time

    # Without this guard the loop body would never run and the function would
    # fall through and return None, contradicting the declared return type.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            return func()
        except RuntimeError as e:
            if error_condition(e) and attempt < max_retries - 1:
                print(f"Attempt {attempt + 1} timed out, retrying in {retry_delay}s...")
                time.sleep(retry_delay)
            else:
                # Non-retryable error, or retries exhausted: surface the original error.
                raise
56+
1857
1958def assert_valid_uuid (value , description = "value" ):
2059 """
@@ -125,39 +164,110 @@ def project_client(temporary_project, api_key, cocalc_host):
125164 return Project (project_id = temporary_project ['project_id' ], api_key = api_key , host = cocalc_host )
126165
127166
@pytest.fixture(autouse=True)
def cleanup_kernels_after_test(request, project_client):
    """
    Trim excess Jupyter kernels after each test of a Jupyter-related test class.

    This fixture is autouse with the default (function) scope, so its teardown
    runs after every individual test. It is a no-op unless the test belongs to
    one of the Jupyter test classes (TestJupyterExecuteViaHub,
    TestJupyterExecuteViaProject, TestJupyterKernelManagement).

    Kernel accumulation happens because the kernel pool reuses kernels, but under
    heavy test load, old kernels aren't always properly cleaned up by the pool.
    To avoid interfering with the pool's reuse strategy, nothing is stopped
    until more than 3 kernels are listed, and the 2 kernels with the highest
    PIDs are always left running.

    Cleanup is best-effort: failures are reported as warnings and never fail
    the test.
    """
    yield  # Allow test to run

    # Only cleanup for Jupyter-related tests
    test_class = request.cls
    if test_class is None:
        return

    jupyter_test_classes = {
        'TestJupyterExecuteViaHub',
        'TestJupyterExecuteViaProject',
        'TestJupyterKernelManagement',
    }
    if test_class.__name__ not in jupyter_test_classes:
        return

    import time

    # Pool size is 2 (per the original author's note); the threshold of 3
    # leaves the pool's reuse mechanism some buffer before we intervene.
    keep_newest = 2
    cleanup_threshold = 3

    try:
        kernels = project_client.system.list_jupyter_kernels()
        if len(kernels) <= cleanup_threshold:
            return

        # Entries without a pid cannot be stopped, so leave them out up front.
        stoppable = sorted(
            (k for k in kernels if k.get("pid") is not None),
            key=lambda k: k["pid"],
        )
        # Higher PID is used as a proxy for "more recent"; keep the newest ones.
        kernels_to_stop = stoppable[:-keep_newest]
    except Exception as err:
        # If listing kernels fails, report it and continue without cleanup.
        print(f"Warning: could not list Jupyter kernels for cleanup: {err}")
        return

    for kernel in kernels_to_stop:
        try:
            project_client.system.stop_jupyter_kernel(pid=kernel["pid"])
            time.sleep(0.1)  # Small delay between kills
        except Exception as err:
            # One kernel failing to stop must not prevent stopping the others.
            print(f"Warning: failed to stop kernel PID {kernel['pid']}: {err}")
221+
222+
def ensure_python3_kernel(project_client: "Project"):
    """
    Ensure the default python3 Jupyter kernel is installed in the project.

    If the kernelspec is not listed, bootstrap pip (best-effort), install
    ipykernel and register the kernelspec, then verify it is listed.

    Args:
        project_client: Client whose ``system.exec`` runs commands in the project.

    Raises:
        RuntimeError: If installing ipykernel fails, if registering the
            kernelspec fails, or if the python3 kernelspec is still not listed
            afterwards.
    """

    def try_exec(command: list[str], timeout: int = 60, capture_stdout: bool = False):
        """
        Run ``command`` in the project without raising.

        Returns a ``(ok, stdout)`` tuple: ``ok`` is False if the call raised;
        ``stdout`` is the command output only when ``capture_stdout`` is set.
        Callers must unpack the tuple; the tuple itself is always truthy.
        """
        try:
            result = project_client.system.exec(
                command=command[0],
                args=command[1:],
                timeout=timeout,
            )
            return (True, result["stdout"] if capture_stdout else None)
        except Exception as err:
            print(f"Warning: command {command} failed: {err}")
            return (False, None)

    def has_python_kernel() -> bool:
        """Return True if ``jupyter kernelspec list`` reports a python3 kernelspec."""
        ok, stdout = try_exec(
            ["python3", "-m", "jupyter", "kernelspec", "list", "--json"],
            capture_stdout=True,
        )
        if not ok or stdout is None:
            return False
        try:
            data = json.loads(stdout)
            return "python3" in data.get("kernelspecs", {})
        except Exception as err:
            print(f"Warning: Failed to parse kernelspec list: {err}")
            return False

    if has_python_kernel():
        return

    print("Installing python3 kernelspec in project...")
    # Install pip if needed; failure is tolerated because pip may already exist.
    try_exec(["python3", "-m", "ensurepip", "--user"], timeout=120)
    # Upgrade pip but ignore errors (not fatal)
    try_exec(["python3", "-m", "pip", "install", "--user", "--upgrade", "pip"], timeout=120)

    # Unpack the status flag: testing the returned tuple directly would never
    # detect a failure, because a non-empty tuple is always truthy.
    installed, _ = try_exec(["python3", "-m", "pip", "install", "--user", "ipykernel"], timeout=300)
    if not installed:
        raise RuntimeError("Failed to install ipykernel via pip")

    registered, _ = try_exec(
        [
            "python3",
            "-m",
            "ipykernel",
            "install",
            # NOTE(review): the next two arguments were hidden by a diff hunk
            # boundary in the reviewed view; confirm them against the file.
            "--user",
            "--name=python3",
            "--display-name=Python 3",
        ],
        timeout=120,
    )
    if not registered:
        raise RuntimeError("Failed to install python3 kernelspec")

    if not has_python_kernel():
        raise RuntimeError("Failed to ensure python3 kernelspec is installed in project")
@@ -500,3 +611,39 @@ def cleanup():
500611 request .addfinalizer (cleanup )
501612
502613 yield
614+
615+
@pytest.fixture(scope="session", autouse=True)
def cleanup_jupyter_kernels_session(project_client):
    """
    Stop every Jupyter kernel still listed once the test session has finished.

    Nothing happens before the ``yield``; all work is done at session
    teardown, so test execution is not affected. Each listed kernel is
    stopped by PID and the outcome is printed. Cleanup is best-effort: a
    failure to stop one kernel, or to list kernels at all, is only reported.

    NOTE(review): a session-scoped fixture can only depend on fixtures of
    session scope - confirm that ``project_client`` is declared accordingly.
    """
    yield  # Allow all tests to run first

    separator = "=" * 70
    try:
        leftover = project_client.system.list_jupyter_kernels()
        if not leftover:
            return

        print(f"\n{separator}")
        print(f"CLEANING UP {len(leftover)} JUPYTER KERNELS FROM TEST SESSION")
        print(f"{separator}")
        for entry in leftover:
            try:
                pid = entry.get("pid")
                outcome = project_client.system.stop_jupyter_kernel(pid=pid)
                status = (
                    f"✓ Stopped kernel PID {pid}"
                    if outcome.get("success")
                    else f"✗ Failed to stop kernel PID {pid}"
                )
                print(status)
            except Exception as e:
                # Keep going: one kernel failing must not block the rest.
                print(f"✗ Error stopping kernel: {e}")
        print(f"{separator}\n")
    except Exception as e:
        print(f"Warning: Failed to clean up jupyter kernels: {e}")
0 commit comments