@@ -57,7 +57,7 @@ use std::str::FromStr;
5757#[ cfg( any( kvm, mshv3) ) ]
5858use std:: sync:: atomic:: { AtomicBool , AtomicU8 , AtomicU64 , Ordering } ;
5959#[ cfg( target_os = "windows" ) ]
60- use std:: sync:: atomic:: { AtomicBool , AtomicU8 , Ordering } ;
60+ use std:: sync:: atomic:: { AtomicU8 , Ordering } ;
6161#[ cfg( any( kvm, mshv3) ) ]
6262use std:: time:: Duration ;
6363
@@ -413,8 +413,34 @@ pub(super) struct WindowsInterruptHandle {
413413 /// (e.g., during host function calls), but is cleared at the start of each new `VirtualCPU::run()` call.
414414 state : AtomicU8 ,
415415
416- partition_handle : windows:: Win32 :: System :: Hypervisor :: WHV_PARTITION_HANDLE ,
417- dropped : AtomicBool ,
416+ /// RwLock protecting the partition handle and dropped state.
417+ ///
418+ /// This lock prevents a race condition between `kill()` calling `WHvCancelRunVirtualProcessor`
419+ /// and `WhpVm::drop()` calling `WHvDeletePartition`. These two Windows Hypervisor Platform APIs
420+ /// must not execute concurrently - if `WHvDeletePartition` frees the partition while
421+ /// `WHvCancelRunVirtualProcessor` is still accessing it, the result is a use-after-free
422+ /// causing STATUS_ACCESS_VIOLATION or STATUS_HEAP_CORRUPTION.
423+ ///
424+ /// The synchronization works as follows:
425+ /// - `kill()` takes a read lock before calling `WHvCancelRunVirtualProcessor`
426+ /// - `set_dropped()` takes a write lock, which blocks until all in-flight `kill()` calls complete,
427+ /// then sets `dropped = true`. This is called from `HyperlightVm::drop()` before `WhpVm::drop()`
428+ /// runs, ensuring no `kill()` is accessing the partition when `WHvDeletePartition` is called.
429+ partition_state : std:: sync:: RwLock < PartitionState > ,
430+ }
431+
/// Partition bookkeeping guarded by the `RwLock` in `WindowsInterruptHandle`.
///
/// `handle` is a plain copy of the partition handle held by `WhpVm`; this struct
/// does not own the partition. The lock together with the `dropped` flag is what
/// keeps the copied handle from being used once `WhpVm` has deleted the partition.
#[cfg(target_os = "windows")]
#[derive(Debug)]
pub(super) struct PartitionState {
    /// Partition handle copied from `WhpVm`. Must only be used while `dropped` is `false`.
    pub(super) handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE,
    /// Becomes `true` before the partition is deleted; once set, `handle` must not be used.
    pub(super) dropped: bool,
}
419445
420446#[ cfg( target_os = "windows" ) ]
@@ -468,9 +494,20 @@ impl InterruptHandleImpl for WindowsInterruptHandle {
468494 }
469495
470496 fn set_dropped ( & self ) {
471- // Release ordering to ensure all VM cleanup operations are visible
472- // to any thread that checks dropped() via Acquire
473- self . dropped . store ( true , Ordering :: Release ) ;
497+ // Take write lock to:
498+ // 1. Wait for any in-flight kill() calls (holding read locks) to complete
499+ // 2. Block new kill() calls from starting while we hold the write lock
500+ // 3. Set dropped=true so no future kill() calls will use the handle
501+ // After this returns, no WHvCancelRunVirtualProcessor calls are in progress
502+ // or will ever be made, so WHvDeletePartition can safely be called.
503+ match self . partition_state . write ( ) {
504+ Ok ( mut guard) => {
505+ guard. dropped = true ;
506+ }
507+ Err ( e) => {
508+ log:: error!( "Failed to acquire partition_state write lock: {}" , e) ;
509+ }
510+ }
474511 }
475512}
476513
@@ -486,31 +523,65 @@ impl InterruptHandle for WindowsInterruptHandle {
486523 // Acquire ordering to synchronize with the Release in set_running()
487524 // This ensures we see the running state set by the vcpu thread
488525 let state = self . state . load ( Ordering :: Acquire ) ;
489- if state & Self :: RUNNING_BIT != 0 {
490- unsafe { WHvCancelRunVirtualProcessor ( self . partition_handle , 0 , 0 ) . is_ok ( ) }
491- } else {
492- false
526+ if state & Self :: RUNNING_BIT == 0 {
527+ return false ;
493528 }
529+
        // Take read lock so this call cannot race with partition deletion: set_dropped()
        // takes the write lock before `WhpVm::drop()` calls WHvDeletePartition.
        // Multiple kill() calls can proceed concurrently (read locks don't block each other),
        // but set_dropped() will wait for all kill() calls to complete before proceeding.
533+ let guard = match self . partition_state . read ( ) {
534+ Ok ( guard) => guard,
535+ Err ( e) => {
536+ log:: error!( "Failed to acquire partition_state read lock: {}" , e) ;
537+ return false ;
538+ }
539+ } ;
540+
541+ if guard. dropped {
542+ return false ;
543+ }
544+
545+ unsafe { WHvCancelRunVirtualProcessor ( guard. handle , 0 , 0 ) . is_ok ( ) }
494546 }
495547 #[ cfg( gdb) ]
496548 fn kill_from_debugger ( & self ) -> bool {
497549 use windows:: Win32 :: System :: Hypervisor :: WHvCancelRunVirtualProcessor ;
498550
499551 self . state
500552 . fetch_or ( Self :: DEBUG_INTERRUPT_BIT , Ordering :: Release ) ;
553+
501554 // Acquire ordering to synchronize with the Release in set_running()
502555 let state = self . state . load ( Ordering :: Acquire ) ;
503- if state & Self :: RUNNING_BIT != 0 {
504- unsafe { WHvCancelRunVirtualProcessor ( self . partition_handle , 0 , 0 ) . is_ok ( ) }
505- } else {
506- false
556+ if state & Self :: RUNNING_BIT == 0 {
557+ return false ;
507558 }
559+
560+ // Take read lock to prevent race with WHvDeletePartition in set_dropped()
561+ let guard = match self . partition_state . read ( ) {
562+ Ok ( guard) => guard,
563+ Err ( e) => {
564+ log:: error!( "Failed to acquire partition_state read lock: {}" , e) ;
565+ return false ;
566+ }
567+ } ;
568+
569+ if guard. dropped {
570+ return false ;
571+ }
572+
573+ unsafe { WHvCancelRunVirtualProcessor ( guard. handle , 0 , 0 ) . is_ok ( ) }
508574 }
509575
510576 fn dropped ( & self ) -> bool {
511- // Acquire ordering to synchronize with the Release in set_dropped()
512- // This ensures we see all VM cleanup operations that happened before drop
513- self . dropped . load ( Ordering :: Acquire )
577+ // Take read lock to check dropped state consistently
578+ match self . partition_state . read ( ) {
579+ Ok ( guard) => guard. dropped ,
580+ Err ( e) => {
581+ log:: error!( "Failed to acquire partition_state read lock: {}" , e) ;
582+ true // Assume dropped if we can't acquire lock
583+ }
584+ }
514585 }
515586}
516587
0 commit comments