
Commit 8ceef49

fix(windows): prevent race between WHvCancelRunVirtualProcessor and WHvDeletePartition (#1101)
kill() could call WHvCancelRunVirtualProcessor while WhpVm::drop() was calling WHvDeletePartition, causing use-after-free crashes (STATUS_ACCESS_VIOLATION or STATUS_HEAP_CORRUPTION).

Fix by protecting the partition handle with an RwLock: kill() takes a read lock, and set_dropped() takes a write lock to block until all in-flight cancel calls complete before the partition is deleted.

Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
1 parent 33afe65 commit 8ceef49
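To make the change easier to follow, here is a minimal, self-contained sketch of the locking pattern this commit introduces. The types are simplified stand-ins for those in src/hyperlight_host/src/hypervisor/mod.rs (the real code stores a WHV_PARTITION_HANDLE and calls the Windows Hypervisor Platform APIs); only the read-lock/write-lock discipline is the point.

```rust
use std::sync::RwLock;

// Simplified stand-ins for the real types; not the actual Hyperlight definitions.
struct PartitionState {
    handle: usize, // placeholder for WHV_PARTITION_HANDLE
    dropped: bool,
}

struct WindowsInterruptHandle {
    partition_state: RwLock<PartitionState>,
}

impl WindowsInterruptHandle {
    // kill(): a read lock lets concurrent cancel requests proceed, but none of
    // them can overlap with the write lock taken in set_dropped().
    fn kill(&self) -> bool {
        let guard = match self.partition_state.read() {
            Ok(guard) => guard,
            Err(_) => return false,
        };
        if guard.dropped {
            return false; // partition deleted (or about to be); don't touch the handle
        }
        // The real code calls WHvCancelRunVirtualProcessor(guard.handle, 0, 0) here.
        let _ = guard.handle;
        true
    }

    // set_dropped(): the write lock blocks until every in-flight kill() has
    // released its read lock, then marks the handle unusable. After this
    // returns, WHvDeletePartition can run without racing kill().
    fn set_dropped(&self) {
        if let Ok(mut guard) = self.partition_state.write() {
            guard.dropped = true;
        }
    }
}

fn main() {
    let h = WindowsInterruptHandle {
        partition_state: RwLock::new(PartitionState { handle: 0, dropped: false }),
    };
    assert!(h.kill());
    h.set_dropped();
    assert!(!h.kill()); // no cancel attempts once the partition is marked dropped
}
```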

File tree

4 files changed, +157 -22 lines changed


src/hyperlight_host/src/hypervisor/hyperlight_vm.rs

Lines changed: 9 additions & 5 deletions
@@ -19,20 +19,22 @@ use std::collections::HashMap;
 #[cfg(crashdump)]
 use std::path::Path;
 #[cfg(any(kvm, mshv3))]
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::AtomicU8;
+#[cfg(any(kvm, mshv3))]
 use std::sync::atomic::AtomicU64;
-use std::sync::atomic::{AtomicBool, AtomicU8};
 use std::sync::{Arc, Mutex};
 
 use log::LevelFilter;
 use tracing::{Span, instrument};
 #[cfg(feature = "trace_guest")]
 use tracing_opentelemetry::OpenTelemetrySpanExt;
 
-#[cfg(target_os = "windows")]
-use super::WindowsInterruptHandle;
 #[cfg(gdb)]
 use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, DebuggableVm, VcpuStopReason, arch};
 use super::regs::{CommonFpu, CommonRegisters};
+#[cfg(target_os = "windows")]
+use super::{PartitionState, WindowsInterruptHandle};
 use crate::HyperlightError::{ExecutionCanceledByHost, NoHypervisorFound};
 #[cfg(not(gdb))]
 use crate::hypervisor::Hypervisor;

@@ -168,8 +170,10 @@ impl HyperlightVm {
         #[cfg(target_os = "windows")]
         let interrupt_handle: Arc<dyn InterruptHandleImpl> = Arc::new(WindowsInterruptHandle {
             state: AtomicU8::new(0),
-            partition_handle: vm.partition_handle(),
-            dropped: AtomicBool::new(false),
+            partition_state: std::sync::RwLock::new(PartitionState {
+                handle: vm.partition_handle(),
+                dropped: false,
+            }),
         });
 
         #[cfg_attr(not(gdb), allow(unused_mut))]

src/hyperlight_host/src/hypervisor/hyperv_windows.rs

Lines changed: 5 additions & 0 deletions
@@ -669,6 +669,11 @@ impl DebuggableVm for WhpVm {
 
 impl Drop for WhpVm {
     fn drop(&mut self) {
+        // HyperlightVm::drop() calls set_dropped() before this runs.
+        // set_dropped() ensures no WHvCancelRunVirtualProcessor calls are in progress
+        // or will be made in the future, so it's safe to delete the partition.
+        // (HyperlightVm::drop() runs before its fields are dropped, so
+        // set_dropped() completes before this Drop impl runs.)
         if let Err(e) = unsafe { WHvDeletePartition(self.partition) } {
            log::error!("Failed to delete partition: {}", e);
        }
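The parenthetical in the new comment leans on a language guarantee worth spelling out: when a struct that implements Drop is dropped, its own Drop::drop runs before its fields' destructors. A minimal sketch with hypothetical Outer/Vm types (not Hyperlight code):

```rust
// Stand-ins: Outer plays the role of HyperlightVm, Vm the role of WhpVm.
struct Vm;
struct Outer {
    _vm: Vm,
}

impl Drop for Vm {
    fn drop(&mut self) {
        println!("Vm::drop (this is where WHvDeletePartition would run)");
    }
}

impl Drop for Outer {
    fn drop(&mut self) {
        // Runs before the fields are dropped, so a set_dropped()-style call
        // made here is guaranteed to finish before Vm::drop executes.
        println!("Outer::drop (this is where set_dropped would run)");
    }
}

fn main() {
    let _outer = Outer { _vm: Vm };
    // Prints:
    //   Outer::drop (this is where set_dropped would run)
    //   Vm::drop (this is where WHvDeletePartition would run)
}
```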

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 88 additions & 17 deletions
@@ -57,7 +57,7 @@ use std::str::FromStr;
 #[cfg(any(kvm, mshv3))]
 use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering};
 #[cfg(target_os = "windows")]
-use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
+use std::sync::atomic::{AtomicU8, Ordering};
 #[cfg(any(kvm, mshv3))]
 use std::time::Duration;
 
@@ -413,8 +413,34 @@ pub(super) struct WindowsInterruptHandle {
     /// (e.g., during host function calls), but is cleared at the start of each new `VirtualCPU::run()` call.
     state: AtomicU8,
 
-    partition_handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE,
-    dropped: AtomicBool,
+    /// RwLock protecting the partition handle and dropped state.
+    ///
+    /// This lock prevents a race condition between `kill()` calling `WHvCancelRunVirtualProcessor`
+    /// and `WhpVm::drop()` calling `WHvDeletePartition`. These two Windows Hypervisor Platform APIs
+    /// must not execute concurrently - if `WHvDeletePartition` frees the partition while
+    /// `WHvCancelRunVirtualProcessor` is still accessing it, the result is a use-after-free
+    /// causing STATUS_ACCESS_VIOLATION or STATUS_HEAP_CORRUPTION.
+    ///
+    /// The synchronization works as follows:
+    /// - `kill()` takes a read lock before calling `WHvCancelRunVirtualProcessor`
+    /// - `set_dropped()` takes a write lock, which blocks until all in-flight `kill()` calls complete,
+    ///   then sets `dropped = true`. This is called from `HyperlightVm::drop()` before `WhpVm::drop()`
+    ///   runs, ensuring no `kill()` is accessing the partition when `WHvDeletePartition` is called.
+    partition_state: std::sync::RwLock<PartitionState>,
+}
+
+/// State protected by the RwLock in `WindowsInterruptHandle`.
+///
+/// Contains a copy of the partition handle from `WhpVm` (not an owning reference).
+/// The RwLock and `dropped` flag ensure this handle is never used after `WhpVm`
+/// deletes the partition.
+#[cfg(target_os = "windows")]
+#[derive(Debug)]
+pub(super) struct PartitionState {
+    /// Copy of partition handle from `WhpVm`. Only valid while `dropped` is false.
+    pub(super) handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE,
+    /// Set true before partition deletion; prevents further use of `handle`.
+    pub(super) dropped: bool,
 }
 
 #[cfg(target_os = "windows")]
@@ -468,9 +494,20 @@ impl InterruptHandleImpl for WindowsInterruptHandle {
     }
 
     fn set_dropped(&self) {
-        // Release ordering to ensure all VM cleanup operations are visible
-        // to any thread that checks dropped() via Acquire
-        self.dropped.store(true, Ordering::Release);
+        // Take write lock to:
+        // 1. Wait for any in-flight kill() calls (holding read locks) to complete
+        // 2. Block new kill() calls from starting while we hold the write lock
+        // 3. Set dropped=true so no future kill() calls will use the handle
+        // After this returns, no WHvCancelRunVirtualProcessor calls are in progress
+        // or will ever be made, so WHvDeletePartition can safely be called.
+        match self.partition_state.write() {
+            Ok(mut guard) => {
+                guard.dropped = true;
+            }
+            Err(e) => {
+                log::error!("Failed to acquire partition_state write lock: {}", e);
+            }
+        }
     }
 }
 
@@ -486,31 +523,65 @@ impl InterruptHandle for WindowsInterruptHandle {
         // Acquire ordering to synchronize with the Release in set_running()
         // This ensures we see the running state set by the vcpu thread
         let state = self.state.load(Ordering::Acquire);
-        if state & Self::RUNNING_BIT != 0 {
-            unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
-        } else {
-            false
+        if state & Self::RUNNING_BIT == 0 {
+            return false;
         }
+
+        // Take read lock to prevent race with WHvDeletePartition in set_dropped().
+        // Multiple kill() calls can proceed concurrently (read locks don't block each other),
+        // but set_dropped() will wait for all kill() calls to complete before proceeding.
+        let guard = match self.partition_state.read() {
+            Ok(guard) => guard,
+            Err(e) => {
+                log::error!("Failed to acquire partition_state read lock: {}", e);
+                return false;
+            }
+        };
+
+        if guard.dropped {
+            return false;
+        }
+
+        unsafe { WHvCancelRunVirtualProcessor(guard.handle, 0, 0).is_ok() }
     }
     #[cfg(gdb)]
     fn kill_from_debugger(&self) -> bool {
         use windows::Win32::System::Hypervisor::WHvCancelRunVirtualProcessor;
 
         self.state
             .fetch_or(Self::DEBUG_INTERRUPT_BIT, Ordering::Release);
+
         // Acquire ordering to synchronize with the Release in set_running()
         let state = self.state.load(Ordering::Acquire);
-        if state & Self::RUNNING_BIT != 0 {
-            unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
-        } else {
-            false
+        if state & Self::RUNNING_BIT == 0 {
+            return false;
         }
+
+        // Take read lock to prevent race with WHvDeletePartition in set_dropped()
+        let guard = match self.partition_state.read() {
+            Ok(guard) => guard,
+            Err(e) => {
+                log::error!("Failed to acquire partition_state read lock: {}", e);
+                return false;
+            }
+        };
+
+        if guard.dropped {
+            return false;
+        }
+
+        unsafe { WHvCancelRunVirtualProcessor(guard.handle, 0, 0).is_ok() }
     }
 
     fn dropped(&self) -> bool {
-        // Acquire ordering to synchronize with the Release in set_dropped()
-        // This ensures we see all VM cleanup operations that happened before drop
-        self.dropped.load(Ordering::Acquire)
+        // Take read lock to check dropped state consistently
+        match self.partition_state.read() {
+            Ok(guard) => guard.dropped,
+            Err(e) => {
+                log::error!("Failed to acquire partition_state read lock: {}", e);
+                true // Assume dropped if we can't acquire lock
+            }
+        }
     }
 }
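set_dropped() relies on a property of std::sync::RwLock that the comments state but the diff cannot show in isolation: write() does not return while any read guard is outstanding. A small standalone demonstration of that property (not Hyperlight code; the timings are arbitrary):

```rust
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::{Duration, Instant};

fn main() {
    let lock = Arc::new(RwLock::new(()));

    // Reader thread stands in for an in-flight kill() holding the read lock
    // while WHvCancelRunVirtualProcessor is executing.
    let reader = {
        let lock = Arc::clone(&lock);
        thread::spawn(move || {
            let _guard = lock.read().unwrap();
            thread::sleep(Duration::from_millis(200));
        })
    };

    thread::sleep(Duration::from_millis(50)); // let the reader acquire the lock first
    let start = Instant::now();
    let _write_guard = lock.write().unwrap(); // blocks until the read guard is released
    println!("write() returned after {:?} (it waited for the reader)", start.elapsed());

    reader.join().unwrap();
}
```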

src/hyperlight_host/tests/integration_test.rs

Lines changed: 55 additions & 0 deletions
@@ -1664,3 +1664,58 @@ fn exception_handler_installation_and_validation() {
     let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap();
     assert_eq!(count, 2, "Handler should have been called twice");
 }
+
+/// This test is "likely" to catch a race condition where WHvCancelRunVirtualProcessor runs halfway, then the partition is deleted (by drop calling WHvDeletePartition),
+/// and WHvCancelRunVirtualProcessor continues and tries to access freed memory.
+///
+/// Based on local observations, "likely" means that if the bug exists, running this test 5 times will catch it at least once.
+#[test]
+#[cfg(target_os = "windows")]
+fn interrupt_cancel_delete_race() {
+    const NUM_THREADS: usize = 8;
+    const NUM_KILL_THREADS: usize = 4;
+    const ITERATIONS_PER_THREAD: usize = 1000;
+
+    let mut handles = vec![];
+
+    for _ in 0..NUM_THREADS {
+        handles.push(thread::spawn(move || {
+            for _ in 0..ITERATIONS_PER_THREAD {
+                let mut sandbox: MultiUseSandbox = new_uninit().unwrap().evolve().unwrap();
+                let interrupt_handle = sandbox.interrupt_handle();
+
+                let stop_flag = Arc::new(AtomicBool::new(false));
+
+                let kill_handles: Vec<_> = (0..NUM_KILL_THREADS)
+                    .map(|_| {
+                        let handle = interrupt_handle.clone();
+                        let stop = stop_flag.clone();
+                        thread::spawn(move || {
+                            while !stop.load(Ordering::Relaxed) {
+                                handle.kill();
+                            }
+                        })
+                    })
+                    .collect();
+
+                // Makes sure RUNNING_BIT is set when kill() is called
+                let _ = sandbox.call::<String>("Echo", "test".to_string());
+
+                // Drop the sandbox while kill threads are spamming
+                drop(sandbox);
+
+                // Signal kill threads to stop
+                stop_flag.store(true, Ordering::Relaxed);
+
+                // Wait for kill threads
+                for kill_handle in kill_handles {
+                    kill_handle.join().expect("Kill thread panicked!");
+                }
+            }
+        }));
+    }
+
+    for handle in handles {
+        handle.join().unwrap();
+    }
+}
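Since the race is probabilistic, the test is most useful when run in a loop. Locally, something along the lines of `cargo test --test integration_test interrupt_cancel_delete_race`, repeated a handful of times, should reproduce the crash on a build without the fix; the exact package and feature flags depend on the workspace layout and are not spelled out in this commit.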
