From ca211aeb581894097acfe3cdb0af86847b19f52f Mon Sep 17 00:00:00 2001
From: Lucy Menon <168595099+syntactically@users.noreply.github.com>
Date: Tue, 29 Jul 2025 14:49:26 +0100
Subject: [PATCH 1/3] Unify page table manipulation code between the guest and
 the host

Currently, both the guest and the host have code that manipulates
architecture-specific page table structures: the guest has a general map
operation, while the host has a much more specific routine that builds an
identity map. As we move to more complex virtual memory configurations in
the guest, the host will need to build more complex mappings in the guest
as well. This commit therefore removes the simple implementation in the
host and replaces it with calls to the implementation originally written
for the guest, now moved to `hyperlight_common` and factored into an
architecture-independent interface and architecture-specific parts.

Signed-off-by: Simon Davies
---
 src/hyperlight_common/src/arch/amd64/vm.rs    | 217 ++++++++++++++++
 src/hyperlight_common/src/lib.rs              |   2 +
 src/hyperlight_common/src/vm.rs               | 131 ++++++++++
 src/hyperlight_guest_bin/src/paging.rs        | 238 ++++--------------
 src/hyperlight_host/src/mem/layout.rs         |  35 +--
 src/hyperlight_host/src/mem/memory_region.rs  |  28 ---
 src/hyperlight_host/src/mem/mgr.rs            | 238 +++++++-----------
 .../src/sandbox/uninitialized_evolve.rs       |   5 +-
 8 files changed, 495 insertions(+), 399 deletions(-)
 create mode 100644 src/hyperlight_common/src/arch/amd64/vm.rs
 create mode 100644 src/hyperlight_common/src/vm.rs

diff --git a/src/hyperlight_common/src/arch/amd64/vm.rs b/src/hyperlight_common/src/arch/amd64/vm.rs
new file mode 100644
index 000000000..cb7aff1e6
--- /dev/null
+++ b/src/hyperlight_common/src/arch/amd64/vm.rs
@@ -0,0 +1,217 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ */
+
+use crate::vm::{Mapping, MappingKind, TableOps};
+
+#[inline(always)]
+/// Utility function to extract an (inclusive on both ends) bit range
+/// from a quadword.
+fn bits<const HIGH_BIT: u64, const LOW_BIT: u64>(x: u64) -> u64 {
+    (x & ((1 << (HIGH_BIT + 1)) - 1)) >> LOW_BIT
+}
+
+/// A helper structure indicating a mapping operation that needs to be
+/// performed
+struct MapRequest<T> {
+    table_base: T,
+    vmin: VirtAddr,
+    len: u64,
+}
+
+/// A helper structure indicating that a particular PTE needs to be
+/// modified
+struct MapResponse<T> {
+    entry_ptr: T,
+    vmin: VirtAddr,
+    len: u64,
+}
+
+struct ModifyPteIterator<const HIGH_BIT: u64, const LOW_BIT: u64, Op: TableOps> {
+    request: MapRequest<Op::TableAddr>,
+    n: u64,
+}
+impl<const HIGH_BIT: u64, const LOW_BIT: u64, Op: TableOps> Iterator
+    for ModifyPteIterator<HIGH_BIT, LOW_BIT, Op>
+{
+    type Item = MapResponse<Op::TableAddr>;
+    fn next(&mut self) -> Option<Self::Item> {
+        if (self.n << LOW_BIT) >= self.request.len {
+            return None;
+        }
+        // next stage parameters
+        let mut next_vmin = self.request.vmin + (self.n << LOW_BIT);
+        let lower_bits_mask = (1 << LOW_BIT) - 1;
+        if self.n > 0 {
+            next_vmin &= !lower_bits_mask;
+        }
+        let entry_ptr = Op::entry_addr(
+            self.request.table_base,
+            bits::<HIGH_BIT, LOW_BIT>(next_vmin) << 3,
+        );
+        let len_from_here = self.request.len - (next_vmin - self.request.vmin);
+        let max_len = (1 << LOW_BIT) - (next_vmin & lower_bits_mask);
+        let next_len = core::cmp::min(len_from_here, max_len);
+
+        // update our state
+        self.n += 1;
+
+        Some(MapResponse {
+            entry_ptr,
+            vmin: next_vmin,
+            len: next_len,
+        })
+    }
+}
+fn modify_ptes<const HIGH_BIT: u64, const LOW_BIT: u64, Op: TableOps>(
+    r: MapRequest<Op::TableAddr>,
+) -> ModifyPteIterator<HIGH_BIT, LOW_BIT, Op> {
+    ModifyPteIterator { request: r, n: 0 }
+}
+
+/// Page-mapping callback to allocate a next-level page table if necessary.
+/// # Safety
+/// This function modifies page table data structures, and should not be called concurrently
+/// with any other operations that modify the page tables.
+unsafe fn alloc_pte_if_needed<Op: TableOps>(
+    op: &Op,
+    x: MapResponse<Op::TableAddr>,
+) -> MapRequest<Op::TableAddr> {
+    let pte = unsafe { op.read_entry(x.entry_ptr) };
+    let present = pte & 0x1;
+    if present != 0 {
+        return MapRequest {
+            table_base: Op::from_phys(pte & !0xfff),
+            vmin: x.vmin,
+            len: x.len,
+        };
+    }
+
+    let page_addr = unsafe { op.alloc_table() };
+
+    #[allow(clippy::identity_op)]
+    #[allow(clippy::precedence)]
+    let pte = Op::to_phys(page_addr) |
+        1 << 5 | // A - we don't track accesses at table level
+        0 << 4 | // PCD - leave caching enabled
+        0 << 3 | // PWT - write-back
+        1 << 2 | // U/S - allow user access to everything (for now)
+        1 << 1 | // R/W - we don't use block-level permissions
+        1 << 0; // P - this entry is present
+    unsafe { op.write_entry(x.entry_ptr, pte) };
+    MapRequest {
+        table_base: page_addr,
+        vmin: x.vmin,
+        len: x.len,
+    }
+}
+
+/// Map a normal memory page
+/// # Safety
+/// This function modifies page table data structures, and should not be called concurrently
+/// with any other operations that modify the page tables.
+#[allow(clippy::identity_op)]
+#[allow(clippy::precedence)]
+unsafe fn map_page<Op: TableOps>(op: &Op, mapping: &Mapping, r: MapResponse<Op::TableAddr>) {
+    let pte = match &mapping.kind {
+        MappingKind::BasicMapping(bm) =>
+        // TODO: Support not readable
+        {
+            (mapping.phys_base + (r.vmin - mapping.virt_base)) |
+                (!bm.executable as u64) << 63 | // NX - no execute unless allowed
+                1 << 7 | // 1 - RES1 according to manual
+                1 << 6 | // D - we don't presently track dirty state for anything
+                1 << 5 | // A - we don't presently track access for anything
+                0 << 4 | // PCD - leave caching enabled
+                0 << 3 | // PWT - write-back
+                1 << 2 | // U/S - allow user access to everything (for now)
+                (bm.writable as u64) << 1 | // R/W - for now make everything r/w
+                1 << 0 // P - this entry is present
+        }
+    };
+    unsafe {
+        op.write_entry(r.entry_ptr, pte);
+    }
+}
+
+// There are no notable architecture-specific safety considerations
+// here, and the general conditions are documented in the
+// architecture-independent re-export in vm.rs
+#[allow(clippy::missing_safety_doc)]
+pub unsafe fn map<Op: TableOps>(op: &Op, mapping: Mapping) {
+    modify_ptes::<47, 39, Op>(MapRequest {
+        table_base: op.root_table(),
+        vmin: mapping.virt_base,
+        len: mapping.len,
+    })
+    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
+    .flat_map(modify_ptes::<38, 30, Op>)
+    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
+    .flat_map(modify_ptes::<29, 21, Op>)
+    .map(|r| unsafe { alloc_pte_if_needed(op, r) })
+    .flat_map(modify_ptes::<20, 12, Op>)
+    .map(|r| unsafe { map_page(op, &mapping, r) })
+    .for_each(drop);
+}
+
+/// # Safety
+/// This function traverses page table data structures, and should not
+/// be called concurrently with any other operations that modify the
+/// page table.
+unsafe fn require_pte_exist<Op: TableOps>(
+    op: &Op,
+    x: MapResponse<Op::TableAddr>,
+) -> Option<MapRequest<Op::TableAddr>> {
+    let pte = unsafe { op.read_entry(x.entry_ptr) };
+    let present = pte & 0x1;
+    if present == 0 {
+        return None;
+    }
+    Some(MapRequest {
+        table_base: Op::from_phys(pte & !0xfff),
+        vmin: x.vmin,
+        len: x.len,
+    })
+}
+
+// There are no notable architecture-specific safety considerations
+// here, and the general conditions are documented in the
+// architecture-independent re-export in vm.rs
+#[allow(clippy::missing_safety_doc)]
+pub unsafe fn vtop<Op: TableOps>(op: &Op, address: u64) -> Option<PageTableEntry> {
+    modify_ptes::<47, 39, Op>(MapRequest {
+        table_base: op.root_table(),
+        vmin: address,
+        len: 1,
+    })
+    .filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
+    .flat_map(modify_ptes::<38, 30, Op>)
+    .filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
+    .flat_map(modify_ptes::<29, 21, Op>)
+    .filter_map(|r| unsafe { require_pte_exist::<Op>(op, r) })
+    .flat_map(modify_ptes::<20, 12, Op>)
+    .filter_map(|r| {
+        let pte = unsafe { op.read_entry(r.entry_ptr) };
+        let present = pte & 0x1;
+        if present == 0 { None } else { Some(pte) }
+    })
+    .next()
+}
+
+pub const PAGE_SIZE: usize = 4096;
+pub const PAGE_TABLE_SIZE: usize = 4096;
+pub type PageTableEntry = u64;
+pub type VirtAddr = u64;
+pub type PhysAddr = u64;
diff --git a/src/hyperlight_common/src/lib.rs b/src/hyperlight_common/src/lib.rs
index 962b0ca25..64aefb2b9 100644
--- a/src/hyperlight_common/src/lib.rs
+++ b/src/hyperlight_common/src/lib.rs
@@ -38,3 +38,5 @@ pub mod resource;
 
 /// cbindgen:ignore
 pub mod func;
+// cbindgen:ignore
+pub mod vm;
diff --git a/src/hyperlight_common/src/vm.rs b/src/hyperlight_common/src/vm.rs
new file mode 100644
index 000000000..e34692304
--- /dev/null
+++ b/src/hyperlight_common/src/vm.rs
@@ -0,0 +1,131 @@
+/*
+Copyright 2025 The Hyperlight Authors.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + */ + +#[cfg_attr(target_arch = "x86_64", path = "arch/amd64/vm.rs")] +mod arch; + +pub use arch::{PAGE_SIZE, PAGE_TABLE_SIZE, PageTableEntry, PhysAddr, VirtAddr}; +pub const PAGE_TABLE_ENTRIES_PER_TABLE: usize = + PAGE_TABLE_SIZE / core::mem::size_of::(); + +/// The operations used to actually access the page table structures, +/// used to allow the same code to be used in the host and the guest +/// for page table setup +pub trait TableOps { + /// The type of table addresses + type TableAddr: Copy; + + /// Allocate a zeroed table + /// + /// # Safety + /// The current implementations of this function are not + /// inherently unsafe, but the guest implementation will likely + /// become so in the future when a real physical page allocator is + /// implemented. + /// + /// Currently, callers should take care not to call this on + /// multiple threads at the same time. + /// + /// # Panics + /// This function may panic if: + /// - The Layout creation fails + /// - Memory allocation fails + unsafe fn alloc_table(&self) -> Self::TableAddr; + + /// Offset the table address by the u64 entry offset + fn entry_addr(addr: Self::TableAddr, entry_offset: u64) -> Self::TableAddr; + + /// Read a u64 from the given address, used to read existing page + /// table entries + /// + /// # Safety + /// This reads from the given memory address, and so all the usual + /// Rust things about raw pointers apply. This will also be used + /// to update guest page tables, so especially in the guest, it is + /// important to ensure that the page tables updates do not break + /// invariants. The implementor of the trait should ensure that + /// nothing else will be reading/writing the address at the same + /// time as mapping code using the trait. + unsafe fn read_entry(&self, addr: Self::TableAddr) -> PageTableEntry; + + /// Write a u64 to the given address, used to write updated page + /// table entries + /// + /// # Safety + /// This writes to the given memory address, and so all the usual + /// Rust things about raw pointers apply. This will also be used + /// to update guest page tables, so especially in the guest, it is + /// important to ensure that the page tables updates do not break + /// invariants. The implementor of the trait should ensure that + /// nothing else will be reading/writing the address at the same + /// time as mapping code using the trait. + unsafe fn write_entry(&self, addr: Self::TableAddr, x: PageTableEntry); + + /// Convert an abstract physical address to a concrete u64 which + /// can be e.g. written into a table + fn to_phys(addr: Self::TableAddr) -> PhysAddr; + + /// Convert a concrete u64 which may have been e.g. 
read from a + /// table back into an abstract physical address + fn from_phys(addr: PhysAddr) -> Self::TableAddr; + + /// Return the address of the root page table + fn root_table(&self) -> Self::TableAddr; +} + +#[derive(Debug)] +pub struct BasicMapping { + pub readable: bool, + pub writable: bool, + pub executable: bool, +} + +#[derive(Debug)] +pub enum MappingKind { + BasicMapping(BasicMapping), + /* TODO: What useful things other than basic mappings actually + * require touching the tables? */ +} + +#[derive(Debug)] +pub struct Mapping { + pub phys_base: u64, + pub virt_base: u64, + pub len: u64, + pub kind: MappingKind, +} + +/// Assumption: all are page-aligned +/// +/// # Safety +/// This function modifies pages backing a virtual memory range which +/// is inherently unsafe w.r.t. the Rust memory model. +/// +/// When using this function, please note: +/// - No locking is performed before touching page table data structures, +/// as such do not use concurrently with any other page table operations +/// - TLB invalidation is not performed, if previously-mapped ranges +/// are being remapped, TLB invalidation may need to be performed +/// afterwards. +pub use arch::map; +/// This function is not presently used for anything, but is useful +/// for debugging +/// +/// # Safety +/// This function traverses page table data structures, and should not +/// be called concurrently with any other operations that modify the +/// page table. +pub use arch::vtop; diff --git a/src/hyperlight_guest_bin/src/paging.rs b/src/hyperlight_guest_bin/src/paging.rs index 4ee3d827a..a804e1355 100644 --- a/src/hyperlight_guest_bin/src/paging.rs +++ b/src/hyperlight_guest_bin/src/paging.rs @@ -38,20 +38,42 @@ pub fn ptov(x: u64) -> *mut u8 { // virtual address 0, and Rust raw pointer operations can't be // used to read/write from address 0. -/// A helper structure indicating a mapping operation that needs to be -/// performed -struct MapRequest { - table_base: u64, - vmin: *mut u8, - len: u64, -} - -/// A helper structure indicating that a particular PTE needs to be -/// modified -struct MapResponse { - entry_ptr: *mut u64, - vmin: *mut u8, - len: u64, +struct GuestMappingOperations {} +impl hyperlight_common::vm::TableOps for GuestMappingOperations { + type TableAddr = u64; + unsafe fn alloc_table(&self) -> u64 { + let page_addr = unsafe { alloc_phys_pages(1) }; + unsafe { ptov(page_addr).write_bytes(0u8, hyperlight_common::vm::PAGE_TABLE_SIZE) }; + page_addr + } + fn entry_addr(addr: u64, offset: u64) -> u64 { + addr + offset + } + unsafe fn read_entry(&self, addr: u64) -> u64 { + let ret: u64; + unsafe { + asm!("mov {}, qword ptr [{}]", out(reg) ret, in(reg) addr); + } + ret + } + unsafe fn write_entry(&self, addr: u64, x: u64) { + unsafe { + asm!("mov qword ptr [{}], {}", in(reg) addr, in(reg) x); + } + } + fn to_phys(addr: u64) -> u64 { + addr + } + fn from_phys(addr: u64) -> u64 { + addr + } + fn root_table(&self) -> u64 { + let pml4_base: u64; + unsafe { + asm!("mov {}, cr3", out(reg) pml4_base); + } + pml4_base & !0xfff + } } /// Assumption: all are page-aligned @@ -65,64 +87,22 @@ struct MapResponse { /// if previously-unmapped ranges are not being mapped, TLB invalidation may need to be performed afterwards. 
#[instrument(skip_all, parent = Span::current(), level= "Trace")] pub unsafe fn map_region(phys_base: u64, virt_base: *mut u8, len: u64) { - let mut pml4_base: u64; + use hyperlight_common::vm; unsafe { - asm!("mov {}, cr3", out(reg) pml4_base); - } - pml4_base &= !0xfff; - modify_ptes::<47, 39>(MapRequest { - table_base: pml4_base, - vmin: virt_base, - len, - }) - .map(|r| unsafe { alloc_pte_if_needed(r) }) - .flat_map(modify_ptes::<38, 30>) - .map(|r| unsafe { alloc_pte_if_needed(r) }) - .flat_map(modify_ptes::<29, 21>) - .map(|r| unsafe { alloc_pte_if_needed(r) }) - .flat_map(modify_ptes::<20, 12>) - .map(|r| map_normal(phys_base, virt_base, r)) - .for_each(drop); -} - -#[allow(unused)] -/// This function is not presently used for anything, but is useful -/// for debugging -/// # Safety -/// This function traverses page table data structures, and should not be called concurrently -/// with any other operations that modify the page table. -/// # Panics -/// This function will panic if: -/// - A page map request resolves to multiple page table entries -pub unsafe fn dbg_print_address_pte(address: u64) -> u64 { - let mut pml4_base: u64 = 0; - unsafe { - asm!("mov {}, cr3", out(reg) pml4_base); - } - pml4_base &= !0xfff; - let addrs = modify_ptes::<47, 39>(MapRequest { - table_base: pml4_base, - vmin: address as *mut u8, - len: unsafe { OS_PAGE_SIZE as u64 }, - }) - .map(|r| unsafe { require_pte_exist(r) }) - .flat_map(modify_ptes::<38, 30>) - .map(|r| unsafe { require_pte_exist(r) }) - .flat_map(modify_ptes::<29, 21>) - .map(|r| unsafe { require_pte_exist(r) }) - .flat_map(modify_ptes::<20, 12>) - .map(|r| { - let mut pte: u64 = 0; - unsafe { - asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) r.entry_ptr); - } - pte - }) - .collect::>(); - if addrs.len() != 1 { - panic!("impossible: 1 page map request resolved to multiple PTEs"); + vm::map::( + &GuestMappingOperations {}, + vm::Mapping { + phys_base, + virt_base: virt_base as u64, + len, + kind: vm::MappingKind::BasicMapping(vm::BasicMapping { + readable: true, + writable: true, + executable: true, + }), + }, + ); } - addrs[0] } /// Allocate n contiguous physical pages and return the physical @@ -149,124 +129,6 @@ pub unsafe fn alloc_phys_pages(n: u64) -> u64 { } } -/// # Safety -/// This function traverses page table data structures, and should not be called concurrently -/// with any other operations that modify the page table. -unsafe fn require_pte_exist(x: MapResponse) -> MapRequest { - let mut pte: u64; - unsafe { - asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr); - } - let present = pte & 0x1; - if present == 0 { - panic!("debugging: found not-present pte"); - } - MapRequest { - table_base: pte & !0xfff, - vmin: x.vmin, - len: x.len, - } -} - -/// Page-mapping callback to allocate a next-level page table if necessary. -/// # Safety -/// This function modifies page table data structures, and should not be called concurrently -/// with any other operations that modify the page table. 
-unsafe fn alloc_pte_if_needed(x: MapResponse) -> MapRequest { - let mut pte: u64; - unsafe { - asm!("mov {}, qword ptr [{}]", out(reg) pte, in(reg) x.entry_ptr); - } - let present = pte & 0x1; - if present != 0 { - return MapRequest { - table_base: pte & !0xfff, - vmin: x.vmin, - len: x.len, - }; - } - let page_addr = unsafe { alloc_phys_pages(1) }; - unsafe { ptov(page_addr).write_bytes(0u8, OS_PAGE_SIZE as usize) }; - - #[allow(clippy::identity_op)] - #[allow(clippy::precedence)] - let pte = page_addr | - 1 << 5 | // A - we don't track accesses at table level - 0 << 4 | // PCD - leave caching enabled - 0 << 3 | // PWT - write-back - 1 << 2 | // U/S - allow user access to everything (for now) - 1 << 1 | // R/W - we don't use block-level permissions - 1 << 0; // P - this entry is present - unsafe { - asm!("mov qword ptr [{}], {}", in(reg) x.entry_ptr, in(reg) pte); - } - MapRequest { - table_base: page_addr, - vmin: x.vmin, - len: x.len, - } -} - -/// Map a normal memory page -/// -/// TODO: support permissions; currently mapping is always RWX -fn map_normal(phys_base: u64, virt_base: *mut u8, r: MapResponse) { - #[allow(clippy::identity_op)] - #[allow(clippy::precedence)] - let pte = (phys_base + (r.vmin as u64 - virt_base as u64)) | - 1 << 6 | // D - we don't presently track dirty state for anything - 1 << 5 | // A - we don't presently track access for anything - 0 << 4 | // PCD - leave caching enabled - 0 << 3 | // PWT - write-back - 1 << 2 | // U/S - allow user access to everything (for now) - 1 << 1 | // R/W - for now make everything r/w - 1 << 0; // P - this entry is present - unsafe { - r.entry_ptr.write_volatile(pte); - } -} - -#[inline(always)] -/// Utility function to extract an (inclusive on both ends) bit range -/// from a quadword. -fn bits(x: u64) -> u64 { - (x & ((1 << (HIGH_BIT + 1)) - 1)) >> LOW_BIT -} - -struct ModifyPteIterator { - request: MapRequest, - n: u64, -} -impl Iterator for ModifyPteIterator { - type Item = MapResponse; - fn next(&mut self) -> Option { - if (self.n << LOW_BIT) >= self.request.len { - return None; - } - // next stage parameters - let next_vmin = self.request.vmin.wrapping_add((self.n << LOW_BIT) as usize); - let entry_ptr = ptov(self.request.table_base) - .wrapping_add((bits::(next_vmin as u64) << 3) as usize) - as *mut u64; - let len_from_here = self.request.len - (self.n << LOW_BIT); - let next_len = core::cmp::min(len_from_here, 1 << LOW_BIT); - - // update our state - self.n += 1; - - Some(MapResponse { - entry_ptr, - vmin: next_vmin, - len: next_len, - }) - } -} -fn modify_ptes( - r: MapRequest, -) -> ModifyPteIterator { - ModifyPteIterator { request: r, n: 0 } -} - pub fn flush_tlb() { // Currently this just always flips CR4.PGE back and forth to // trigger a tlb flush. We should use a faster approach where diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 32d7b5478..ca64b3211 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -217,27 +217,6 @@ impl SandboxMemoryLayout { /// The offset into the sandbox's memory where the PML4 Table is located. /// See https://www.pagetable.com/?p=14 for more information. pub(crate) const PML4_OFFSET: usize = 0x0000; - /// The offset into the sandbox's memory where the Page Directory Pointer - /// Table starts. - #[cfg(feature = "init-paging")] - pub(super) const PDPT_OFFSET: usize = 0x1000; - /// The offset into the sandbox's memory where the Page Directory starts. 
- #[cfg(feature = "init-paging")] - pub(super) const PD_OFFSET: usize = 0x2000; - /// The offset into the sandbox's memory where the Page Tables start. - #[cfg(feature = "init-paging")] - pub(super) const PT_OFFSET: usize = 0x3000; - /// The address (not the offset) to the start of the page directory - #[cfg(feature = "init-paging")] - pub(super) const PD_GUEST_ADDRESS: usize = Self::BASE_ADDRESS + Self::PD_OFFSET; - /// The address (not the offset) into sandbox memory where the Page - /// Directory Pointer Table starts - #[cfg(feature = "init-paging")] - pub(super) const PDPT_GUEST_ADDRESS: usize = Self::BASE_ADDRESS + Self::PDPT_OFFSET; - /// The address (not the offset) into sandbox memory where the Page - /// Tables start - #[cfg(feature = "init-paging")] - pub(super) const PT_GUEST_ADDRESS: usize = Self::BASE_ADDRESS + Self::PT_OFFSET; /// The maximum amount of memory a single sandbox will be allowed. /// The addressable virtual memory with current paging setup is virtual address 0x0 - 0x40000000 (excl.), /// However, the memory up to Self::BASE_ADDRESS is not used. @@ -478,16 +457,10 @@ impl SandboxMemoryLayout { self.total_page_table_size } - // This function calculates the page table size for the sandbox - // We need enough memory to store the PML4, PDPT, PD and PTs - // The size of a single table is 4K, we can map up to 1GB total memory which requires 1 PML4, 1 PDPT, 1 PD and 512 PTs - // but we only need enough PTs to map the memory we are using. (In other words we only need 512 PTs to map the memory if the memory size is 1GB) - // - // We can calculate the amount of memory needed for the PTs by calculating how much memory is needed for the sandbox configuration in total, - // and then add 3 * 4K (for the PML4, PDPT and PD) to that, - // then add 2MB to that (the maximum size of memory required for the PTs themselves is 2MB when we map 1GB of memory in 4K pages), - // then divide that by 0x200_000 (as we can map 2MB in each PT). - // This will give us the total size of the PTs required for the sandbox to which we can add the size of the PML4, PDPT and PD. + // TODO: This over-counts on small sandboxes (because not all 512 + // PTs may be required), under-counts on sandboxes with more than + // 1GiB memory, and would get unreasonably complicated if we + // needed to support hugepages. #[instrument(skip_all, parent = Span::current(), level= "Trace")] #[cfg(feature = "init-paging")] fn get_total_page_table_size( diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 2b801329b..c7ae6b629 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -31,9 +31,6 @@ use mshv_bindings::{hv_x64_memory_intercept_message, mshv_user_mem_region}; #[cfg(target_os = "windows")] use windows::Win32::System::Hypervisor::{self, WHV_MEMORY_ACCESS_TYPE}; -#[cfg(feature = "init-paging")] -use super::mgr::{PAGE_NX, PAGE_PRESENT, PAGE_RW, PAGE_USER}; - pub(crate) const DEFAULT_GUEST_BLOB_MEM_FLAGS: MemoryRegionFlags = MemoryRegionFlags::READ; bitflags! { @@ -53,31 +50,6 @@ bitflags! 
{ } } -impl MemoryRegionFlags { - #[cfg(feature = "init-paging")] - pub(crate) fn translate_flags(&self) -> u64 { - let mut page_flags = 0; - - page_flags |= PAGE_PRESENT; // Mark page as present - - if self.contains(MemoryRegionFlags::WRITE) { - page_flags |= PAGE_RW; // Allow read/write - } - - if self.contains(MemoryRegionFlags::STACK_GUARD) { - page_flags |= PAGE_RW; // The guard page is marked RW so that if it gets written to we can detect it in the host - } - - if self.contains(MemoryRegionFlags::EXECUTE) { - page_flags |= PAGE_USER; // Allow user access - } else { - page_flags |= PAGE_NX; // Mark as non-executable if EXECUTE is not set - } - - page_flags - } -} - impl std::fmt::Display for MemoryRegionFlags { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if self.is_empty() { diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 55d5fc2af..ca3adfeae 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -23,13 +23,18 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{ use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult; use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData; use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +#[cfg(feature = "init-paging")] +use hyperlight_common::vm::{ + self, BasicMapping, Mapping, MappingKind, PAGE_TABLE_ENTRIES_PER_TABLE, PAGE_TABLE_SIZE, + PageTableEntry, PhysAddr, +}; use tracing::{Span, instrument}; use super::exe::ExeInfo; use super::layout::SandboxMemoryLayout; use super::memory_region::MemoryRegion; #[cfg(feature = "init-paging")] -use super::memory_region::{DEFAULT_GUEST_BLOB_MEM_FLAGS, MemoryRegionType}; +use super::memory_region::MemoryRegionFlags; use super::ptr::{GuestPtr, RawPtr}; use super::ptr_offset::Offset; use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory}; @@ -40,16 +45,6 @@ use crate::{Result, log_then_return, new_error}; cfg_if::cfg_if! 
{ if #[cfg(feature = "init-paging")] { - /// Paging Flags - /// - /// See the following links explaining paging, also see paging-development-notes.md in docs: - /// - /// * Very basic description: https://stackoverflow.com/a/26945892 - /// * More in-depth descriptions: https://wiki.osdev.org/Paging - pub(crate) const PAGE_PRESENT: u64 = 1; // Page is Present - pub(crate) const PAGE_RW: u64 = 1 << 1; // Page is Read/Write (if not set page is read only so long as the WP bit in CR0 is set to 1 - which it is in Hyperlight) - pub(crate) const PAGE_USER: u64 = 1 << 2; // User/Supervisor (if this bit is set then the page is accessible by user mode code) - pub(crate) const PAGE_NX: u64 = 1 << 63; // Execute Disable (if this bit is set then data in the page cannot be executed)` // The amount of memory that can be mapped per page table pub(super) const AMOUNT_OF_MEMORY_PER_PT: usize = 0x200_000; } @@ -80,6 +75,62 @@ pub(crate) struct SandboxMemoryManager { pub(crate) abort_buffer: Vec, } +#[cfg(feature = "init-paging")] +struct GuestPageTableBuffer { + buffer: std::cell::RefCell>, +} +#[cfg(feature = "init-paging")] +impl vm::TableOps for GuestPageTableBuffer { + type TableAddr = (usize, usize); + unsafe fn alloc_table(&self) -> (usize, usize) { + let mut b = self.buffer.borrow_mut(); + let page_addr = b.len(); + b.push([0; PAGE_TABLE_ENTRIES_PER_TABLE]); + (page_addr, 0) + } + fn entry_addr(addr: (usize, usize), offset: u64) -> (usize, usize) { + (addr.0, offset as usize >> 3) + } + unsafe fn read_entry(&self, addr: (usize, usize)) -> PageTableEntry { + let b = self.buffer.borrow(); + b[addr.0][addr.1] + } + unsafe fn write_entry(&self, addr: (usize, usize), x: PageTableEntry) { + let mut b = self.buffer.borrow_mut(); + b[addr.0][addr.1] = x; + } + fn to_phys(addr: (usize, usize)) -> PhysAddr { + (addr.0 as u64 * PAGE_TABLE_SIZE as u64) + addr.1 as u64 + } + fn from_phys(addr: PhysAddr) -> (usize, usize) { + ( + addr as usize / PAGE_TABLE_SIZE, + addr as usize % PAGE_TABLE_SIZE, + ) + } + fn root_table(&self) -> (usize, usize) { + (0, 0) + } +} +#[cfg(feature = "init-paging")] +impl GuestPageTableBuffer { + fn new() -> Self { + GuestPageTableBuffer { + buffer: std::cell::RefCell::new(vec![[0; PAGE_TABLE_ENTRIES_PER_TABLE]]), + } + } + fn into_bytes(self) -> Box<[u8]> { + let bx = self.buffer.into_inner().into_boxed_slice(); + let len = bx.len(); + unsafe { + Box::from_raw(std::ptr::slice_from_raw_parts_mut( + Box::into_raw(bx) as *mut u8, + len * PAGE_TABLE_SIZE, + )) + } + } +} + impl SandboxMemoryManager where S: SharedMemory, @@ -121,17 +172,13 @@ where &mut self.shared_mem } - /// Set up the hypervisor partition in the given `SharedMemory` parameter - /// `shared_mem`, with the given memory size `mem_size` + /// Set up the guest page tables in the given `SharedMemory` parameter + /// `shared_mem` // TODO: This should perhaps happen earlier and use an // ExclusiveSharedMemory from the beginning. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] #[cfg(feature = "init-paging")] - pub(crate) fn set_up_shared_memory( - &mut self, - mem_size: u64, - regions: &mut [MemoryRegion], - ) -> Result { + pub(crate) fn set_up_shared_memory(&mut self, regions: &mut [MemoryRegion]) -> Result { let rsp: u64 = self.layout.get_top_of_user_stack_offset() as u64 + SandboxMemoryLayout::BASE_ADDRESS as u64 + self.layout.stack_size as u64 @@ -142,144 +189,37 @@ where - 0x28; self.shared_mem.with_exclusivity(|shared_mem| { - // Create PDL4 table with only 1 PML4E - shared_mem.write_u64( - SandboxMemoryLayout::PML4_OFFSET, - SandboxMemoryLayout::PDPT_GUEST_ADDRESS as u64 | PAGE_PRESENT | PAGE_RW, - )?; - - // Create PDPT with only 1 PDPTE - shared_mem.write_u64( - SandboxMemoryLayout::PDPT_OFFSET, - SandboxMemoryLayout::PD_GUEST_ADDRESS as u64 | PAGE_PRESENT | PAGE_RW, - )?; - - for i in 0..512 { - let offset = SandboxMemoryLayout::PD_OFFSET + (i * 8); - let val_to_write: u64 = (SandboxMemoryLayout::PT_GUEST_ADDRESS as u64 - + (i * 4096) as u64) - | PAGE_PRESENT - | PAGE_RW; - shared_mem.write_u64(offset, val_to_write)?; - } - - // We only need to create enough PTEs to map the amount of memory we have - // We need one PT for every 2MB of memory that is mapped - // We can use the memory size to calculate the number of PTs we need - // We round up mem_size/2MB - - let mem_size = usize::try_from(mem_size)?; - - let num_pages: usize = mem_size.div_ceil(AMOUNT_OF_MEMORY_PER_PT); - - // Create num_pages PT with 512 PTEs - // Pre-allocate buffer for all page table entries to minimize shared memory writes - let total_ptes = num_pages * 512; - let mut pte_buffer = vec![0u64; total_ptes]; // Pre-allocate u64 buffer directly - let mut cached_region_idx: Option = None; // Cache for optimized region lookup - let mut pte_index = 0; - - for p in 0..num_pages { - for i in 0..512 { - // Each PTE maps a 4KB page - let flags = match Self::get_page_flags(p, i, regions, &mut cached_region_idx) { - Ok(region_type) => match region_type { - // TODO: We parse and load the exe according to its sections and then - // have the correct flags set rather than just marking the entire binary as executable - MemoryRegionType::Code => PAGE_PRESENT | PAGE_RW | PAGE_USER, - MemoryRegionType::InitData => self - .layout - .init_data_permissions - .map(|perm| perm.translate_flags()) - .unwrap_or(DEFAULT_GUEST_BLOB_MEM_FLAGS.translate_flags()), - MemoryRegionType::Stack => PAGE_PRESENT | PAGE_RW | PAGE_USER | PAGE_NX, - #[cfg(feature = "executable_heap")] - MemoryRegionType::Heap => PAGE_PRESENT | PAGE_RW | PAGE_USER, - #[cfg(not(feature = "executable_heap"))] - MemoryRegionType::Heap => PAGE_PRESENT | PAGE_RW | PAGE_USER | PAGE_NX, - // The guard page is marked RW and User so that if it gets written to we can detect it in the host - // If/When we implement an interrupt handler for page faults in the guest then we can remove this access and handle things properly there - MemoryRegionType::GuardPage => { - PAGE_PRESENT | PAGE_RW | PAGE_USER | PAGE_NX - } - MemoryRegionType::InputData => PAGE_PRESENT | PAGE_RW | PAGE_NX, - MemoryRegionType::OutputData => PAGE_PRESENT | PAGE_RW | PAGE_NX, - MemoryRegionType::Peb => PAGE_PRESENT | PAGE_RW | PAGE_NX, - // Host Function Definitions are readonly in the guest - MemoryRegionType::HostFunctionDefinitions => PAGE_PRESENT | PAGE_NX, - MemoryRegionType::PageTables => PAGE_PRESENT | PAGE_RW | PAGE_NX, - }, - // If there is an error then the address isn't mapped so mark it as not 
present - Err(_) => 0, - }; - let val_to_write = ((p << 21) as u64 | (i << 12) as u64) | flags; - // Write u64 directly to buffer - more efficient than converting to bytes - pte_buffer[pte_index] = val_to_write.to_le(); - pte_index += 1; - } + let buffer = GuestPageTableBuffer::new(); + for region in regions.iter() { + let readable = region.flags.contains(MemoryRegionFlags::READ); + let writable = region.flags.contains(MemoryRegionFlags::WRITE) + // Temporary hack: the stack guard page is + // currently checked for in the host, rather than + // the guest, so we need to mark it writable in + // the Stage 1 translation so that the fault + // exception on a write is taken to the + // hypervisor, rather than the guest kernel + || region.flags.contains(MemoryRegionFlags::STACK_GUARD); + let executable = region.flags.contains(MemoryRegionFlags::EXECUTE); + let mapping = Mapping { + phys_base: region.guest_region.start as u64, + virt_base: region.guest_region.start as u64, + len: region.guest_region.len() as u64, + kind: MappingKind::BasicMapping(BasicMapping { + readable, + writable, + executable, + }), + }; + unsafe { vm::map(&buffer, mapping) }; } - - // Write the entire PTE buffer to shared memory in a single operation - // Convert u64 buffer to bytes for writing to shared memory - let pte_bytes = unsafe { - std::slice::from_raw_parts(pte_buffer.as_ptr() as *const u8, pte_buffer.len() * 8) - }; - shared_mem.copy_from_slice(pte_bytes, SandboxMemoryLayout::PT_OFFSET)?; + shared_mem.copy_from_slice(&buffer.into_bytes(), SandboxMemoryLayout::PML4_OFFSET)?; Ok::<(), crate::HyperlightError>(()) })??; Ok(rsp) } - /// Optimized page flags getter that maintains state for sequential access patterns - #[cfg(feature = "init-paging")] - fn get_page_flags( - p: usize, - i: usize, - regions: &[MemoryRegion], - cached_region_idx: &mut Option, - ) -> Result { - let addr = (p << 21) + (i << 12); - - // First check if we're still in the cached region - if let Some(cached_idx) = *cached_region_idx - && cached_idx < regions.len() - && regions[cached_idx].guest_region.contains(&addr) - { - return Ok(regions[cached_idx].region_type); - } - - // If not in cached region, try adjacent regions first (common for sequential access) - if let Some(cached_idx) = *cached_region_idx { - // Check next region - if cached_idx + 1 < regions.len() - && regions[cached_idx + 1].guest_region.contains(&addr) - { - *cached_region_idx = Some(cached_idx + 1); - return Ok(regions[cached_idx + 1].region_type); - } - } - - // Fall back to binary search for non-sequential access - let idx = regions.binary_search_by(|region| { - if region.guest_region.contains(&addr) { - std::cmp::Ordering::Equal - } else if region.guest_region.start > addr { - std::cmp::Ordering::Greater - } else { - std::cmp::Ordering::Less - } - }); - - match idx { - Ok(index) => { - *cached_region_idx = Some(index); - Ok(regions[index].region_type) - } - Err(_) => Err(new_error!("Could not find region for address: {}", addr)), - } - } - /// Create a snapshot with the given mapped regions pub(crate) fn snapshot( &mut self, diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 98941d893..baecaa818 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -30,7 +30,7 @@ use crate::mem::mgr::SandboxMemoryManager; use crate::mem::ptr::{GuestPtr, RawPtr}; use crate::mem::ptr_offset::Offset; use 
crate::mem::shared_mem::GuestSharedMemory; -#[cfg(any(feature = "init-paging", target_os = "windows"))] +#[cfg(target_os = "windows")] use crate::mem::shared_mem::SharedMemory; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; @@ -108,8 +108,7 @@ pub(crate) fn set_up_hypervisor_partition( #[cfg(feature = "init-paging")] let rsp_ptr = { let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; - let mem_size = u64::try_from(mgr.shared_mem.mem_size())?; - let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?; + let rsp_u64 = mgr.set_up_shared_memory(&mut regions)?; let rsp_raw = RawPtr::from(rsp_u64); GuestPtr::try_from(rsp_raw) }?; From 2807608b5bb13cb91a6676f8506119b5b1c7112b Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Wed, 10 Dec 2025 11:34:37 +0000 Subject: [PATCH 2/3] Only include vm mod when init-paging is enabled Signed-off-by: Simon Davies --- src/hyperlight_common/Cargo.toml | 1 + src/hyperlight_common/src/lib.rs | 1 + src/hyperlight_guest_bin/Cargo.toml | 2 +- src/hyperlight_host/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_common/Cargo.toml b/src/hyperlight_common/Cargo.toml index a10a991d6..199bb1f12 100644 --- a/src/hyperlight_common/Cargo.toml +++ b/src/hyperlight_common/Cargo.toml @@ -30,6 +30,7 @@ fuzzing = ["dep:arbitrary"] trace_guest = [] mem_profile = [] std = ["thiserror/std", "log/std", "tracing/std"] +init-paging = [] [lib] bench = false # see https://bheisler.github.io/criterion.rs/book/faq.html#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options diff --git a/src/hyperlight_common/src/lib.rs b/src/hyperlight_common/src/lib.rs index 64aefb2b9..a66383903 100644 --- a/src/hyperlight_common/src/lib.rs +++ b/src/hyperlight_common/src/lib.rs @@ -39,4 +39,5 @@ pub mod resource; /// cbindgen:ignore pub mod func; // cbindgen:ignore +#[cfg(feature = "init-paging")] pub mod vm; diff --git a/src/hyperlight_guest_bin/Cargo.toml b/src/hyperlight_guest_bin/Cargo.toml index 056e636e6..74888db00 100644 --- a/src/hyperlight_guest_bin/Cargo.toml +++ b/src/hyperlight_guest_bin/Cargo.toml @@ -23,7 +23,7 @@ macros = ["dep:hyperlight-guest-macro", "dep:linkme"] [dependencies] hyperlight-guest = { workspace = true, default-features = false } -hyperlight-common = { workspace = true, default-features = false } +hyperlight-common = { workspace = true, default-features = false, features = [ "init-paging" ] } hyperlight-guest-tracing = { workspace = true, default-features = false } hyperlight-guest-macro = { workspace = true, default-features = false, optional = true } buddy_system_allocator = "0.11.0" diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 8cd0ef944..19383bee9 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -40,7 +40,7 @@ tracing = { version = "0.1.43", features = ["log"] } tracing-log = "0.2.0" tracing-core = "0.1.35" tracing-opentelemetry = { version = "0.32.0", optional = true } -hyperlight-common = { workspace = true, default-features = true, features = [ "std" ] } +hyperlight-common = { workspace = true, default-features = true, features = [ "std", "init-paging" ] } hyperlight-guest-tracing = { workspace = true, default-features = true, optional = true } vmm-sys-util = "0.15.0" crossbeam-channel = "0.5.15" From 09cf01911221688955fda361d31c2c366803c355 Mon Sep 17 00:00:00 2001 From: Simon Davies Date: Tue, 9 Dec 2025 22:48:45 +0000 Subject: [PATCH 3/3] fix clippy issue on windows Signed-off-by: Simon Davies --- 
 src/hyperlight_host/src/sandbox/uninitialized_evolve.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
index baecaa818..f05e1a124 100644
--- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
+++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs
@@ -30,8 +30,6 @@ use crate::mem::mgr::SandboxMemoryManager;
 use crate::mem::ptr::{GuestPtr, RawPtr};
 use crate::mem::ptr_offset::Offset;
 use crate::mem::shared_mem::GuestSharedMemory;
-#[cfg(target_os = "windows")]
-use crate::mem::shared_mem::SharedMemory;
 #[cfg(gdb)]
 use crate::sandbox::config::DebugInfo;
 #[cfg(feature = "mem_profile")]
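Editorial note (not part of the patch series): the sketch below illustrates how the
architecture-independent interface introduced above (hyperlight_common::vm::TableOps,
Mapping, BasicMapping, MappingKind, and vm::map) is intended to be consumed. The trait
and type names come from the diffs; the backing store VecTables, the main() values, and
the feature setup are made up for illustration and are a simplified stand-in for the real
implementations in the series (GuestMappingOperations in the guest, GuestPageTableBuffer
in the host). It assumes hyperlight-common is built with the init-paging feature (added
in patch 2/3) and a std environment.

    use std::cell::RefCell;

    use hyperlight_common::vm::{
        self, BasicMapping, Mapping, MappingKind, PAGE_TABLE_ENTRIES_PER_TABLE,
        PAGE_TABLE_SIZE, PageTableEntry, PhysAddr, TableOps,
    };

    /// Toy backing store: a growable vector of 4 KiB tables, addressed as
    /// (table index, entry index), with table 0 acting as the root (PML4).
    struct VecTables {
        tables: RefCell<Vec<[PageTableEntry; PAGE_TABLE_ENTRIES_PER_TABLE]>>,
    }

    impl TableOps for VecTables {
        type TableAddr = (usize, usize);

        unsafe fn alloc_table(&self) -> Self::TableAddr {
            // Append a zeroed table and hand back its index
            let mut t = self.tables.borrow_mut();
            t.push([0; PAGE_TABLE_ENTRIES_PER_TABLE]);
            (t.len() - 1, 0)
        }
        fn entry_addr(addr: Self::TableAddr, entry_offset: u64) -> Self::TableAddr {
            // entry_offset is a byte offset; each entry is 8 bytes
            (addr.0, entry_offset as usize >> 3)
        }
        unsafe fn read_entry(&self, addr: Self::TableAddr) -> PageTableEntry {
            self.tables.borrow()[addr.0][addr.1]
        }
        unsafe fn write_entry(&self, addr: Self::TableAddr, x: PageTableEntry) {
            self.tables.borrow_mut()[addr.0][addr.1] = x;
        }
        fn to_phys(addr: Self::TableAddr) -> PhysAddr {
            // "Physical" address of a table is just its byte offset in the buffer
            (addr.0 * PAGE_TABLE_SIZE + addr.1) as PhysAddr
        }
        fn from_phys(addr: PhysAddr) -> Self::TableAddr {
            (addr as usize / PAGE_TABLE_SIZE, addr as usize % PAGE_TABLE_SIZE)
        }
        fn root_table(&self) -> Self::TableAddr {
            (0, 0)
        }
    }

    fn main() {
        let ops = VecTables {
            tables: RefCell::new(vec![[0; PAGE_TABLE_ENTRIES_PER_TABLE]]),
        };
        // Identity-map 2 MiB as read/write/execute, much as the host does per region.
        unsafe {
            vm::map(
                &ops,
                Mapping {
                    phys_base: 0x20_0000,
                    virt_base: 0x20_0000,
                    len: 0x20_0000,
                    kind: MappingKind::BasicMapping(BasicMapping {
                        readable: true,
                        writable: true,
                        executable: true,
                    }),
                },
            );
        }
        println!("page tables allocated: {}", ops.tables.borrow().len());
    }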