Skip to content

improve the performance of building page tables #635

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions src/hyperlight_host/src/mem/mgr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use super::ptr::{GuestPtr, RawPtr};
use super::ptr_offset::Offset;
use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory};
use super::shared_mem_snapshot::SharedMemorySnapshot;
use crate::error::HyperlightError::NoMemorySnapshot;
use crate::HyperlightError::NoMemorySnapshot;
use crate::sandbox::SandboxConfiguration;
use crate::sandbox::uninitialized::GuestBlob;
use crate::{HyperlightError, Result, log_then_return, new_error};
Expand Down Expand Up @@ -150,11 +150,16 @@ where
let num_pages: usize = mem_size.div_ceil(AMOUNT_OF_MEMORY_PER_PT);

// Create num_pages page tables (PTs), each with 512 PTEs
// Pre-allocate buffer for all page table entries to minimize shared memory writes
let total_ptes = num_pages * 512;
let mut pte_buffer = vec![0u64; total_ptes]; // Pre-allocate u64 buffer directly
let mut cached_region_idx: Option<usize> = None; // Cache for optimized region lookup
let mut pte_index = 0;

for p in 0..num_pages {
for i in 0..512 {
let offset = SandboxMemoryLayout::PT_OFFSET + (p * 4096) + (i * 8);
// Each PTE maps a 4KB page
let flags = match Self::get_page_flags(p, i, regions) {
let flags = match Self::get_page_flags(p, i, regions, &mut cached_region_idx) {
Ok(region_type) => match region_type {
// TODO: We should parse and load the exe according to its sections and then
// set the correct flags, rather than just marking the entire binary as executable
Expand Down Expand Up @@ -185,22 +190,52 @@ where
Err(_) => 0,
};
let val_to_write = ((p << 21) as u64 | (i << 12) as u64) | flags;
shared_mem.write_u64(offset, val_to_write)?;
// Write u64 directly to buffer - more efficient than converting to bytes
pte_buffer[pte_index] = val_to_write.to_le();
pte_index += 1;
}
}

// Write the entire PTE buffer to shared memory in a single operation
// Convert u64 buffer to bytes for writing to shared memory
let pte_bytes = unsafe {
std::slice::from_raw_parts(pte_buffer.as_ptr() as *const u8, pte_buffer.len() * 8)
};
shared_mem.copy_from_slice(pte_bytes, SandboxMemoryLayout::PT_OFFSET)?;
Ok::<(), HyperlightError>(())
})??;

Ok(rsp)
}

/// Optimized page flags getter that maintains state for sequential access patterns
fn get_page_flags(
p: usize,
i: usize,
regions: &mut [MemoryRegion],
regions: &[MemoryRegion],
cached_region_idx: &mut Option<usize>,
) -> Result<MemoryRegionType> {
let addr = (p << 21) + (i << 12);

// First check if we're still in the cached region
if let Some(cached_idx) = *cached_region_idx {
if cached_idx < regions.len() && regions[cached_idx].guest_region.contains(&addr) {
return Ok(regions[cached_idx].region_type);
}
}

// If not in cached region, try adjacent regions first (common for sequential access)
if let Some(cached_idx) = *cached_region_idx {
// Check next region
if cached_idx + 1 < regions.len()
&& regions[cached_idx + 1].guest_region.contains(&addr)
{
*cached_region_idx = Some(cached_idx + 1);
return Ok(regions[cached_idx + 1].region_type);
}
}

// Fall back to binary search for non-sequential access
let idx = regions.binary_search_by(|region| {
if region.guest_region.contains(&addr) {
std::cmp::Ordering::Equal
Expand All @@ -212,7 +247,10 @@ where
});

match idx {
Ok(index) => Ok(regions[index].region_type),
Ok(index) => {
*cached_region_idx = Some(index);
Ok(regions[index].region_type)
}
Err(_) => Err(new_error!("Could not find region for address: {}", addr)),
}
}
Expand Down
Loading