Skip to content

Commit e14ac69

Browse files
committed
crashdump: create core dump file when a guest crashes
- the core dump file is an ELF file with special segments that describe the guest's memory when it crashed, the CPU registers' values, and other special notes that tell the debugger how to set up a debugging session starting from the core dump. Signed-off-by: Doru Blânzeanu <[email protected]>
1 parent a6bc615 commit e14ac69

File tree

9 files changed

+464
-37
lines changed

9 files changed

+464
-37
lines changed

Cargo.lock

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/hyperlight_host/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ tempfile = { version = "3.19", optional = true }
4646
serde_yaml = "0.9"
4747
anyhow = "1.0"
4848
metrics = "0.24.2"
49+
elfcore = { git = "https://github.com/dblnz/elfcore.git", branch = "split-linux-impl-from-elfcore" }
4950

5051
[target.'cfg(windows)'.dependencies]
5152
windows = { version = "0.61", features = [
Lines changed: 239 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,257 @@
1-
use std::io::Write;
1+
use std::cmp::min;
22

3+
use elfcore::{
4+
ArchComponentState, ArchState, CoreDumpBuilder, CoreError, Elf64_Auxv, ProcessInfoSource,
5+
ReadProcessMemory, ThreadView, VaProtection, VaRegion,
6+
};
37
use tempfile::NamedTempFile;
48

59
use super::Hypervisor;
10+
use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
611
use crate::{new_error, Result};
712

8-
/// Dump registers + memory regions + raw memory to a tempfile
9-
#[cfg(crashdump)]
10-
pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> {
11-
let mut temp_file = NamedTempFile::with_prefix("mem")?;
12-
let hv_details = format!("{:#x?}", hv);
13+
// ELF note type attached to the "XSAVE" architecture component of the dumped thread.
const NT_X86_XSTATE: u32 = 0x202;
14+
// Auxiliary-vector entry type whose value carries the program entry point.
const AT_ENTRY: u64 = 9;
15+
// Auxiliary-vector entry type that marks the end of the vector.
const AT_NULL: u64 = 0;
16+
17+
/// Structure to hold the crash dump context
18+
/// This structure contains the information needed to create a core dump
19+
#[derive(Debug)]
20+
pub(crate) struct CrashDumpContext<'a> {
21+
// Borrowed memory regions; each pairs a guest address range with a host range.
regions: &'a [MemoryRegion],
22+
// The 27 register values copied verbatim into the thread's `gpr_state`.
regs: [u64; 27],
23+
// Raw XSAVE bytes; may be empty, in which case no XSAVE component is emitted.
xsave: Vec<u8>,
24+
// Entry point address, published through the `AT_ENTRY` auxiliary-vector entry.
entry: u64,
25+
}
26+
27+
impl<'a> CrashDumpContext<'a> {
28+
pub(crate) fn new(
29+
regions: &'a [MemoryRegion],
30+
regs: [u64; 27],
31+
xsave: Vec<u8>,
32+
entry: u64,
33+
) -> Self {
34+
Self {
35+
regions,
36+
regs,
37+
xsave,
38+
entry,
39+
}
40+
}
41+
}
42+
43+
/// Structure that contains the process information for the core dump
44+
/// This serves as a source of information for `elfcore`'s [`CoreDumpBuilder`]
45+
struct GuestView {
46+
// Guest address ranges (with protection flags) reported via `va_regions()`.
regions: Vec<VaRegion>,
47+
// Thread descriptions reported via `threads()`; `new` stores exactly one.
threads: Vec<ThreadView>,
48+
// Auxiliary vector reported via `aux_vector()`; `new` stores AT_ENTRY then AT_NULL.
aux_vector: Vec<elfcore::Elf64_Auxv>,
49+
}
1350

14-
// write hypervisor details such as registers, info about mapped memory regions, etc.
15-
temp_file.write_all(hv_details.as_bytes())?;
16-
temp_file.write_all(b"================ MEMORY DUMP =================\n")?;
51+
impl GuestView {
52+
fn new(ctx: &CrashDumpContext) -> Self {
53+
// Map the regions to the format `CoreDumpBuilder` expects
54+
let regions = ctx
55+
.regions
56+
.iter()
57+
.filter(|r| !r.host_region.is_empty())
58+
.map(|r| VaRegion {
59+
begin: r.guest_region.start as u64,
60+
end: r.guest_region.end as u64,
61+
offset: r.host_region.start as u64,
62+
protection: VaProtection {
63+
is_private: false,
64+
read: r.flags.contains(MemoryRegionFlags::READ),
65+
write: r.flags.contains(MemoryRegionFlags::WRITE),
66+
execute: r.flags.contains(MemoryRegionFlags::EXECUTE),
67+
},
68+
mapped_file_name: None,
69+
})
70+
.collect();
1771

18-
// write the raw memory dump for each memory region
19-
for region in hv.get_memory_regions() {
20-
if region.host_region.start == 0 || region.host_region.is_empty() {
21-
continue;
72+
// The xsave state is checked as it can be empty
73+
let mut components = vec![];
74+
if !ctx.xsave.is_empty() {
75+
components.push(ArchComponentState {
76+
name: "XSAVE",
77+
note_type: NT_X86_XSTATE,
78+
note_name: b"LINUX",
79+
data: ctx.xsave.clone(),
80+
});
2281
}
23-
// SAFETY: we got this memory region from the hypervisor so should never be invalid
24-
let region_slice = unsafe {
25-
std::slice::from_raw_parts(
26-
region.host_region.start as *const u8,
27-
region.host_region.len(),
28-
)
82+
83+
// Create the thread view
84+
// The thread view contains the information about the thread
85+
// NOTE: Some of these fields are not used in the current implementation
86+
let thread = ThreadView {
87+
flags: 0, // Kernel flags for the process
88+
tid: 1,
89+
uid: 0, // User ID
90+
gid: 0, // Group ID
91+
comm: "\0".to_string(),
92+
ppid: 0, // Parent PID
93+
pgrp: 0, // Process group ID
94+
nice: 0, // Nice value
95+
state: 0, // Process state
96+
utime: 0, // User time
97+
stime: 0, // System time
98+
cutime: 0, // Children User time
99+
cstime: 0, // Children User time
100+
cursig: 0, // Current signal
101+
session: 0, // Session ID of the process
102+
sighold: 0, // Blocked signal
103+
sigpend: 0, // Pending signal
104+
cmd_line: "\0".to_string(),
105+
106+
arch_state: Box::new(ArchState {
107+
gpr_state: ctx.regs.to_vec(),
108+
components,
109+
}),
29110
};
30-
temp_file.write_all(region_slice)?;
111+
112+
// Create the auxv vector
113+
// The first entry is AT_ENTRY, which is the entry point of the program
114+
// The entry point is the address where the program starts executing
115+
// This helps the debugger to know that the entry is changed by an offset
116+
// so the symbols can be loaded correctly.
117+
// The second entry is AT_NULL, which marks the end of the vector
118+
let auxv = vec![
119+
Elf64_Auxv {
120+
a_type: AT_ENTRY,
121+
a_val: ctx.entry,
122+
},
123+
Elf64_Auxv {
124+
a_type: AT_NULL,
125+
a_val: 0,
126+
},
127+
];
128+
129+
Self {
130+
regions,
131+
threads: vec![thread],
132+
aux_vector: auxv,
133+
}
134+
}
135+
}
136+
137+
impl ProcessInfoSource for GuestView {
138+
fn pid(&self) -> i32 {
139+
1
140+
}
141+
fn threads(&self) -> &[elfcore::ThreadView] {
142+
&self.threads
31143
}
32-
temp_file.flush()?;
144+
fn page_size(&self) -> usize {
145+
0x1000
146+
}
147+
fn aux_vector(&self) -> Option<&[elfcore::Elf64_Auxv]> {
148+
Some(&self.aux_vector)
149+
}
150+
fn va_regions(&self) -> &[elfcore::VaRegion] {
151+
&self.regions
152+
}
153+
fn mapped_files(&self) -> Option<&[elfcore::MappedFile]> {
154+
None
155+
}
156+
}
33157

34-
// persist the tempfile to disk
35-
let persist_path = temp_file.path().with_extension("dmp");
158+
/// Structure that reads the guest memory
159+
/// This structure serves as a custom memory reader for `elfcore`'s
160+
/// [`CoreDumpBuilder`]
161+
struct GuestMemReader {
162+
// Owned copy of the memory regions; reads look up the guest range and copy from the host range.
regions: Vec<MemoryRegion>,
163+
}
164+
165+
impl GuestMemReader {
166+
fn new(ctx: &CrashDumpContext) -> Self {
167+
Self {
168+
regions: ctx.regions.to_vec(),
169+
}
170+
}
171+
}
172+
173+
impl ReadProcessMemory for GuestMemReader {
174+
fn read_process_memory(
175+
&mut self,
176+
base: usize,
177+
buf: &mut [u8],
178+
) -> std::result::Result<usize, CoreError> {
179+
for r in self.regions.iter() {
180+
// Check if the base address is within the guest region
181+
if base >= r.guest_region.start && base < r.guest_region.end {
182+
let offset = base - r.guest_region.start;
183+
let region_slice = unsafe {
184+
std::slice::from_raw_parts(
185+
r.host_region.start as *const u8,
186+
r.host_region.len(),
187+
)
188+
};
189+
190+
// Calculate how much we can copy
191+
let copy_size = min(buf.len(), region_slice.len() - offset);
192+
if copy_size == 0 {
193+
return std::result::Result::Ok(0);
194+
}
195+
196+
// Only copy the amount that fits in both buffers
197+
buf[..copy_size].copy_from_slice(&region_slice[offset..offset + copy_size]);
198+
199+
// Return the number of bytes copied
200+
return std::result::Result::Ok(copy_size);
201+
}
202+
}
203+
204+
// If we reach here, we didn't find a matching region
205+
std::result::Result::Ok(0)
206+
}
207+
}
208+
209+
/// Create core dump file from the hypervisor information
210+
///
211+
/// This function generates an ELF core dump file capturing the hypervisor's state,
212+
/// which can be used for debugging when crashes occur. The file is created in the
213+
/// system's temporary directory with extension '.elf' and the path is printed to stdout and logs.
214+
///
215+
/// # Arguments
216+
/// * `hv`: Reference to the hypervisor implementation
217+
///
218+
/// # Returns
219+
/// * `Result<()>`: Success or error
220+
pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> {
221+
log::info!("Creating core dump file...");
222+
223+
// Create a temporary file with a recognizable prefix
224+
let temp_file = NamedTempFile::with_prefix("hl_core_")
225+
.map_err(|e| new_error!("Failed to create temporary file: {:?}", e))?;
226+
227+
// Get crash context from hypervisor
228+
let ctx = hv
229+
.crashdump_context()
230+
.map_err(|e| new_error!("Failed to get crashdump context: {:?}", e))?;
231+
232+
// Set up data sources for the core dump
233+
let guest_view = GuestView::new(&ctx);
234+
let memory_reader = GuestMemReader::new(&ctx);
235+
236+
// Create and write core dump
237+
let core_builder = CoreDumpBuilder::from_source(
238+
Box::new(guest_view) as Box<dyn ProcessInfoSource>,
239+
Box::new(memory_reader) as Box<dyn ReadProcessMemory>,
240+
);
241+
242+
core_builder
243+
.write(&temp_file)
244+
.map_err(|e| new_error!("Failed to write core dump: {:?}", e))?;
245+
246+
let persist_path = temp_file.path().with_extension("elf");
36247
temp_file
37248
.persist(&persist_path)
38-
.map_err(|e| new_error!("Failed to persist crashdump file: {:?}", e))?;
249+
.map_err(|e| new_error!("Failed to persist core dump file: {:?}", e))?;
250+
251+
let path_string = persist_path.to_string_lossy().to_string();
39252

40-
println!("Memory dumped to file: {:?}", persist_path);
41-
log::error!("Memory dumped to file: {:?}", persist_path);
253+
println!("Core dump created successfully: {}", path_string);
254+
log::error!("Core dump file: {}", path_string);
42255

43256
Ok(())
44257
}

src/hyperlight_host/src/hypervisor/hyperv_linux.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ use mshv_bindings::{
4949
use mshv_ioctls::{Mshv, VcpuFd, VmFd};
5050
use tracing::{instrument, Span};
5151

52+
#[cfg(crashdump)]
53+
use super::crashdump;
5254
use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT};
5355
#[cfg(gdb)]
5456
use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, MshvDebug};
@@ -658,8 +660,48 @@ impl Hypervisor for HypervLinuxDriver {
658660
}
659661

660662
#[cfg(crashdump)]
661-
fn get_memory_regions(&self) -> &[MemoryRegion] {
662-
&self.mem_regions
663+
fn crashdump_context(&self) -> Result<super::crashdump::CrashDumpContext> {
664+
let mut regs = [0; 27];
665+
666+
let vcpu_regs = self.vcpu_fd.get_regs()?;
667+
let sregs = self.vcpu_fd.get_sregs()?;
668+
let xsave = self.vcpu_fd.get_xsave()?;
669+
670+
// Set up the registers for the crash dump
671+
regs[0] = vcpu_regs.r15; // r15
672+
regs[1] = vcpu_regs.r14; // r14
673+
regs[2] = vcpu_regs.r13; // r13
674+
regs[3] = vcpu_regs.r12; // r12
675+
regs[4] = vcpu_regs.rbp; // rbp
676+
regs[5] = vcpu_regs.rbx; // rbx
677+
regs[6] = vcpu_regs.r11; // r11
678+
regs[7] = vcpu_regs.r10; // r10
679+
regs[8] = vcpu_regs.r9; // r9
680+
regs[9] = vcpu_regs.r8; // r8
681+
regs[10] = vcpu_regs.rax; // rax
682+
regs[11] = vcpu_regs.rcx; // rcx
683+
regs[12] = vcpu_regs.rdx; // rdx
684+
regs[13] = vcpu_regs.rsi; // rsi
685+
regs[14] = vcpu_regs.rdi; // rdi
686+
regs[15] = 0; // orig rax
687+
regs[16] = vcpu_regs.rip; // rip
688+
regs[17] = sregs.cs.selector as u64; // cs
689+
regs[18] = vcpu_regs.rflags; // eflags
690+
regs[19] = vcpu_regs.rsp; // rsp
691+
regs[20] = sregs.ss.selector as u64; // ss
692+
regs[21] = sregs.fs.base; // fs_base
693+
regs[22] = sregs.gs.base; // gs_base
694+
regs[23] = sregs.ds.selector as u64; // ds
695+
regs[24] = sregs.es.selector as u64; // es
696+
regs[25] = sregs.fs.selector as u64; // fs
697+
regs[26] = sregs.gs.selector as u64; // gs
698+
699+
Ok(crashdump::CrashDumpContext::new(
700+
&self.mem_regions,
701+
regs,
702+
xsave.buffer.to_vec(),
703+
self.entrypoint,
704+
))
663705
}
664706

665707
#[cfg(gdb)]

0 commit comments

Comments
 (0)