From d7a1c6255b863935988b8b7a88baa7b6df306cc5 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 6 Aug 2021 17:34:56 +0200 Subject: [PATCH] Fix possible race condition in paranoid ISRs. Additionally, because it turned out to be infeasible to rely on link-time constants in global_asm! code, I have also converted the interrupt handlers to naked fns. This removes the proc-macro-reliant "paste" dependency, but inserts a tiny ud2 at the end of every ISR. --- Cargo.lock | 7 - Cargo.toml | 1 - src/arch/x86_64/consts.rs | 3 +- src/arch/x86_64/gdt.rs | 5 +- src/arch/x86_64/interrupt/exception.rs | 6 +- src/arch/x86_64/interrupt/handler.rs | 206 +++++++++++++++++++------ 6 files changed, 169 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 706ed5e2..6184caf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,7 +75,6 @@ dependencies = [ "linked_list_allocator 0.9.0", "log", "memoffset", - "paste", "raw-cpuid 8.1.2", "redox_syscall", "rmm", @@ -131,12 +130,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "paste" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" - [[package]] name = "plain" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index 9f651a64..c58a7ec9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,6 @@ memoffset = { version = "0.6", features = ["unstable_const"] } redox_syscall = { path = "syscall" } slab_allocator = { path = "slab_allocator", optional = true } spin = "0.9" -paste = "1" rmm = { path = "rmm", default-features = false } [dependencies.goblin] diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index 5589106e..2d9ac401 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -28,7 +28,8 @@ pub const KERNEL_PERCPU_OFFSET: usize = KERNEL_TMP_MISC_OFFSET - PML4_SIZE; pub const KERNEL_PERCPU_PML4: usize = (KERNEL_PERCPU_OFFSET & PML4_MASK)/PML4_SIZE; /// Size of kernel percpu variables - pub const KERNEL_PERCPU_SIZE: usize = 64 * 1024; // 64 KB + pub const KERNEL_PERCPU_SHIFT: u8 = 16; // 2^16 = 64 KiB + pub const KERNEL_PERCPU_SIZE: usize = 1_usize << KERNEL_PERCPU_SHIFT; /// Offset of physmap // This needs to match RMM's PHYS_OFFSET diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs index 94ad795b..5e974c3d 100644 --- a/src/arch/x86_64/gdt.rs +++ b/src/arch/x86_64/gdt.rs @@ -52,7 +52,7 @@ static mut INIT_GDT: [GdtEntry; 4] = [ ]; #[thread_local] -pub static mut GDT: [GdtEntry; 9] = [ +pub static mut GDT: [GdtEntry; 10] = [ // Null GdtEntry::new(0, 0, 0, 0), // Kernel code @@ -71,6 +71,9 @@ pub static mut GDT: [GdtEntry; 9] = [ GdtEntry::new(0, 0, GDT_A_PRESENT | GDT_A_RING_3 | GDT_A_TSS_AVAIL, 0), // TSS must be 16 bytes long, twice the normal size GdtEntry::new(0, 0, 0, 0), + // Unused entry which stores the CPU ID. This is necessary for paranoid interrupts as they have + // no other way of determining it. + GdtEntry::new(0, 0, 0, 0), ]; #[repr(C, align(16))] diff --git a/src/arch/x86_64/interrupt/exception.rs b/src/arch/x86_64/interrupt/exception.rs index 48c0353d..242c7cf3 100644 --- a/src/arch/x86_64/interrupt/exception.rs +++ b/src/arch/x86_64/interrupt/exception.rs @@ -18,7 +18,7 @@ interrupt_stack!(divide_by_zero, |stack| { ksignal(SIGFPE); }); -interrupt_stack!(debug, super_atomic: swapgs_iff_ring3_slow!, |stack| { +interrupt_stack!(debug, @paranoid, |stack| { let mut handled = false; // Disable singlestep before there is a breakpoint, since the breakpoint @@ -41,7 +41,7 @@ interrupt_stack!(debug, super_atomic: swapgs_iff_ring3_slow!, |stack| { } }); -interrupt_stack!(non_maskable, super_atomic: swapgs_iff_ring3_slow!, |stack| { +interrupt_stack!(non_maskable, @paranoid, |stack| { println!("Non-maskable interrupt"); stack.dump(); }); @@ -158,7 +158,7 @@ interrupt_error!(alignment_check, |stack| { ksignal(SIGBUS); }); -interrupt_stack!(machine_check, super_atomic: swapgs_iff_ring3_slow!, |stack| { +interrupt_stack!(machine_check, @paranoid, |stack| { println!("Machine check fault"); stack.dump(); stack_trace(); diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index d3921f0c..58ee4dd7 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -303,38 +303,125 @@ macro_rules! swapgs_iff_ring3_fast_errorcode { 1: " }; } -#[macro_export] -macro_rules! swapgs_iff_ring3_slow { + +#[cfg(feature = "x86_fsgsbase")] +macro_rules! save_gsbase_paranoid { + () => { " + // Unused: {IA32_GS_BASE} + rdgsbase rax + push rax + " } +} +#[cfg(feature = "x86_fsgsbase")] +macro_rules! restore_gsbase_paranoid { () => { " - push rax - push rdx - push rcx - mov ecx, 0xC0000102 - rdmsr - shl rdx, 32 - or eax, edx - test rdx, rdx - jnz 1f - swapgs - 1: - pop rcx - pop rdx - pop rax + // Unused: {IA32_GS_BASE} + pop rax + wrgsbase rax " } } +#[cfg(not(feature = "x86_fsgsbase"))] +macro_rules! save_gsbase_paranoid { + () => { " + mov ecx, {IA32_GS_BASE} + rdmsr + shl rdx, 32 + or rax, rdx + + push rax + " } +} +#[cfg(not(feature = "x86_fsgsbase"))] +macro_rules! restore_gsbase_paranoid { + () => { " + pop rdx + + mov ecx, {IA32_GS_BASE} + mov eax, edx + shr rdx, 32 + wrmsr + " } +} + +#[cfg(feature = "x86_fsgsbase")] +macro_rules! set_gsbase_paranoid { + () => { " + // Unused: {IA32_GS_BASE} + wrgsbase rax + " } +} +#[cfg(not(feature = "x86_fsgsbase"))] +macro_rules! set_gsbase_paranoid { + () => { " + mov ecx, {IA32_GS_BASE} + mov rdx, rax + shr rdx, 32 + wrmsr + " } +} + +macro_rules! save_and_set_gsbase_paranoid { + // For paranoid interrupt entries, we have to be extremely careful with how we use IA32_GS_BASE + // and IA32_KERNEL_GS_BASE. If FSGSBASE is enabled, then we have no way to differentiate these + // two, as paranoid interrupts (e.g. NMIs) can occur even in kernel mode. In fact, they can + // even occur within another IRQ, so we cannot check the the privilege level via the stack. + // + // TODO: Linux uses the Interrupt Stack Table to figure out which NMIs were nested. Perhaps + // this could be done here, because if nested (sp > initial_sp), that means the NMI could not + // have come from userspace. But then, knowing the initial sp would somehow have to involve + // percpu, which brings us back to square one. But it might be useful if we would allow faults + // in NMIs. + // + // What we do instead, is using a special entry in the GDT, since we know that the GDT will + // always be thread local, as it contains the TSS. This gives us more than 32 bits to work + // with, which already is the largest x2APIC ID that an x86 CPU can handle. Luckily we can also + // use the stack, even though there might be interrupts in between. + () => { concat!( + save_gsbase_paranoid!(), + + // Allocate stack space for 8 bytes GDT base and 2 bytes size (ignored). + "sub rsp, 16\n", + // Set it to the GDT base. + "sgdt [rsp + 6]\n", + // Get the base pointer + " + mov rax, [rsp + 8] + add rsp, 16 + ", + // Load the lower 32 bits of that GDT entry. + "mov eax, [rax]\n", + // Calculate the percpu offset. + " + mov rbx, {KERNEL_PERCPU_OFFSET} + shl rax, {KERNEL_PERCPU_SHIFT} + add rax, rbx + ", + // Set GSBASE to RAX accordingly + set_gsbase_paranoid!(), + ) } +} +macro_rules! nop { + () => { " + // Unused: {IA32_GS_BASE} {KERNEL_PERCPU_OFFSET} {KERNEL_PERCPU_SHIFT} + " } +} #[macro_export] macro_rules! interrupt_stack { // XXX: Apparently we cannot use $expr and check for bool exhaustiveness, so we will have to // use idents directly instead. - ($name:ident, super_atomic: $is_super_atomic:ident!, |$stack:ident| $code:block) => { - paste::item! { - #[no_mangle] - unsafe extern "C" fn [<__interrupt_ $name>]($stack: &mut $crate::arch::x86_64::interrupt::InterruptStack) { - // Deadlock safety: interrupts are not normally enabled in the kernel, except in - // kmain. However, no locks for context list nor even individual context locks, are - // ever meant to be acquired there. - let _guard = $crate::ptrace::set_process_regs($stack); + ($name:ident, $save1:ident!, $save2:ident!, $rstor2:ident!, $rstor1:ident!, is_paranoid: $is_paranoid:expr, |$stack:ident| $code:block) => { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner($stack: &mut $crate::arch::x86_64::interrupt::InterruptStack) { + let _guard; + + if !$is_paranoid { + // Deadlock safety: (non-paranoid) interrupts are not normally enabled in the + // kernel, except in kmain. However, no locks for context list nor even + // individual context locks, are ever meant to be acquired there. + _guard = $crate::ptrace::set_process_regs($stack); + } // TODO: Force the declarations to specify unsafe? @@ -343,46 +430,61 @@ macro_rules! interrupt_stack { $code } } - - function!($name => { + asm!(concat!( // Backup all userspace registers to stack - $is_super_atomic!(), + $save1!(), "push rax\n", push_scratch!(), push_preserved!(), + $save2!(), + // TODO: Map PTI // $crate::arch::x86_64::pti::map(); // Call inner function with pointer to stack - "mov rdi, rsp\n", - "call __interrupt_", stringify!($name), "\n", + " + mov rdi, rsp + call {inner} + ", // TODO: Unmap PTI // $crate::arch::x86_64::pti::unmap(); + $rstor2!(), + // Restore all userspace registers pop_preserved!(), pop_scratch!(), - $is_super_atomic!(), + $rstor1!(), "iretq\n", - }); + ), + + inner = sym inner, + IA32_GS_BASE = const(x86::msr::IA32_GS_BASE), + KERNEL_PERCPU_SHIFT = const(crate::KERNEL_PERCPU_SHIFT), + KERNEL_PERCPU_OFFSET = const(crate::KERNEL_PERCPU_OFFSET), + + options(noreturn), + + ); } }; - ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, super_atomic: swapgs_iff_ring3_fast!, |$stack| $code); }; + ($name:ident, |$stack:ident| $code:block) => { interrupt_stack!($name, swapgs_iff_ring3_fast!, nop!, nop!, swapgs_iff_ring3_fast!, is_paranoid: false, |$stack| $code); }; + ($name:ident, @paranoid, |$stack:ident| $code:block) => { interrupt_stack!($name, nop!, save_and_set_gsbase_paranoid!, restore_gsbase_paranoid!, nop!, is_paranoid: true, |$stack| $code); } } #[macro_export] macro_rules! interrupt { ($name:ident, || $code:block) => { - paste::item! { - #[no_mangle] - unsafe extern "C" fn [<__interrupt_ $name>]() { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner() { $code } - function!($name => { + asm!(concat!( // Backup all userspace registers to stack swapgs_iff_ring3_fast!(), "push rax\n", @@ -392,7 +494,7 @@ macro_rules! interrupt { // $crate::arch::x86_64::pti::map(); // Call inner function with pointer to stack - "call __interrupt_", stringify!($name), "\n", + "call {inner}\n", // TODO: Unmap PTI // $crate::arch::x86_64::pti::unmap(); @@ -402,7 +504,12 @@ macro_rules! interrupt { swapgs_iff_ring3_fast!(), "iretq\n", - }); + ), + + inner = sym inner, + + options(noreturn), + ); } }; } @@ -410,9 +517,9 @@ macro_rules! interrupt { #[macro_export] macro_rules! interrupt_error { ($name:ident, |$stack:ident| $code:block) => { - paste::item! { - #[no_mangle] - unsafe extern "C" fn [<__interrupt_ $name>]($stack: &mut $crate::arch::x86_64::interrupt::handler::InterruptErrorStack) { + #[naked] + pub unsafe extern "C" fn $name() { + unsafe extern "C" fn inner($stack: &mut $crate::arch::x86_64::interrupt::handler::InterruptErrorStack) { let _guard; // Only set_ptrace_process_regs if this error occured from userspace. If this fault @@ -432,7 +539,7 @@ macro_rules! interrupt_error { } } - function!($name => { + asm!(concat!( swapgs_iff_ring3_fast_errorcode!(), // Move rax into code's place, put code in last instead (to be // compatible with InterruptStack) @@ -449,8 +556,10 @@ macro_rules! interrupt_error { // $crate::arch::x86_64::pti::map(); // Call inner function with pointer to stack - "mov rdi, rsp\n", - "call __interrupt_", stringify!($name), "\n", + " + mov rdi, rsp + call {inner} + ", // TODO: Unmap PTI // $crate::arch::x86_64::pti::unmap(); @@ -462,9 +571,14 @@ macro_rules! interrupt_error { pop_preserved!(), pop_scratch!(), - swapgs_iff_ring3_fast_errorcode!(), + // The error code has already been popped, so use the regular macro. + swapgs_iff_ring3_fast!(), "iretq\n", - }); + ), + + inner = sym inner, + + options(noreturn)); } }; } -- GitLab