//! x86_64 system call entry points and setup of the MSRs used by the
//! `syscall` instruction.
use crate::arch::macros::InterruptStack;
jD91mZM2's avatar
jD91mZM2 committed
2
use crate::arch::{gdt, pti};
3 4
use crate::common::unique::Unique;
use crate::{context, ptrace, syscall};
5 6 7 8 9
use x86::shared::msr;

/// Programs the model-specific registers that control the `syscall`
/// instruction so that it enters the kernel at `syscall_instruction`.
///
/// # Safety
///
/// Writes MSRs directly. The caller must be running in ring 0 and must only
/// call this once the GDT/TSS referenced here have been set up.
pub unsafe fn init() {
    // Kernel code segment selector (GDT index << 3), placed in bits 32.. of STAR.
    msr::wrmsr(msr::IA32_STAR, ((gdt::GDT_KERNEL_CODE as u64) << 3) << 32);
    // Address the CPU jumps to when userspace executes `syscall`.
    msr::wrmsr(msr::IA32_LSTAR, syscall_instruction as u64);
    // NOTE(review): 0x0300 masks only bits 8 and 9; the direction flag
    // (bit 10) is left as userspace set it — confirm that is intended.
    msr::wrmsr(msr::IA32_FMASK, 0x0300); // Clear trap flag and interrupt enable
    // After `swapgs` in the entry stub, `gs` addresses the TSS, which is where
    // the stub finds the kernel stack pointer.
    msr::wrmsr(msr::IA32_KERNEL_GS_BASE, &gdt::TSS as *const _ as u64);

    // Set bit 0 of EFER (system call extensions enable) while preserving the
    // other bits.
    let efer = msr::rdmsr(msr::IA32_EFER);
    msr::wrmsr(msr::IA32_EFER, efer | 1);
}

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
// Generates the Rust-side handler that both syscall entry stubs in this
// file call once the register frame has been pushed.
//
// Invocation form:
//     with_interrupt_stack! { unsafe fn NAME(STACK) -> usize { BODY } }
// expands to `unsafe fn NAME(stack: *mut SyscallStack)`. BODY is evaluated
// with STACK bound to `&mut SyscallStack`, and its value is written to the
// saved `rax` slot of the frame.
//
// Not a function pointer because it somehow messes up the returning
// from clone() (via clone_ret()). Not sure what the problem is.
macro_rules! with_interrupt_stack {
    (unsafe fn $wrapped:ident($stack:ident) -> usize $code:block) => {
        /// Runs the syscall body around the bookkeeping it needs: publishes
        /// the saved register frame on the current context, gives ptrace a
        /// chance to intercept, stores the body's result in the saved `rax`,
        /// and finally clears the published frame again.
        ///
        /// Because of how clones work, we need a function that returns a
        /// usize. Here, `inner` will be this function. The child process in a
        /// clone will terminate this function with a 0 return value, and it
        /// might also have updated the interrupt_stack pointer.
        ///
        /// NOTE(review): the generated function itself returns nothing; the
        /// `usize` mentioned above is the value of the body block, which is
        /// stored in the saved `rax` rather than returned.
        #[inline(never)]
        unsafe fn $wrapped(stack: *mut SyscallStack) {
            // The caller passes the stack pointer it captured after pushing
            // the register frame.
            let stack = &mut *stack;
            {
                // Record where this context's saved registers live. The
                // enclosing block ends before the body runs, so `contexts`
                // and the write guard are released first.
                let contexts = context::contexts();
                if let Some(context) = contexts.current() {
                    let mut context = context.write();
                    // Only recorded when the context has a kernel stack.
                    if let Some(ref mut kstack) = context.kstack {
                        context.regs = Some((kstack.as_mut_ptr() as usize, Unique::new_unchecked(&mut stack.interrupt_stack)));
                    }
                }
            }

            // Pre-syscall ptrace callback. The body below runs when this
            // yields `None` or `Some(false)`, and is skipped on `Some(true)`.
            let is_sysemu = ptrace::breakpoint_callback(false);
            if !is_sysemu.unwrap_or(false) {
                // If not on a sysemu breakpoint
                let $stack = &mut *stack;
                // The body's value becomes the syscall return value seen in rax.
                $stack.interrupt_stack.scratch.rax = $code;

                if is_sysemu.is_some() {
                    // Only callback if there was a pre-syscall
                    // callback too.
                    ptrace::breakpoint_callback(false);
                }
            }

            {
                // The frame is about to be popped, so stop advertising it.
                let contexts = context::contexts();
                if let Some(context) = contexts.current() {
                    let mut context = context.write();
                    context.regs = None;
                }
            }
        }
    }
}

62 63
#[naked]
pub unsafe extern fn syscall_instruction() {
64 65 66 67 68 69 70 71
    with_interrupt_stack! {
        unsafe fn inner(stack) -> usize {
            let rbp;
            asm!("" : "={rbp}"(rbp) : : : "intel", "volatile");

            let scratch = &stack.interrupt_stack.scratch;
            syscall::syscall(scratch.rax, scratch.rdi, scratch.rsi, scratch.rdx, scratch.r10, scratch.r8, rbp, stack)
        }
72 73 74
    }

    // Yes, this is magic. No, you don't need to understand
75
    asm!("
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
          swapgs                    // Set gs segment to TSS
          mov gs:[28], rsp          // Save userspace rsp
          mov rsp, gs:[4]           // Load kernel rsp
          push 5 * 8 + 3            // Push userspace data segment
          push qword ptr gs:[28]    // Push userspace rsp
          mov qword ptr gs:[28], 0  // Clear userspace rsp
          push r11                  // Push rflags
          push 4 * 8 + 3            // Push userspace code segment
          push rcx                  // Push userspace return pointer
          swapgs                    // Restore gs
          "
          :
          :
          :
          : "intel", "volatile");

92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
    // Push scratch registers
    scratch_push!();
    asm!("push fs
         mov r11, 0x18
         mov fs, r11
         push rbx"
         : : : : "intel", "volatile");

    // Get reference to stack variables
    let rsp: usize;
    asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");

    // Map kernel
    pti::map();

    inner(rsp as *mut SyscallStack);

    // Unmap kernel
    pti::unmap();

    // Interrupt return
    asm!("pop rbx
         pop fs"
         : : : : "intel", "volatile");
    scratch_pop!();
    asm!("iretq" : : : : "intel", "volatile");
118
}
119

120 121
#[naked]
pub unsafe extern fn syscall() {
122 123 124 125 126 127 128 129
    with_interrupt_stack! {
        unsafe fn inner(stack) -> usize {
            let rbp;
            asm!("" : "={rbp}"(rbp) : : : "intel", "volatile");

            let scratch = &stack.interrupt_stack.scratch;
            syscall::syscall(scratch.rax, stack.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack)
        }
130 131
    }

132
    // Push scratch registers
133 134
    scratch_push!();
    asm!("push fs
135
         mov r11, 0x18
136 137
         mov fs, r11
         push rbx"
138
         : : : : "intel", "volatile");
139

140 141 142 143
    // Get reference to stack variables
    let rsp: usize;
    asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");

144 145 146
    // Map kernel
    pti::map();

147
    inner(rsp as *mut SyscallStack);
148 149 150 151

    // Unmap kernel
    pti::unmap();

152
    // Interrupt return
153 154 155 156 157
    asm!("pop rbx
         pop fs"
         : : : : "intel", "volatile");
    scratch_pop!();
    asm!("iretq" : : : : "intel", "volatile");
158 159
}

160 161 162
#[allow(dead_code)]
#[repr(packed)]
pub struct SyscallStack {
163
    pub rbx: usize,
164 165
    pub interrupt_stack: InterruptStack,

166 167 168
    // Will only be present if syscall is called from another ring
    pub rsp: usize,
    pub ss: usize,
169 170
}

171
#[naked]
172 173 174 175 176 177 178 179 180 181 182 183
pub unsafe extern "C" fn clone_ret() {
    // The C x86_64 ABI specifies that rbp is pushed to save the old
    // call frame. Popping rbp means we're using the parent's call
    // frame and thus will not only return from this function but also
    // from the function above this one.
    // When this is called, the stack should have been
    // interrupt->inner->syscall->clone
    // then changed to
    // interrupt->inner->clone_ret->clone
    // so this will return from "inner".

    asm!("pop rbp" : : : : "intel", "volatile");
184
}