From 67edfbfc42b040ac3701fee3ba3ef3f2dbabcc07 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 31 Dec 2021 15:15:22 +0100 Subject: [PATCH 01/44] Remove the unused USER_HEAP_PML4. --- src/arch/x86_64/consts.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index 7df51adc..b7b7ccbc 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -42,12 +42,8 @@ /// Offset to user arguments pub const USER_ARG_OFFSET: usize = USER_OFFSET + PML4_SIZE/2; - /// Offset to user heap - pub const USER_HEAP_OFFSET: usize = USER_OFFSET + PML4_SIZE; - pub const USER_HEAP_PML4: usize = (USER_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - /// Offset to user grants - pub const USER_GRANT_OFFSET: usize = USER_HEAP_OFFSET + PML4_SIZE; + pub const USER_GRANT_OFFSET: usize = USER_OFFSET + PML4_SIZE; pub const USER_GRANT_PML4: usize = (USER_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; /// Offset to user stack -- GitLab From 846318e716d3fde2115a656823f9fa6dfb2bb880 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 2 Jan 2022 11:58:46 +0100 Subject: [PATCH 02/44] WIP: Attempt implementing fexec in userspace. --- Cargo.lock | 4 +- rmm | 2 +- src/arch/x86_64/paging/mod.rs | 7 +- src/context/context.rs | 9 - src/context/memory.rs | 5 +- src/lib.rs | 26 +- src/scheme/memory.rs | 37 +- src/scheme/sys/context.rs | 21 +- src/syscall/debug.rs | 6 +- src/syscall/mod.rs | 7 +- src/syscall/process.rs | 622 ++-------------------------------- syscall | 2 +- 12 files changed, 77 insertions(+), 671 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33f358d4..7c81df02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,9 +116,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] diff --git a/rmm b/rmm index 0944b179..9462df03 160000 --- a/rmm +++ b/rmm @@ -1 +1 @@ -Subproject commit 0944b17983223966e339a25f9328bdb77a59d5c7 +Subproject commit 9462df03e786312b6ce197cf56113d411412cbb2 diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index 347aebcf..51e35878 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -386,7 +386,10 @@ impl Page { } } - pub fn range_inclusive(start: Page, end: Page) -> PageIter { + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { start, end: r#final.next() } + } + pub fn range_exclusive(start: Page, end: Page) -> PageIter { PageIter { start, end } } @@ -406,7 +409,7 @@ impl Iterator for PageIter { type Item = Page; fn next(&mut self) -> Option { - if self.start <= self.end { + if self.start < self.end { let page = self.start; self.start = self.start.next(); Some(page) diff --git a/src/context/context.rs b/src/context/context.rs index 99a8e7f9..276626c9 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -226,12 +226,6 @@ pub struct Context { pub ksig: Option<(arch::Context, Option>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, - /// Executable image - pub image: Vec, - /// User stack - pub stack: Option, - /// User signal stack - pub sigstack: Option, /// User grants pub grants: Arc>, /// The name of the context @@ -338,9 +332,6 @@ impl Context { kstack: None, ksig: None, 
ksig_restore: false, - image: Vec::new(), - stack: None, - sigstack: None, grants: Arc::new(RwLock::new(UserGrants::default())), name: Arc::new(RwLock::new(String::new().into_boxed_str())), cwd: Arc::new(RwLock::new(String::new())), diff --git a/src/context/memory.rs b/src/context/memory.rs index 209d9080..65fa3f4a 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -77,7 +77,8 @@ impl UserGrants { // Get last used region let last = self.inner.iter().next_back().map(Region::from).unwrap_or(Region::new(VirtualAddress::new(0), 0)); // At the earliest, start at grant offset - let address = cmp::max(last.end_address().data(), crate::USER_GRANT_OFFSET); + // TODO + let address = last.start_address().data() - size; // Create new region Region::new(VirtualAddress::new(address), size) } @@ -224,7 +225,7 @@ impl Region { pub fn pages(&self) -> PageIter { Page::range_inclusive( Page::containing_address(self.start_address()), - Page::containing_address(self.end_address()) + Page::containing_address(self.final_address()) ) } diff --git a/src/lib.rs b/src/lib.rs index b1743892..5dcbcc81 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -172,33 +172,27 @@ static mut INIT_ENV: &[u8] = &[]; /// Initialize userspace by running the initfs:bin/init process /// This function will also set the CWD to initfs:bin and open debug: as stdio pub extern fn userspace_init() { - let path = "initfs:/bin/init"; - let env = unsafe { INIT_ENV }; + let path = "initfs:/bin/bootstrap"; if let Err(err) = syscall::chdir("initfs:") { info!("Failed to enter initfs ({}).", err); panic!("Unexpected error while trying to enter initfs:."); } - assert_eq!(syscall::open("debug:", syscall::flag::O_RDONLY).map(FileHandle::into), Ok(0)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(1)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(2)); - let fd = syscall::open(path, syscall::flag::O_RDONLY).expect("failed to open init"); - let mut args = Vec::new(); - args.push(path.as_bytes().to_vec().into_boxed_slice()); + let mut total_bytes_read = 0; + let mut data = Vec::new(); - let mut vars = Vec::new(); - for var in env.split(|b| *b == b'\n') { - if ! var.is_empty() { - vars.push(var.to_vec().into_boxed_slice()); - } + loop { + data.resize(total_bytes_read + 4096, 0); + let bytes_read = syscall::file_op_mut_slice(syscall::number::SYS_READ, fd, &mut data[total_bytes_read..]).expect("failed to read init"); + if bytes_read == 0 { break } + total_bytes_read += bytes_read; } + let _ = syscall::close(fd); - syscall::fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None).expect("failed to execute init"); - - panic!("init returned"); + crate::syscall::process::usermode_bootstrap(data.into_boxed_slice()); } /// This is the kernel entry point for the primary CPU. 
The arch crate is responsible for calling this diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 2636ebf8..cac3743d 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -17,34 +17,33 @@ impl MemoryScheme { pub fn fmap_anonymous(map: &Map) -> Result { //TODO: Abstract with other grant creation if map.size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + return Ok(0); + } + let contexts = context::contexts(); + let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut grants = context.grants.write(); - let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - { - // Make sure it's *absolutely* not mapped already - // TODO: Keep track of all allocated memory so this isn't necessary + { + // Make sure it's *absolutely* not mapped already + // TODO: Keep track of all allocated memory so this isn't necessary - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(map.address).kind()) }; + let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) }; - for page in region.pages() { - if active_table.translate_page(page).is_some() { - println!("page at {:#x} was already mapped", page.start_address().data()); - return Err(Error::new(EEXIST)) - } + for page in region.pages() { + if active_table.translate_page(page).is_some() { + println!("page at {:#x} was already mapped", page.start_address().data()); + return Err(Error::new(EEXIST)) } } + } - grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); + grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); - Ok(region.start_address().data()) - } + Ok(region.start_address().data()) } } impl Scheme for MemoryScheme { diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 3002f555..1a776a27 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -26,10 +26,12 @@ pub fn resource() -> Result> { let context = context_lock.read(); let mut stat_string = String::new(); - if context.stack.is_some() { - stat_string.push('U'); - } else { + // TODO: All user programs must have some grant in order for executable memory to even + // exist, but is this a good indicator of whether it is user or kernel? 
+ if context.grants.read().is_empty() { stat_string.push('K'); + } else { + stat_string.push('U'); } match context.status { context::Status::Runnable => { @@ -77,19 +79,6 @@ pub fn resource() -> Result> { if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for shared_mem in context.image.iter() { - shared_mem.with(|mem| { - memory += mem.size(); - }); - } - if let Some(ref stack) = context.stack { - stack.with(|stack| { - memory += stack.size(); - }); - } - if let Some(ref sigstack) = context.sigstack { - memory += sigstack.size(); - } for grant in context.grants.read().iter() { if grant.is_owned() { memory += grant.size(); diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 4567058d..5f607857 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -192,8 +192,8 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - b ), //TODO: Cleanup, do not allocate - SYS_FEXEC => format!( - "fexec({}, {:?}, {:?})", + /*SYS_EXEC => format!( + "exec({}, {:?}, {:?})", b, validate_slice( c as *const [usize; 2], @@ -213,7 +213,7 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - .and_then(|s| ::core::str::from_utf8(s).ok()) ).collect::>>() }) - ), + ),*/ SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index cb64c4c1..f4e73862 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -83,7 +83,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_DUP => dup(fd, validate_slice(c as *const u8, d)?).map(FileHandle::into), SYS_DUP2 => dup2(fd, FileHandle::from(c), validate_slice(d as *const u8, e)?).map(FileHandle::into), SYS_FCNTL => fcntl(fd, c, d), - SYS_FEXEC => fexec(fd, validate_slice(c as *const [usize; 2], d)?, validate_slice(e as *const [usize; 2], f)?), SYS_FRENAME => frename(fd, validate_str(c as *const u8, d)?), SYS_FUNMAP => funmap(b, c), SYS_FMAP_OLD => { @@ -210,13 +209,12 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* let debug = { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); let name = context.name.read(); - if name.contains("redoxfs") { + if true || name.contains("redoxfs") { if a == SYS_CLOCK_GETTIME || a == SYS_YIELD { false } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) { @@ -241,7 +239,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u println!("{}", debug::format_call(a, b, c, d, e, f)); } - */ // The next lines set the current syscall in the context struct, then once the inner() function // completes, we set the current syscall to none. 
@@ -266,7 +263,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* if debug { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { @@ -285,7 +281,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } } - */ // errormux turns Result into -errno Error::mux(result) diff --git a/src/syscall/process.rs b/src/syscall/process.rs index c246568d..11fd16b7 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -6,6 +6,7 @@ use alloc::{ vec::Vec, }; use core::alloc::{GlobalAlloc, Layout}; +use core::convert::TryFrom; use core::ops::DerefMut; use core::{intrinsics, mem, str}; use spin::{RwLock, RwLockWriteGuard}; @@ -53,9 +54,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { let mut kfx_opt = None; let mut kstack_opt = None; let mut offset = 0; - let mut image = vec![]; - let mut stack_opt = None; - let mut sigstack_opt = None; let mut grants; let name; let cwd; @@ -143,74 +141,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } } - if flags.contains(CLONE_VM) { - for memory_shared in context.image.iter() { - image.push(memory_shared.clone()); - } - } else { - for memory_shared in context.image.iter() { - memory_shared.with(|memory| { - let mut new_memory = context::memory::Memory::new( - VirtualAddress::new(memory.start_address().data() + crate::USER_TMP_OFFSET), - memory.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(memory.start_address().data() as *const u8, - new_memory.start_address().data() as *mut u8, - memory.size()); - } - - new_memory.remap(memory.flags()); - image.push(new_memory.to_shared()); - }); - } - } - - if let Some(ref stack_shared) = context.stack { - if flags.contains(CLONE_STACK) { - stack_opt = Some(stack_shared.clone()); - } else { - stack_shared.with(|stack| { - let mut new_stack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_STACK_OFFSET), - stack.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(stack.start_address().data() as *const u8, - new_stack.start_address().data() as *mut u8, - stack.size()); - } - - new_stack.remap(stack.flags()); - stack_opt = Some(new_stack.to_shared()); - }); - } - } - - if let Some(ref sigstack) = context.sigstack { - let mut new_sigstack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_SIGSTACK_OFFSET), - sigstack.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(sigstack.start_address().data() as *const u8, - new_sigstack.start_address().data() as *mut u8, - sigstack.size()); - } - - new_sigstack.remap(sigstack.flags()); - sigstack_opt = Some(new_sigstack); - } - if flags.contains(CLONE_VM) { grants = Arc::clone(&context.grants); } else { @@ -438,70 +368,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { // TODO: Clone ksig? - // Setup image, heap, and grants - if flags.contains(CLONE_VM) { - // Copy user image mapping, if found - if ! image.is_empty() { - let frame = active_utable.p4()[crate::USER_PML4].pointed_frame().expect("user image not mapped"); - let flags = active_utable.p4()[crate::USER_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_PML4].set(frame, flags); - } - context.image = image; - - // Copy grant mapping - if ! 
grants.read().is_empty() { - let frame = active_utable.p4()[crate::USER_GRANT_PML4].pointed_frame().expect("user grants not mapped"); - let flags = active_utable.p4()[crate::USER_GRANT_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_GRANT_PML4].set(frame, flags); - } - context.grants = grants; - } else { - // Move copy of image - for memory_shared in image.iter_mut() { - memory_shared.with(|memory| { - let start = VirtualAddress::new(memory.start_address().data() - crate::USER_TMP_OFFSET + crate::USER_OFFSET); - memory.move_to(start, &mut new_utable); - }); - } - context.image = image; - - // Move grants - { - let mut grants = grants.write(); - let old_grants = mem::replace(&mut *grants, UserGrants::default()); - - for mut grant in old_grants.inner.into_iter() { - let start = VirtualAddress::new(grant.start_address().data() + crate::USER_GRANT_OFFSET - crate::USER_TMP_GRANT_OFFSET); - grant.move_to(start, &mut new_utable); - grants.insert(grant); - } - } - context.grants = grants; - } - - // Setup user stack - if let Some(stack_shared) = stack_opt { - if flags.contains(CLONE_STACK) { - let frame = active_utable.p4()[crate::USER_STACK_PML4].pointed_frame().expect("user stack not mapped"); - let flags = active_utable.p4()[crate::USER_STACK_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_STACK_PML4].set(frame, flags); - } else { - stack_shared.with(|stack| { - stack.move_to(VirtualAddress::new(crate::USER_STACK_OFFSET), &mut new_utable); - }); - } - context.stack = Some(stack_shared); - } - - // Setup user sigstack - if let Some(mut sigstack) = sigstack_opt { - sigstack.move_to(VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), &mut new_utable); - context.sigstack = Some(sigstack); - } - #[cfg(target_arch = "aarch64")] { if let Some(stack) = &mut context.kstack { @@ -553,18 +419,6 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> { - if reaping { - // Memory should already be unmapped - assert!(context.image.is_empty()); - assert!(context.stack.is_none()); - assert!(context.sigstack.is_none()); - } else { - // Unmap previous image, heap, grants, stack - context.image.clear(); - drop(context.stack.take()); - drop(context.sigstack.take()); - } - // NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the // main thread and another thread exit simultaneously before either one is reaped. If that // happens, then the last context that runs exit will think that there is still are still @@ -580,9 +434,7 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu let mut grants_arc = mem::take(&mut context.grants); if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) { - // TODO: Use get_mut to bypass the need to acquire a lock when there we already have an - // exclusive reference from `Arc::get_mut`. This will require updating `spin`. - let mut grants_guard = grants_lock_mut.write(); + let mut grants_guard = grants_lock_mut.get_mut(); let grants = mem::replace(&mut *grants_guard, UserGrants::default()); for grant in grants.inner.into_iter() { @@ -616,450 +468,6 @@ impl Drop for ExecFile { } } -#[allow(clippy::too_many_arguments)] -fn fexec_noreturn( - setuid: Option, - setgid: Option, - name: Box, - data: Box<[u8]>, - phdr_grant: context::memory::Grant, - args: Box<[Box<[u8]>]>, - vars: Box<[Box<[u8]>]>, - auxv: Box<[usize]>, -) -> ! 
{ - let entry; - let singlestep; - let mut sp = crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256; - - { - let (vfork, ppid, files) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exec_noreturn pid not found"); - let mut context = context_lock.write(); - - singlestep = unsafe { - ptrace::regs_for(&context).map(|s| s.is_singlestep()).unwrap_or(false) - }; - - context.name = Arc::new(RwLock::new(name)); - - context = empty(&context_lock, context, false); - - context.grants.write().insert(phdr_grant); - - #[cfg(all(target_arch = "x86_64"))] - { - context.arch.fsbase = 0; - context.arch.gsbase = 0; - - #[cfg(feature = "x86_fsgsbase")] - unsafe { - x86::bits64::segmentation::wrfsbase(0); - x86::bits64::segmentation::swapgs(); - x86::bits64::segmentation::wrgsbase(0); - x86::bits64::segmentation::swapgs(); - } - #[cfg(not(feature = "x86_fsgsbase"))] - unsafe { - x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0); - x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); - } - } - - if let Some(uid) = setuid { - context.euid = uid; - } - - if let Some(gid) = setgid { - context.egid = gid; - } - - // Map and copy new segments - { - let elf = elf::Elf::from(&data).unwrap(); - entry = elf.entry(); - - for segment in elf.segments() { - match segment.p_type { - program_header::PT_LOAD => { - let voff = segment.p_vaddr as usize % PAGE_SIZE; - let vaddr = segment.p_vaddr as usize - voff; - - let mut memory = context::memory::Memory::new( - VirtualAddress::new(vaddr), - segment.p_memsz as usize + voff, - PageFlags::new().write(true), - true - ); - - unsafe { - // Copy file data - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - segment.p_vaddr as *mut u8, - segment.p_filesz as usize); - } - - let mut flags = PageFlags::new().user(true); - - // W ^ X. If it is executable, do not allow it to be writable, even if requested - if segment.p_flags & program_header::PF_X == program_header::PF_X { - flags = flags.execute(true); - } else if segment.p_flags & program_header::PF_W == program_header::PF_W { - flags = flags.write(true); - } - - memory.remap(flags); - - context.image.push(memory.to_shared()); - }, - _ => (), - } - } - } - - // Map stack - context.stack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_STACK_OFFSET), - crate::USER_STACK_SIZE, - PageFlags::new().write(true).user(true), - true - ).to_shared()); - - // Map stack - context.sigstack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), - crate::USER_SIGSTACK_SIZE, - PageFlags::new().write(true).user(true), - true - )); - - // Data no longer required, can deallocate - drop(data); - - let mut push = |arg| { - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = arg; } - }; - - // Push auxiliary vector - push(AT_NULL); - for &arg in auxv.iter().rev() { - push(arg); - } - - drop(auxv); // no longer required - - let mut arg_size = 0; - - // Push environment variables and arguments - for iter in &[&vars, &args] { - // Push null-terminator - push(0); - - // Push pointer to content - for arg in iter.iter().rev() { - push(crate::USER_ARG_OFFSET + arg_size); - arg_size += arg.len() + 1; - } - } - - // For some reason, Linux pushes the argument count here (in - // addition to being null-terminated), but not the environment - // variable count. - // TODO: Push more counts? Less? Stop having null-termination? 
- push(args.len()); - - // Write environment and argument pointers to USER_ARG_OFFSET - if arg_size > 0 { - let mut memory = context::memory::Memory::new( - VirtualAddress::new(crate::USER_ARG_OFFSET), - arg_size, - PageFlags::new().write(true), - true - ); - - let mut arg_offset = 0; - for arg in vars.iter().rev().chain(args.iter().rev()) { - unsafe { - intrinsics::copy(arg.as_ptr(), - (crate::USER_ARG_OFFSET + arg_offset) as *mut u8, - arg.len()); - } - arg_offset += arg.len(); - - unsafe { - *((crate::USER_ARG_OFFSET + arg_offset) as *mut u8) = 0; - } - arg_offset += 1; - } - - memory.remap(PageFlags::new().user(true)); - - context.image.push(memory.to_shared()); - } - - // Args and vars no longer required, can deallocate - drop(args); - drop(vars); - - context.actions = Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])); - - let vfork = context.vfork; - context.vfork = false; - - let files = Arc::clone(&context.files); - - (vfork, context.ppid, files) - }; - - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let mut cloexec = false; - if let Some(ref file) = *file_opt { - if file.cloexec { - cloexec = true; - } - } - - if cloexec { - let _ = file_opt.take().unwrap().close(); - } - } - - if vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if ! context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - - // Go to usermode - unsafe { usermode(entry, sp, 0, usize::from(singlestep)) } -} - -pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option>, auxv: Option<(Vec, context::memory::Grant)>) -> Result { - let (uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.euid, context.egid) - }; - - let mut stat: Stat; - let name: String; - let mut data: Vec; - { - let file = ExecFile(fd); - - stat = Stat::default(); - syscall::file_op_mut_slice(syscall::number::SYS_FSTAT, file.0, &mut stat)?; - - let mut perm = stat.st_mode & 0o7; - if stat.st_uid == uid { - perm |= (stat.st_mode >> 6) & 0o7; - } - if stat.st_gid == gid { - perm |= (stat.st_mode >> 3) & 0o7; - } - if uid == 0 { - perm |= 0o7; - } - - if perm & 0o1 != 0o1 { - return Err(Error::new(EACCES)); - } - - if let Some(name_override) = name_override_opt { - name = String::from(name_override); - } else { - let mut name_bytes = vec![0; 4096]; - let len = syscall::file_op_mut_slice(syscall::number::SYS_FPATH, file.0, &mut name_bytes)?; - name_bytes.truncate(len); - name = match String::from_utf8(name_bytes) { - Ok(ok) => ok, - Err(_err) => { - //TODO: print error? - return Err(Error::new(EINVAL)); - } - }; - } - - //TODO: Only read elf header, not entire file. 
Then read required segments - data = vec![0; stat.st_size as usize]; - syscall::file_op_mut_slice(syscall::number::SYS_READ, file.0, &mut data)?; - drop(file); - } - - // Set UID and GID are determined after resolving any hashbangs - let setuid = if stat.st_mode & syscall::flag::MODE_SETUID == syscall::flag::MODE_SETUID { - Some(stat.st_uid) - } else { - None - }; - - let setgid = if stat.st_mode & syscall::flag::MODE_SETGID == syscall::flag::MODE_SETGID { - Some(stat.st_gid) - } else { - None - }; - - // The argument list is limited to avoid using too much userspace stack - // This check is done last to allow all hashbangs to be resolved - // - // This should be based on the size of the userspace stack, divided - // by the cost of each argument, which should be usize * 2, with - // one additional argument added to represent the total size of the - // argument pointer array and potential padding - // - // A limit of 4095 would mean a stack of (4095 + 1) * 8 * 2 = 65536, or 64KB - if (args.len() + vars.len()) > 4095 { - return Err(Error::new(E2BIG)); - } - - let elf = match elf::Elf::from(&data) { - Ok(elf) => elf, - Err(err) => { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - println!( - "{}: {}: fexec failed to execute {}: {}", - context.id.into(), - *context.name.read(), - fd.into(), - err - ); - } - return Err(Error::new(ENOEXEC)); - } - }; - - // `fexec_kernel` can recurse if an interpreter is found. We get the - // auxiliary vector from the first invocation, which is passed via an - // argument, or if this is the first one we create it. - let (auxv, phdr_grant) = if let Some((auxv, phdr_grant)) = auxv { - (auxv, phdr_grant) - } else { - let phdr_grant = match context::contexts().current().ok_or(Error::new(ESRCH))?.read().grants.write() { - grants => { - let size = elf.program_headers_size() * elf.program_header_count(); - let aligned_size = (size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE; - - if aligned_size > MAX_PHDRS_SIZE { - return Err(Error::new(ENOMEM)); - } - - let phdrs_region = grants.find_free(aligned_size); - let grant = context::memory::Grant::map(phdrs_region.start_address(), aligned_size, PageFlags::new().write(true).user(true)); - - unsafe { - let dst = core::slice::from_raw_parts_mut(grant.start_address().data() as *mut u8, aligned_size); - dst[..size].copy_from_slice(&data[elf.program_headers()..elf.program_headers() + elf.program_headers_size() * elf.program_header_count()]); - } - - grant - } - }; - let mut auxv = Vec::with_capacity(3); - - auxv.push(AT_ENTRY); - auxv.push(elf.entry()); - auxv.push(AT_PHDR); - auxv.push(phdr_grant.start_address().data()); - auxv.push(AT_PHENT); - auxv.push(elf.program_headers_size()); - auxv.push(AT_PHNUM); - auxv.push(elf.program_header_count()); - - (auxv, phdr_grant) - }; - - // We check the validity of all loadable sections here - for segment in elf.segments() { - match segment.p_type { - program_header::PT_INTERP => { - //TODO: length restraint, parse interp earlier - let mut interp = vec![0; segment.p_memsz as usize]; - unsafe { - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - interp.as_mut_ptr(), - segment.p_filesz as usize); - } - - let mut i = 0; - while i < interp.len() { - if interp[i] == 0 { - break; - } - i += 1; - } - interp.truncate(i); - - let interp_str = str::from_utf8(&interp).map_err(|_| Error::new(EINVAL))?; - - let interp_fd = super::fs::open(interp_str, super::flag::O_RDONLY | 
super::flag::O_CLOEXEC)?; - - let mut args_vec = Vec::from(args); - //TODO: pass file handle in auxv - let name_override = name.into_boxed_str(); - args_vec[0] = name_override.clone().into(); - - // Drop variables, since fexec_kernel probably won't return - drop(elf); - drop(interp); - - return fexec_kernel( - interp_fd, - args_vec.into_boxed_slice(), - vars, - Some(name_override), - Some((auxv, phdr_grant)), - ); - }, - _ => (), - } - } - - // This is the point of no return, quite literaly. Any checks for validity need - // to be done before, and appropriate errors returned. Otherwise, we have nothing - // to return to. - fexec_noreturn(setuid, setgid, name.into_boxed_str(), data.into_boxed_slice(), phdr_grant, args, vars, auxv.into_boxed_slice()); -} -const MAX_PHDRS_SIZE: usize = PAGE_SIZE; - -pub fn fexec(fd: FileHandle, arg_ptrs: &[[usize; 2]], var_ptrs: &[[usize; 2]]) -> Result { - let mut args = Vec::new(); - for arg_ptr in arg_ptrs { - let arg = validate_slice(arg_ptr[0] as *const u8, arg_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - args.push(arg.to_vec().into_boxed_slice()); - } - - let mut vars = Vec::new(); - for var_ptr in var_ptrs { - let var = validate_slice(var_ptr[0] as *const u8, var_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - vars.push(var.to_vec().into_boxed_slice()); - } - - // Neither arg_ptrs nor var_ptrs should be used after this point, the kernel - // now has owned copies in args and vars - - fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None) -} - pub fn exit(status: usize) -> ! { ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); @@ -1629,3 +1037,29 @@ pub fn waitpid(pid: ContextId, status_ptr: usize, flags: WaitFlags) -> Result) -> ! { + assert!(!data.is_empty()); + + const LOAD_BASE: usize = 0; + let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), data.len(), PageFlags::new().user(true).write(true).execute(true)); + + let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + + for (index, page) in grant.pages().enumerate() { + let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; + let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); + unsafe { ((frame.start_address().data() + crate::KERNEL_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + } + context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); + + drop(data); + + #[cfg(target_arch = "x86_64")] + unsafe { + let start = ((LOAD_BASE + 0x18) as *mut usize).read(); + // Start with the (probably) ELF executable loaded, without any stack the ability to load + // sections to arbitrary addresses. + crate::arch::start::usermode(start, 0, 0, 0); + } +} diff --git a/syscall b/syscall index 0c98fbd1..d6af2661 160000 --- a/syscall +++ b/syscall @@ -1 +1 @@ -Subproject commit 0c98fbd16212282aeb3db17c991472885a9b79be +Subproject commit d6af266119e7b4a3b0e9a04c63b3cfcfac94781a -- GitLab From f7f722f81cbe3d27e449b8e246edeedb528dc45c Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 21 Jan 2022 21:38:03 +0100 Subject: [PATCH 03/44] Don't use identity mapping for ACPI. 
This is so that any process can use pointers to ACPI tables, since they now point to the universally-accessible KERNEL_OFFSET+physaddr virtual addresses. --- src/acpi/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index c75c78b4..45c384df 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -30,23 +30,23 @@ mod rsdp; pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt { { - let page = Page::containing_address(VirtualAddress::new(sdt_address)); + let page = Page::containing_address(VirtualAddress::new(sdt_address + crate::KERNEL_OFFSET)); if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data())); + let frame = Frame::containing_address(PhysicalAddress::new(sdt_address)); let result = active_table.map_to(page, frame, PageFlags::new()); result.flush(); } } - let sdt = unsafe { &*(sdt_address as *const Sdt) }; + let sdt = unsafe { &*((sdt_address + crate::KERNEL_OFFSET) as *const Sdt) }; // Map extra SDT frames if required { - let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096)); - let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize)); + let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096 + crate::KERNEL_OFFSET)); + let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize + crate::KERNEL_OFFSET)); for page in Page::range_inclusive(start_page, end_page) { if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data())); + let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::KERNEL_OFFSET)); let result = active_table.map_to(page, frame, PageFlags::new()); result.flush(); } @@ -125,7 +125,7 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O rxsdt.map_all(active_table); for sdt_address in rxsdt.iter() { - let sdt = &*(sdt_address as *const Sdt); + let sdt = &*((sdt_address + crate::KERNEL_OFFSET) as *const Sdt); let signature = get_sdt_signature(sdt); if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { -- GitLab From e6e13480724077f8faa21ab3ad94648fd3d0d47b Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 21 Jan 2022 21:44:12 +0100 Subject: [PATCH 04/44] Implement exec, and change UserGrant allocator. 
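The UserGrants allocator no longer hands out addresses by scanning from a
fixed offset. It now tracks the free holes of the user address space in a
BTreeMap keyed by start address: reserve() shrinks or splits a hole when a
grant is inserted, unreserve() re-coalesces holes when a grant is removed,
and find_free() picks the first hole that is large enough, keeping the
zero page reserved so null pointers keep faulting. A minimal sketch of the
hole bookkeeping, with plain usize addresses standing in for the kernel's
VirtualAddress/Region types (illustrative only; the real code is in
src/context/memory.rs):

    use std::collections::BTreeMap;

    /// Free holes of an address space: start address -> hole size.
    struct Holes(BTreeMap<usize, usize>);

    impl Holes {
        /// Carve [start, start + size) out of the hole containing it
        /// (the idea behind UserGrants::reserve).
        fn reserve(&mut self, start: usize, size: usize) {
            let (hole_start, hole_size) = self.0.range(..=start).next_back()
                .map(|(&s, &z)| (s, z))
                .expect("reserving a region outside any free hole");
            assert!(hole_start + hole_size >= start + size);
            self.0.remove(&hole_start);
            if start > hole_start {
                // Keep the part of the hole below the new grant.
                self.0.insert(hole_start, start - hole_start);
            }
            if hole_start + hole_size > start + size {
                // The grant split the hole in two; keep the tail above it.
                self.0.insert(start + size, hole_start + hole_size - (start + size));
            }
        }

        /// Free [start, start + size) again, merging with adjacent holes
        /// (the idea behind UserGrants::unreserve).
        fn unreserve(&mut self, start: usize, size: usize) {
            // Absorb any hole that begins exactly where this region ends.
            let after = self.0.remove(&(start + size)).unwrap_or(0);
            match self.0.range(..start).next_back().map(|(&s, &z)| (s, z)) {
                // A hole ends exactly where the region begins: extend it.
                Some((prev, prev_size)) if prev + prev_size == start => {
                    self.0.insert(prev, prev_size + size + after);
                }
                // Otherwise open a fresh hole covering region plus tail.
                _ => {
                    self.0.insert(start, size + after);
                }
            }
        }
    }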
--- src/arch/x86_64/interrupt/trace.rs | 4 +- src/arch/x86_64/paging/mod.rs | 6 +- src/arch/x86_64/paging/table.rs | 6 +- src/context/context.rs | 2 +- src/context/list.rs | 8 +- src/context/memory.rs | 489 +++++++++++++---------------- src/debugger.rs | 15 - src/lib.rs | 2 + src/scheme/memory.rs | 4 +- src/scheme/proc.rs | 16 + src/scheme/sys/mod.rs | 1 + src/scheme/user.rs | 11 +- src/syscall/debug.rs | 30 +- src/syscall/mod.rs | 14 +- src/syscall/process.rs | 225 +++++++++---- 15 files changed, 438 insertions(+), 395 deletions(-) diff --git a/src/arch/x86_64/interrupt/trace.rs b/src/arch/x86_64/interrupt/trace.rs index b8eb820d..1b72260d 100644 --- a/src/arch/x86_64/interrupt/trace.rs +++ b/src/arch/x86_64/interrupt/trace.rs @@ -16,7 +16,9 @@ pub unsafe fn stack_trace() { let active_table = ActivePageTable::new(TableKind::User); for _frame in 0..64 { if let Some(rip_rbp) = rbp.checked_add(mem::size_of::()) { - if active_table.translate(VirtualAddress::new(rbp)).is_some() && active_table.translate(VirtualAddress::new(rip_rbp)).is_some() { + let rbp_virt = VirtualAddress::new(rbp); + let rip_rbp_virt = VirtualAddress::new(rip_rbp); + if rbp_virt.is_canonical() && rip_rbp_virt.is_canonical() && active_table.translate(rbp_virt).is_some() && active_table.translate(rip_rbp_virt).is_some() { let rip = *(rip_rbp as *const usize); if rip == 0 { println!(" {:>016X}: EMPTY RETURN", rbp); diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index 51e35878..cf732b93 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -311,7 +311,6 @@ impl InactivePageTable { /// For this to be safe, the caller must have exclusive access to the corresponding virtual /// address of the frame. pub unsafe fn new( - _active_table: &mut ActivePageTable, frame: Frame, ) -> InactivePageTable { // FIXME: Use active_table to ensure that the newly-allocated frame be linearly mapped, in @@ -394,8 +393,11 @@ impl Page { } pub fn next(self) -> Page { + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { Self { - number: self.number + 1, + number: self.number + n, } } } diff --git a/src/arch/x86_64/paging/table.rs b/src/arch/x86_64/paging/table.rs index 9e907d35..605e078c 100644 --- a/src/arch/x86_64/paging/table.rs +++ b/src/arch/x86_64/paging/table.rs @@ -46,11 +46,7 @@ pub struct Table { impl Table where L: TableLevel { pub fn is_unused(&self) -> bool { - if self.entry_count() > 0 { - return false; - } - - true + self.entry_count() == 0 } pub fn zero(&mut self) { diff --git a/src/context/context.rs b/src/context/context.rs index 276626c9..71a21d05 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -16,7 +16,7 @@ use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::{UserGrants, Memory, SharedMemory}; +use crate::context::memory::UserGrants; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; diff --git a/src/context/list.rs b/src/context/list.rs index e37b4b95..f1a9b541 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -100,9 +100,13 @@ impl ContextList { context.arch.set_context_handle(); } - context.arch.set_page_utable(unsafe { ActivePageTable::new(TableKind::User).address() }); + let mut new_tables = super::memory::setup_new_utable()?; + new_tables.take(); + + context.arch.set_page_utable(unsafe { 
new_tables.new_utable.address() }); #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { ActivePageTable::new(TableKind::Kernel).address() }); + context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); + context.arch.set_fx(fx.as_ptr() as usize); context.arch.set_stack(stack.as_ptr() as usize + offset); context.kfx = Some(fx); diff --git a/src/context/memory.rs b/src/context/memory.rs index 65fa3f4a..802c49bd 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -4,19 +4,20 @@ use core::borrow::Borrow; use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; use core::intrinsics; -use core::ops::{Deref, DerefMut}; +use core::ops::Deref; use spin::Mutex; use syscall::{ flag::MapFlags, error::*, }; +use rmm::Arch as _; use crate::arch::paging::PAGE_SIZE; use crate::context::file::FileDescriptor; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::Frame; use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, VirtualAddress}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, TableKind, VirtualAddress}; /// Round down to the nearest multiple of page size pub fn round_down_pages(number: usize) -> usize { @@ -46,14 +47,31 @@ impl Drop for UnmapResult { } } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct UserGrants { - pub inner: BTreeSet, + inner: BTreeSet, + holes: BTreeMap, + // TODO: Would an additional map ordered by (size,start) to allow for O(log n) allocations be + // beneficial? + //TODO: technically VirtualAddress is from a scheme's context! pub funmap: BTreeMap, } +impl Default for UserGrants { + fn default() -> Self { + Self::new() + } +} + impl UserGrants { + pub fn new() -> Self { + Self { + inner: BTreeSet::new(), + holes: core::iter::once((VirtualAddress::new(0), crate::PML4_SIZE * 256)).collect::>(), + funmap: BTreeMap::new(), + } + } /// Returns the grant, if any, which occupies the specified address pub fn contains(&self, address: VirtualAddress) -> Option<&Grant> { let byte = Region::byte(address); @@ -73,28 +91,30 @@ impl UserGrants { .take_while(move |region| !region.intersect(requested).is_empty()) } /// Return a free region with the specified size - pub fn find_free(&self, size: usize) -> Region { - // Get last used region - let last = self.inner.iter().next_back().map(Region::from).unwrap_or(Region::new(VirtualAddress::new(0), 0)); - // At the earliest, start at grant offset - // TODO - let address = last.start_address().data() - size; + // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). + pub fn find_free(&self, size: usize) -> Option { + // Get first available hole, but do reserve the page starting from zero as most compiled + // language cannot handle null pointers safely even if they do point to valid memory. If an + // application absolutely needs to map the 0th page, they will have to do so explicitly via + // MAP_FIXED/MAP_FIXED_NOREPLACE. + + let (hole_start, hole_size) = self.holes.iter().find(|(hole_offset, hole_size)| size <= if hole_offset.data() == 0 { hole_size.saturating_sub(PAGE_SIZE) } else { **hole_size })?; // Create new region - Region::new(VirtualAddress::new(address), size) + Some(Region::new(VirtualAddress::new(cmp::max(hole_start.data(), PAGE_SIZE)), size)) } /// Return a free region, respecting the user's hinted address and flags. Address may be null. 
pub fn find_free_at(&mut self, address: VirtualAddress, size: usize, flags: MapFlags) -> Result<Region> { if address == VirtualAddress::new(0) { // Free hands! - return Ok(self.find_free(size)); + return self.find_free(size).ok_or(Error::new(ENOMEM)); } // The user wished to have this region... let mut requested = Region::new(address, size); if - requested.end_address().data() >= crate::PML4_SIZE * 256 // There are 256 PML4 entries reserved for userspace - && address.data() % PAGE_SIZE != 0 + requested.end_address().data() > crate::PML4_SIZE * 256 // There are 256 PML4 entries reserved for userspace + || address.data() % PAGE_SIZE != 0 { // ... but it was invalid return Err(Error::new(EINVAL)); @@ -111,22 +131,76 @@ return Err(Error::new(EOPNOTSUPP)); } else { // TODO: Find grant close to requested address? - requested = self.find_free(requested.size()); + requested = self.find_free(requested.size()).ok_or(Error::new(ENOMEM))?; } } Ok(requested) } -} -impl Deref for UserGrants { - type Target = BTreeSet<Grant>; - fn deref(&self) -> &Self::Target { - &self.inner + fn reserve(&mut self, grant: &Region) { + let previous_hole = self.holes.range_mut(..grant.start_address()).next_back(); + + if let Some((hole_offset, hole_size)) = previous_hole { + let prev_hole_end = hole_offset.data() + *hole_size; + + // Note that prev_hole_end cannot exactly equal grant.start_address, since that would + // imply there is another grant at that position already, as it would otherwise have + // been larger. + + if prev_hole_end > grant.start_address().data() { + // hole_offset must be below (but never equal to) the start address due to the + // `..grant.start_address()` limit; hence, all we have to do is to shrink the + // previous hole. + *hole_size = grant.start_address().data() - hole_offset.data(); + } + if prev_hole_end > grant.end_address().data() { + // The grant is splitting this hole in two, so insert the new one at the end. + self.holes.insert(grant.end_address(), prev_hole_end - grant.end_address().data()); + } + } + + // Next hole + if let Some(hole_size) = self.holes.remove(&grant.start_address()) { + let remainder = hole_size - grant.size(); + if remainder > 0 { + self.holes.insert(grant.end_address(), remainder); + } + } } -} -impl DerefMut for UserGrants { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner + fn unreserve(&mut self, grant: &Region) { + // The size of any possible hole directly after the to-be-freed region. + let exactly_after_size = self.holes.remove(&grant.end_address()); + + // There was a range that began exactly prior to the to-be-freed region, so simply + // increment the size such that it occupies the grant too. If in addition there was a + // grant directly after the grant, include it too in the size. + if let Some((hole_offset, hole_size)) = self.holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) { + *hole_size = grant.end_address().data() - hole_offset.data() + exactly_after_size.unwrap_or(0); + } else { + // There was no free region directly before the to-be-freed region, however we will + // now unconditionally insert a new free region where the grant was, and add that extra
// size if there was something after it.
+ self.holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); + } + } + pub fn insert(&mut self, grant: Grant) { + self.reserve(&grant); + self.inner.insert(grant); + } + pub fn remove(&mut self, region: &Region) -> bool { + self.take(region).is_some() + } + pub fn take(&mut self, region: &Region) -> Option { + let grant = self.inner.take(region)?; + self.unreserve(region); + Some(grant) + } + pub fn iter(&self) -> impl Iterator + '_ { + self.inner.iter() + } + pub fn is_empty(&self) -> bool { self.inner.is_empty() } + pub fn into_iter(self) -> impl Iterator { + self.inner.into_iter() } } @@ -223,9 +297,9 @@ impl Region { /// Return all pages containing a chunk of the region pub fn pages(&self) -> PageIter { - Page::range_inclusive( + Page::range_exclusive( Page::containing_address(self.start_address()), - Page::containing_address(self.final_address()) + Page::containing_address(self.end_address()) ) } @@ -382,6 +456,15 @@ impl Grant { desc_opt: None, } } + pub fn zeroed_inactive(dst: Page, page_count: usize, flags: PageFlags, table: &mut InactivePageTable) -> Result { + let mut inactive_mapper = table.mapper(); + + for page in Page::range_exclusive(dst, dst.next_by(page_count)) { + let flush = inactive_mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?; + unsafe { flush.ignore(); } + } + Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None }) + } pub fn map_inactive(src: VirtualAddress, dst: VirtualAddress, size: usize, flags: PageFlags, desc_opt: Option, inactive_table: &mut InactivePageTable) -> Grant { let active_table = unsafe { ActivePageTable::new(src.kind()) }; @@ -418,55 +501,44 @@ impl Grant { } /// This function should only be used in clone! - pub fn secret_clone(&self, new_start: VirtualAddress) -> Grant { + pub(crate) fn secret_clone(&self, inactive_table: &mut InactivePageTable) -> Grant { assert!(self.mapped); - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; - - let flush_all = PageFlushAll::new(); + let active_table = unsafe { ActivePageTable::new(TableKind::User) }; + let mut inactive_mapper = inactive_table.mapper(); - let start_page = Page::containing_address(self.region.start); - let end_page = Page::containing_address(VirtualAddress::new(self.region.start.data() + self.region.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { + for page in self.pages() { //TODO: One function to do both? 
let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let frame = active_table.translate_page(page).expect("grant references unmapped memory"); + let old_frame = active_table.translate_page(page).expect("grant references unmapped memory"); - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - if self.owned { - let result = active_table.map(new_page, PageFlags::new().write(true)) + let frame = if self.owned { + // TODO: CoW paging + let new_frame = crate::memory::allocate_frames(1) .expect("TODO: handle ENOMEM in Grant::secret_clone"); - flush_all.consume(result); - } else { - let result = active_table.map_to(new_page, frame, flags); - flush_all.consume(result); - } - } - - flush_all.flush(); - if self.owned { - unsafe { - intrinsics::copy(self.region.start.data() as *const u8, new_start.data() as *mut u8, self.region.size); - } - - let flush_all = PageFlushAll::new(); - - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? - let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); + unsafe { + // We might as well use self.start_address() directly, but if we were to + // introduce SMAP it would help to only move to/from kernel memory, and we are + // copying physical frames anyway. + let src_pointer = RmmA::phys_to_virt(old_frame.start_address()).data() as *const u8; + let dst_pointer = RmmA::phys_to_virt(new_frame.start_address()).data() as *mut u8; + dst_pointer.copy_from_nonoverlapping(src_pointer, PAGE_SIZE); + } - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - let result = active_table.remap(new_page, flags); - flush_all.consume(result); - } + new_frame + } else { + old_frame + }; - flush_all.flush(); + let flush = inactive_mapper.map_to(page, frame, flags); + // SAFETY: This happens within an inactive table. + unsafe { flush.ignore() } } Grant { region: Region { - start: new_start, + start: self.region.start, size: self.region.size, }, flags: self.flags, @@ -476,32 +548,6 @@ impl Grant { } } - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable) { - assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(self.region.start); - let end_page = Page::containing_address(VirtualAddress::new(self.region.start.data() + self.region.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? 
- let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); - - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - let result = new_table.mapper().map_to(new_page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { result.ignore(); } - } - - flush_all.flush(); - - self.region.start = new_start; - } - pub fn flags(&self) -> PageFlags { self.flags } @@ -511,12 +557,9 @@ impl Grant { let mut active_table = unsafe { ActivePageTable::new(self.start_address().kind()) }; - let flush_all = PageFlushAll::new(); - let start_page = Page::containing_address(self.start_address()); - let end_page = Page::containing_address(self.final_address()); - for page in Page::range_inclusive(start_page, end_page) { + for page in self.pages() { let (result, frame) = active_table.unmap_return(page, false); if self.owned { //TODO: make sure this frame can be safely freed, physical use counter @@ -533,13 +576,11 @@ impl Grant { UnmapResult { file_desc: self.desc_opt.take() } } - pub fn unmap_inactive(mut self, new_table: &mut InactivePageTable) -> UnmapResult { + pub fn unmap_inactive(mut self, other_table: &mut InactivePageTable) -> UnmapResult { assert!(self.mapped); - let start_page = Page::containing_address(self.start_address()); - let end_page = Page::containing_address(self.final_address()); - for page in Page::range_inclusive(start_page, end_page) { - let (result, frame) = new_table.mapper().unmap_return(page, false); + for page in self.pages() { + let (result, frame) = other_table.mapper().unmap_return(page, false); if self.owned { //TODO: make sure this frame can be safely freed, physical use counter crate::memory::deallocate_frames(frame, 1); @@ -594,6 +635,34 @@ impl Grant { Some((before_grant, self, after_grant)) } + pub fn move_to_address_space(&mut self, new_start: Page, new_page_table: &mut InactivePageTable, flags: PageFlags, flush_all: &mut PageFlushAll) -> Grant { + assert!(self.mapped); + + let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + let mut new_mapper = new_page_table.mapper(); + let keep_parents = false; + + for (i, page) in self.pages().enumerate() { + unsafe { + let (flush, frame) = active_table.unmap_return(page, keep_parents); + flush_all.consume(flush); + + let flush = new_mapper.map_to(new_start.next_by(i), frame, flags); + flush.ignore(); + } + } + + let was_owned = core::mem::replace(&mut self.owned, false); + self.mapped = false; + + Self { + region: Region::new(new_start.start_address(), self.region.size), + flags, + mapped: true, + owned: was_owned, + desc_opt: self.desc_opt.clone(), + } + } } impl Deref for Grant { @@ -632,202 +701,84 @@ impl Drop for Grant { } } -#[derive(Clone, Debug)] -pub enum SharedMemory { - Owned(Arc>), - Borrowed(Weak>) -} - -impl SharedMemory { - pub fn with(&self, f: F) -> T where F: FnOnce(&mut Memory) -> T { - match *self { - SharedMemory::Owned(ref memory_lock) => { - let mut memory = memory_lock.lock(); - f(&mut *memory) - }, - SharedMemory::Borrowed(ref memory_weak) => { - let memory_lock = memory_weak.upgrade().expect("SharedMemory::Borrowed no longer valid"); - let mut memory = memory_lock.lock(); - f(&mut *memory) - } - } - } +pub const DANGLING: usize = 1 << (usize::BITS - 2); - pub fn borrow(&self) -> SharedMemory { - match *self { - SharedMemory::Owned(ref memory_lock) => 
SharedMemory::Borrowed(Arc::downgrade(memory_lock)), - SharedMemory::Borrowed(ref memory_lock) => SharedMemory::Borrowed(memory_lock.clone()) - } - } -} +pub struct NewTables { + #[cfg(target_arch = "aarch64")] + pub new_ktable: InactivePageTable, + pub new_utable: InactivePageTable, -#[derive(Debug)] -pub struct Memory { - start: VirtualAddress, - size: usize, - flags: PageFlags, + taken: bool, } - -impl Memory { - pub fn new(start: VirtualAddress, size: usize, flags: PageFlags, clear: bool) -> Self { - let mut memory = Memory { - start, - size, - flags, - }; - - memory.map(clear); - - memory - } - - pub fn to_shared(self) -> SharedMemory { - SharedMemory::Owned(Arc::new(Mutex::new(self))) - } - - pub fn start_address(&self) -> VirtualAddress { - self.start - } - - pub fn size(&self) -> usize { - self.size - } - - pub fn flags(&self) -> PageFlags { - self.flags - } - - pub fn pages(&self) -> PageIter { - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size - 1)); - Page::range_inclusive(start_page, end_page) - } - - fn map(&mut self, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let result = active_table - .map(page, self.flags) - .expect("TODO: handle ENOMEM in Memory::map"); - flush_all.consume(result); - } - - flush_all.flush(); - - if clear { - assert!(self.flags.has_write()); - unsafe { - intrinsics::write_bytes(self.start_address().data() as *mut u8, 0, self.size); - } - } +impl NewTables { + pub fn take(&mut self) { + self.taken = true; } +} - fn unmap(&mut self) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; +impl Drop for NewTables { + fn drop(&mut self) { + if self.taken { return } - let flush_all = PageFlushAll::new(); + unsafe { + use crate::memory::deallocate_frames; + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_utable.address())), 1); - for page in self.pages() { - let result = active_table.unmap(page); - flush_all.consume(result); + #[cfg(target_arch = "aarch64")] + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_ktable.address())), 1); } - - flush_all.flush(); } +} - /// A complicated operation to move a piece of memory to a new page table - /// It also allows for changing the address at the same time - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable) { - let mut inactive_mapper = new_table.mapper(); +/// Allocates a new identically mapped ktable and empty utable (same memory on x86_64). +pub fn setup_new_utable() -> Result { + let mut new_utable = unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) }; - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; + let mut new_ktable = if cfg!(target_arch = "aarch64") { + unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) 
} + } else { + unsafe { InactivePageTable::from_address(new_utable.address()) } + }; - let flush_all = PageFlushAll::new(); + let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - for page in self.pages() { - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); - - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.start.data() + new_start.data())); - let result = inactive_mapper.map_to(new_page, frame, self.flags); - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } - } + // Copy kernel image mapping + { + let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); + let flags = active_ktable.p4()[crate::KERNEL_PML4].flags(); - flush_all.flush(); - - self.start = new_start; + new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags); } - pub fn remap(&mut self, new_flags: PageFlags) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let result = active_table.remap(page, new_flags); - flush_all.consume(result); - } - - flush_all.flush(); + // Copy kernel heap mapping + { + let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); + let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags(); - self.flags = new_flags; + new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); } - pub fn resize(&mut self, new_size: usize, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - //TODO: Calculate page changes to minimize operations - if new_size > self.size { - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + new_size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { - let result = active_table - .map(page, self.flags) - .expect("TODO: Handle OOM in Memory::resize"); - flush_all.consume(result); - } - } - - flush_all.flush(); - - if clear { - unsafe { - intrinsics::write_bytes((self.start.data() + self.size) as *mut u8, 0, new_size - self.size); - } - } - } else if new_size < self.size { - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(self.start.data() + new_size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_some() { - let result = active_table.unmap(page); - flush_all.consume(result); - } - } - - flush_all.flush(); - } - - self.size = new_size; + // Copy physmap mapping + { + let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped"); + let flags = active_ktable.p4()[crate::PHYS_PML4].flags(); + new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags); } -} - -impl Drop for Memory { - fn drop(&mut self) { - self.unmap(); + // Copy kernel percpu (similar to TLS) mapping. 
+ { + let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped"); + let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags(); + new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags); } + + Ok(NewTables { + taken: false, + new_utable, + #[cfg(target_arch = "aarch64")] + new_ktable, + }) } -pub const DANGLING: usize = 1 << (usize::BITS - 2); #[cfg(tests)] mod tests { diff --git a/src/debugger.rs b/src/debugger.rs index b0c2735d..e49edf9a 100644 --- a/src/debugger.rs +++ b/src/debugger.rs @@ -19,21 +19,6 @@ pub unsafe fn debugger() { if let Some((a, b, c, d, e, f)) = context.syscall { println!("syscall: {}", crate::syscall::debug::format_call(a, b, c, d, e, f)); } - if ! context.image.is_empty() { - println!("image:"); - for shared_memory in context.image.iter() { - shared_memory.with(|memory| { - let region = crate::context::memory::Region::new( - memory.start_address(), - memory.size() - ); - println!( - " virt 0x{:016x}:0x{:016x} size 0x{:08x}", - region.start_address().data(), region.final_address().data(), region.size() - ); - }); - } - } { let grants = context.grants.read(); if ! grants.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 5dcbcc81..1fbfbe8b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -190,6 +190,8 @@ pub extern fn userspace_init() { if bytes_read == 0 { break } total_bytes_read += bytes_read; } + data.truncate(total_bytes_read); + let _ = syscall::close(fd); crate::syscall::process::usermode_bootstrap(data.into_boxed_slice()); diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index cac3743d..7ae887fa 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -34,8 +34,8 @@ impl MemoryScheme { let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) }; for page in region.pages() { - if active_table.translate_page(page).is_some() { - println!("page at {:#x} was already mapped", page.start_address().data()); + if let Some(flags) = active_table.translate_page_flags(page).filter(|flags| flags.has_present()) { + println!("page at {:#x} was already mapped, flags: {:?}", page.start_address().data(), flags); return Err(Error::new(EEXIST)) } } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 34454513..5b66dcb3 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -101,6 +101,7 @@ enum Operation { Regs(RegsKind), Trace, Static(&'static str), + Name, } impl Operation { fn needs_child_process(self) -> bool { @@ -109,6 +110,7 @@ impl Operation { Self::Regs(_) => true, Self::Trace => true, Self::Static(_) => false, + Self::Name => false, } } } @@ -248,6 +250,7 @@ impl Scheme for ProcScheme { Some("regs/env") => Operation::Regs(RegsKind::Env), Some("trace") => Operation::Trace, Some("exe") => Operation::Static("exe"), + Some("name") => Operation::Name, _ => return Err(Error::new(EINVAL)) }; @@ -519,6 +522,13 @@ impl Scheme for ProcScheme { // Return read events Ok(read * mem::size_of::()) } + Operation::Name => match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.read() { + name => { + let to_copy = cmp::min(buf.len(), name.len()); + buf[..to_copy].copy_from_slice(&name.as_bytes()[..to_copy]); + Ok(to_copy) + } + } } } @@ -704,6 +714,11 @@ impl Scheme for ProcScheme { Ok(mem::size_of::()) }, + Operation::Name => { + let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?.into_boxed_str(); + *context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.write() = utf8; + Ok(buf.len()) + } } } @@ -741,6 
+756,7 @@ impl Scheme for ProcScheme { Operation::Regs(RegsKind::Env) => "regs/env", Operation::Trace => "trace", Operation::Static(path) => path, + Operation::Name => "name", }); let len = cmp::min(path.len(), buf.len()); diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index da6576cf..6c2c6f01 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -52,6 +52,7 @@ impl SysScheme { files.insert("scheme_num", Box::new(scheme_num::resource)); files.insert("syscall", Box::new(syscall::resource)); files.insert("uname", Box::new(uname::resource)); + files.insert("env", Box::new(|| Ok(Vec::from(unsafe { crate::INIT_ENV })))); #[cfg(target_arch = "x86_64")] files.insert("spurious_irq", Box::new(irq::spurious_irq_resource)); diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 5d7f9c6d..e2a3cd2d 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -152,6 +152,10 @@ impl UserInner { let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round(); let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; + /*if !dst_region.intersect(Region::new(VirtualAddress::new(0x39d000), 1)).is_empty() { + dbg!(dst_region); + }*/ + //TODO: Use syscall_head and syscall_tail to avoid leaking data grants.insert(Grant::map_inactive( src_region.start_address(), @@ -166,20 +170,21 @@ impl UserInner { } pub fn release(&self, address: usize) -> Result<()> { + //dbg!(address); if address == DANGLING { return Ok(()); } let context_lock = self.context.upgrade().ok_or(Error::new(ESRCH))?; let mut context = context_lock.write(); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + let mut other_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; let mut grants = context.grants.write(); let region = match grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, - None => return Err(Error::new(EFAULT)), + None => return Err(Error::new(EFAULT)), }; - grants.take(®ion).unwrap().unmap_inactive(&mut new_table); + grants.take(®ion).unwrap().unmap_inactive(&mut other_table); Ok(()) } diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 5f607857..5458998e 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -191,29 +191,15 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - "exit({})", b ), - //TODO: Cleanup, do not allocate - /*SYS_EXEC => format!( - "exec({}, {:?}, {:?})", - b, - validate_slice( - c as *const [usize; 2], - d - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() - }), + SYS_EXEC => format!( + "exec({:#x?}, {:p}, {:p})", validate_slice( - e as *const [usize; 2], - f - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() - }) - ),*/ + b as *const crate::syscall::data::ExecMemRange, + c, + ), + d as *const u8, + e as *const u8, + ), SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index f4e73862..d85a19b7 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,7 +25,7 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; -use self::data::{Map, SigAction, Stat, TimeSpec}; +use self::data::{ExecMemRange, Map, SigAction, Stat, TimeSpec}; use self::error::{Error, Result, 
ENOSYS}; use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags}; use self::number::*; @@ -129,6 +129,8 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_GETPID => getpid().map(ContextId::into), SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into), SYS_GETPPID => getppid().map(ContextId::into), + + SYS_EXEC => exec(validate_slice(b as *const ExecMemRange, c)?, d, e), SYS_CLONE => { let b = CloneFlags::from_bits_truncate(b); @@ -209,12 +211,12 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - let debug = { + /*let debug = { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); let name = context.name.read(); - if true || name.contains("redoxfs") { + if name.contains("redoxfs") { if a == SYS_CLOCK_GETTIME || a == SYS_YIELD { false } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) { @@ -238,7 +240,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } println!("{}", debug::format_call(a, b, c, d, e, f)); - } + }*/ // The next lines set the current syscall in the context struct, then once the inner() function // completes, we set the current syscall to none. @@ -263,7 +265,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - if debug { + /*if debug { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); @@ -280,7 +282,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u println!("Err({} ({:#X}))", err, err.errno); } } - } + }*/ // errormux turns Result into -errno Error::mux(result) diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 11fd16b7..a6e670f4 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -9,23 +9,25 @@ use core::alloc::{GlobalAlloc, Layout}; use core::convert::TryFrom; use core::ops::DerefMut; use core::{intrinsics, mem, str}; +use crate::context::file::{FileDescription, FileDescriptor}; + use spin::{RwLock, RwLockWriteGuard}; -use crate::context::file::{FileDescription, FileDescriptor}; -use crate::context::memory::{UserGrants, Region}; use crate::context::{Context, ContextId, WaitpidKey}; +use crate::context::memory::{Grant, Region, NewTables, page_flags, setup_new_utable, UserGrants}; + use crate::context; #[cfg(not(feature="doc"))] use crate::elf::{self, program_header}; use crate::interrupt; use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::allocate_frames; +use crate::memory::{allocate_frames, Frame, PhysicalAddress}; use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, TableKind, VirtualAddress, PAGE_SIZE}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, TableKind, VirtualAddress, PAGE_SIZE}; use crate::{ptrace, syscall}; use crate::scheme::FileHandle; use crate::start::usermode; -use crate::syscall::data::{SigAction, Stat}; +use crate::syscall::data::{ExecMemRange, SigAction, Stat}; use crate::syscall::error::*; use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags, CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM, @@ -141,16 +143,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } } - if flags.contains(CLONE_VM) { - grants = Arc::clone(&context.grants); - } else { - let mut 
grants_set = UserGrants::default(); - for grant in context.grants.read().iter() { - let start = VirtualAddress::new(grant.start_address().data() + crate::USER_TMP_GRANT_OFFSET - crate::USER_GRANT_OFFSET); - grants_set.insert(grant.secret_clone(start)); - } - grants = Arc::new(RwLock::new(grants_set)); - } + grants = Arc::clone(&context.grants); if flags.contains(CLONE_VM) { name = Arc::clone(&context.name); @@ -197,7 +190,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { // If not cloning virtual memory, use fmap to re-obtain every grant where possible if !flags.contains(CLONE_VM) { let grants = Arc::get_mut(&mut grants).ok_or(Error::new(EBUSY))?.get_mut(); - let old_grants = mem::take(&mut grants.inner); + let old_grants = mem::take(grants); // TODO: Find some way to do this without having to allocate. @@ -296,59 +289,27 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; } - let mut active_utable = unsafe { ActivePageTable::new(TableKind::User) }; - let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - - let mut new_utable = unsafe { - let frame = allocate_frames(1).ok_or(Error::new(ENOMEM))?; - // SAFETY: This is safe because the frame is exclusive, owned, and valid, as we - // have just allocated it. - InactivePageTable::new(&mut active_utable, frame) - }; - context.arch.set_page_utable(unsafe { new_utable.address() }); - - #[cfg(target_arch = "aarch64")] - let mut new_ktable = { - let mut new_ktable = { - let frame = allocate_frames(1).expect("no more frames in syscall::clone new_table"); - InactivePageTable::new(frame, &mut active_ktable) - }; - context.arch.set_page_ktable(unsafe { new_ktable.address() }); - new_ktable - }; + if flags.contains(CloneFlags::CLONE_VM) { + // Reuse same CR3, same grants, everything. + context.grants = grants; + } else { + // TODO: Handle ENOMEM + let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process"); - #[cfg(not(target_arch = "aarch64"))] - let mut new_ktable = unsafe { - InactivePageTable::from_address(new_utable.address()) - }; + let mut new_grants = UserGrants::new(); + for old_grant in grants.read().iter() { + new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable)); + } + context.grants = Arc::new(RwLock::new(new_grants)); - // Copy kernel image mapping - { - let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PML4].flags(); + drop(grants); - new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags); - } + new_tables.take(); - // Copy kernel heap mapping - { - let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags(); + context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); - } - - // Copy physmap mapping - { - let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped"); - let flags = active_ktable.p4()[crate::PHYS_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags); - } - // Copy kernel percpu (similar to TLS) mapping. 
- { - let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags); + #[cfg(target_arch = "aarch64")] + context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); } if let Some(fx) = kfx_opt.take() { @@ -391,6 +352,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { } } + context.name = name; context.cwd = cwd; @@ -437,7 +399,7 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu let mut grants_guard = grants_lock_mut.get_mut(); let grants = mem::replace(&mut *grants_guard, UserGrants::default()); - for grant in grants.inner.into_iter() { + for grant in grants.into_iter() { let unmap_result = if reaping { log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant); @@ -1042,7 +1004,7 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { assert!(!data.is_empty()); const LOAD_BASE: usize = 0; - let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), data.len(), PageFlags::new().user(true).write(true).execute(true)); + let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), ((data.len()+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true)); let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; @@ -1051,6 +1013,7 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); unsafe { ((frame.start_address().data() + crate::KERNEL_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } } + context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); drop(data); @@ -1060,6 +1023,134 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { let start = ((LOAD_BASE + 0x18) as *mut usize).read(); // Start with the (probably) ELF executable loaded, without any stack the ability to load // sections to arbitrary addresses. - crate::arch::start::usermode(start, 0, 0, 0); + usermode(start, 0, 0, 0); + } +} + +pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize) -> Result { + // TODO: rlimit? + if memranges.len() > 1024 { + return Err(Error::new(EINVAL)); } + + let mut new_grants = UserGrants::new(); + + { + let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); + + // Linux will always destroy other threads immediately if one of them executes execve(2). + // At the moment the Redox kernel is ignorant of threads, other than them sharing files, + // memory, etc. We fail with EBUSY if any resources that are being replaced, are shared. + + let mut old_grants = Arc::try_unwrap(mem::take(&mut current_context_lock.write().grants)).map_err(|_| Error::new(EBUSY))?.into_inner(); + // TODO: Allow multiple contexts which share the file table, to have one of them run exec? + let mut old_files = Arc::try_unwrap(mem::take(&mut current_context_lock.write().files)).map_err(|_| Error::new(EBUSY))?.into_inner(); + + // FIXME: Handle leak in case of ENOMEM. + let mut new_tables = setup_new_utable()?; + + let mut flush = PageFlushAll::new(); + + // FIXME: This is to the extreme, but fetch with atomic volatile? 
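// Illustrative note, not part of the patch: the loop below enforces page
// granularity on every ExecMemRange, because grants move between address
// spaces one whole page at a time. Assuming PAGE_SIZE is a power of two,
// the checks amount to this hypothetical helper:
//
//     fn is_page_aligned(value: usize) -> bool {
//         value % PAGE_SIZE == 0
//     }
//
// applied to address, size, and (when present) old_address, where
// old_address == !0 is the sentinel for "no source range, map zeroed pages".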
+ for memrange in memranges.iter().copied() { + let old_address = if memrange.old_address == !0 { None } else { Some(memrange.old_address) }; + + if memrange.address % PAGE_SIZE != 0 || old_address.map_or(false, |a| a % PAGE_SIZE != 0) || memrange.size % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } + if memrange.size == 0 { continue } + + let new_start = Page::containing_address(VirtualAddress::new(memrange.address)); + let flags = MapFlags::from_bits(memrange.flags).ok_or(Error::new(EINVAL))?; + let page_count = memrange.size / PAGE_SIZE; + let flags = page_flags(flags); + + if let Some(old_address) = old_address { + let old_start = VirtualAddress::new(memrange.old_address); + + let entire_region = Region::new(old_start, memrange.size); + + // TODO: This will do one B-Tree search for each memrange. If a process runs exec + // and keeps every range the way it is, then this would be O(n log n)! + loop { + let region = match old_grants.conflicts(entire_region).next().map(|g| *g.region()) { + Some(r) => r, + None => break, + }; + let owned = old_grants.take(®ion).expect("cannot fail"); + let (before, mut current, after) = owned.extract(region).expect("cannot fail"); + + if let Some(before) = before { old_grants.insert(before); } + if let Some(after) = after { old_grants.insert(after); } + + new_grants.insert(current.move_to_address_space(new_start, &mut new_tables.new_utable, flags, &mut flush)); + } + } else { + new_grants.insert(Grant::zeroed_inactive(new_start, page_count, flags, &mut new_tables.new_utable)?); + } + } + + { + unsafe { flush.ignore(); } + + new_tables.take(); + + let mut context = current_context_lock.write(); + context.grants = Arc::new(RwLock::new(new_grants)); + + let old_utable = context.arch.get_page_utable(); + let old_frame = Frame::containing_address(PhysicalAddress::new(old_utable)); + + context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); + + #[cfg(target_arch = "x86_64")] + unsafe { x86::controlregs::cr3_write(new_tables.new_utable.address() as u64); } + + for old_grant in old_grants.into_iter() { + old_grant.unmap_inactive(&mut unsafe { InactivePageTable::from_address(old_utable) }); + } + crate::memory::deallocate_frames(old_frame, 1); + + #[cfg(target_arch = "aarch64")] + context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); + + context.actions = Arc::new(RwLock::new(vec![( + SigAction { + sa_handler: unsafe { mem::transmute(SIG_DFL) }, + sa_mask: [0; 2], + sa_flags: SigActionFlags::empty(), + }, + 0 + ); 128])); + let was_vfork = mem::replace(&mut context.vfork, false); + + // TODO: Reuse in place if the file table is not shared. + drop(context); + + for file_slot in old_files.iter_mut().filter(|file_opt| file_opt.as_ref().map_or(false, |file| file.cloexec)) { + let file = file_slot.take().expect("iterator filter requires file slot to be occupied, not None"); + let _ = file.close(); + } + let mut context = current_context_lock.write(); + + context.files = Arc::new(RwLock::new(old_files)); + let ppid = context.ppid; + drop(context); + + // TODO: Should this code be preserved as is? 
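// Context, sketched rather than specified: CLONE_VFORK parks the parent
// inside clone() until the child execs or exits, mirroring vfork(2):
//
//     let child = clone(CLONE_VFORK | ..., ...)?;  // parent sleeps here
//     // parent resumes only once the child has exec'd or exited
//
// The block below is the exec half of that handshake; it unblocks the ppid
// context exactly once.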
+ if was_vfork { + let contexts = context::contexts(); + if let Some(context_lock) = contexts.get(ppid) { + let mut context = context_lock.write(); + if !context.unblock() { + println!("{} not blocked for exec vfork unblock", ppid.into()); + } + } else { + println!("{} not found for exec vfork unblock", ppid.into()); + } + } + } + } + + unsafe { usermode(instruction_ptr, stack_ptr, 0, 0); } } -- GitLab From 15b029de3687fdbbe62efc018e339f9b2937f933 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Tue, 22 Feb 2022 19:49:05 +0100 Subject: [PATCH 05/44] Fix everything all the way to booting to desktop. --- Cargo.toml | 4 +- src/arch/x86_64/consts.rs | 43 +--------------------- src/context/context.rs | 9 ++++- src/context/signal.rs | 6 +-- src/scheme/proc.rs | 18 +++++++++ src/syscall/driver.rs | 77 +++++++++++++++++---------------------- src/syscall/mod.rs | 16 ++++++-- src/syscall/process.rs | 41 ++++++++++++--------- 8 files changed, 103 insertions(+), 111 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2bd3aa7e..82ead039 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,9 @@ raw-cpuid = "10.2.0" x86 = { version = "0.47.0", default-features = false } [features] -default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] +# TODO: Fix multicore +#default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] +default = ["acpi", "graphical_debug", "serial_debug"] acpi = [] doc = [] graphical_debug = [] diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index b7b7ccbc..33432e82 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -37,47 +37,8 @@ /// Offset to user image pub const USER_OFFSET: usize = 0; - pub const USER_PML4: usize = (USER_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user arguments - pub const USER_ARG_OFFSET: usize = USER_OFFSET + PML4_SIZE/2; - - /// Offset to user grants - pub const USER_GRANT_OFFSET: usize = USER_OFFSET + PML4_SIZE; - pub const USER_GRANT_PML4: usize = (USER_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user stack - pub const USER_STACK_OFFSET: usize = USER_GRANT_OFFSET + PML4_SIZE; - pub const USER_STACK_PML4: usize = (USER_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user stack - pub const USER_STACK_SIZE: usize = 1024 * 1024; // 1 MB - - /// Offset to user sigstack - pub const USER_SIGSTACK_OFFSET: usize = USER_STACK_OFFSET + PML4_SIZE; - pub const USER_SIGSTACK_PML4: usize = (USER_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user sigstack - pub const USER_SIGSTACK_SIZE: usize = 256 * 1024; // 256 KB - - /// Offset to user temporary image (used when cloning) - pub const USER_TMP_OFFSET: usize = USER_SIGSTACK_OFFSET + PML4_SIZE; - pub const USER_TMP_PML4: usize = (USER_TMP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary heap (used when cloning) - pub const USER_TMP_HEAP_OFFSET: usize = USER_TMP_OFFSET + PML4_SIZE; - pub const USER_TMP_HEAP_PML4: usize = (USER_TMP_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary page for grants - pub const USER_TMP_GRANT_OFFSET: usize = USER_TMP_HEAP_OFFSET + PML4_SIZE; - pub const USER_TMP_GRANT_PML4: usize = (USER_TMP_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary stack (used when cloning) - pub const USER_TMP_STACK_OFFSET: usize = USER_TMP_GRANT_OFFSET + PML4_SIZE; - pub const USER_TMP_STACK_PML4: usize = (USER_TMP_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary sigstack (used when cloning) - pub const USER_TMP_SIGSTACK_OFFSET: usize = 
USER_TMP_STACK_OFFSET + PML4_SIZE; - pub const USER_TMP_SIGSTACK_PML4: usize = (USER_TMP_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; /// Offset for usage in other temporary pages - pub const USER_TMP_MISC_OFFSET: usize = USER_TMP_SIGSTACK_OFFSET + PML4_SIZE; + // TODO: Currently used for ptrace but should be removed or replaced with a kernel address. + pub const USER_TMP_MISC_OFFSET: usize = USER_OFFSET + PML4_SIZE; pub const USER_TMP_MISC_PML4: usize = (USER_TMP_MISC_OFFSET & PML4_MASK)/PML4_SIZE; diff --git a/src/context/context.rs b/src/context/context.rs index 71a21d05..614318a2 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -244,7 +244,11 @@ pub struct Context { /// A somewhat hacky way to initially stop a context when creating /// a new instance of the proc: scheme, entirely separate from /// signals or any other way to restart a process. - pub ptrace_stop: bool + pub ptrace_stop: bool, + /// A pointer to the signal stack. If this is unset, none of the sigactions can be anything + /// else than SIG_DFL, otherwise signals will not be delivered. Userspace is responsible for + /// setting this. + pub sigstack: Option, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -345,7 +349,8 @@ impl Context { 0 ); 128])), regs: None, - ptrace_stop: false + ptrace_stop: false, + sigstack: None, }) } diff --git a/src/context/signal.rs b/src/context/signal.rs index ae6b2529..7b19831e 100644 --- a/src/context/signal.rs +++ b/src/context/signal.rs @@ -13,12 +13,12 @@ pub fn is_user_handled(handler: Option) -> bool { } pub extern "C" fn signal_handler(sig: usize) { - let (action, restorer) = { + let ((action, restorer), sigstack) = { let contexts = contexts(); let context_lock = contexts.current().expect("context::signal_handler not inside of context"); let context = context_lock.read(); let actions = context.actions.read(); - actions[sig] + (actions[sig], context.sigstack) }; let handler = action.sa_handler.map(|ptr| ptr as usize).unwrap_or(0); @@ -115,7 +115,7 @@ pub extern "C" fn signal_handler(sig: usize) { }; unsafe { - let mut sp = crate::USER_SIGSTACK_OFFSET + crate::USER_SIGSTACK_SIZE - 256; + let mut sp = sigstack.expect("sigaction was set while sigstack was not") - 256; sp = (sp / 16) * 16; diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 5b66dcb3..cb2fc921 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -23,6 +23,7 @@ use alloc::{ }; use core::{ cmp, + convert::TryFrom, mem, slice, str, @@ -102,6 +103,7 @@ enum Operation { Trace, Static(&'static str), Name, + Sigstack, } impl Operation { fn needs_child_process(self) -> bool { @@ -111,6 +113,7 @@ impl Operation { Self::Trace => true, Self::Static(_) => false, Self::Name => false, + Self::Sigstack => false, } } } @@ -251,6 +254,7 @@ impl Scheme for ProcScheme { Some("trace") => Operation::Trace, Some("exe") => Operation::Static("exe"), Some("name") => Operation::Name, + Some("sigstack") => Operation::Sigstack, _ => return Err(Error::new(EINVAL)) }; @@ -529,6 +533,13 @@ impl Scheme for ProcScheme { Ok(to_copy) } } + Operation::Sigstack => match context::contexts().current().ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes() { + sigstack => { + let to_copy = cmp::min(buf.len(), sigstack.len()); + buf[..to_copy].copy_from_slice(&sigstack[..to_copy]); + Ok(to_copy) + } + } } } @@ -719,6 +730,12 @@ impl Scheme for ProcScheme { *context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.write() = utf8; Ok(buf.len()) } + 
Operation::Sigstack => {
+                let bytes = <[u8; mem::size_of::<usize>()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?;
+                let sigstack = usize::from_ne_bytes(bytes);
+                context::contexts().current().ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack);
+                Ok(buf.len())
+            }
         }
     }
@@ -757,6 +774,7 @@ impl Scheme for ProcScheme {
             Operation::Trace => "trace",
             Operation::Static(path) => path,
             Operation::Name => "name",
+            Operation::Sigstack => "sigstack",
         });
 
         let len = cmp::min(path.len(), buf.len());
diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs
index 1e64e1bb..f1fc77f2 100644
--- a/src/syscall/driver.rs
+++ b/src/syscall/driver.rs
@@ -1,9 +1,9 @@
 use crate::interrupt::InterruptStack;
-use crate::memory::{allocate_frames_complex, deallocate_frames, Frame};
+use crate::memory::{allocate_frames_complex, deallocate_frames, Frame, PAGE_SIZE};
 use crate::paging::{ActivePageTable, PageFlags, PhysicalAddress, VirtualAddress};
 use crate::paging::entry::EntryFlags;
 use crate::context;
-use crate::context::memory::{Grant, Region};
+use crate::context::memory::{DANGLING, Grant, Region};
 use crate::syscall::error::{Error, EFAULT, EINVAL, ENOMEM, EPERM, ESRCH, Result};
 use crate::syscall::flag::{PhysallocFlags, PartialAllocStrategy, PhysmapFlags, PHYSMAP_WRITE, PHYSMAP_WRITE_COMBINE, PHYSMAP_NO_CACHE};
@@ -71,56 +71,47 @@ pub fn physfree(physical_address: usize, size: usize) -> Result<usize> {
 }
 
 //TODO: verify exclusive access to physical memory
+// TODO: Replace this completely with something such as `memory:physical`. Mmapping at offset
+// `physaddr` to `address` (optional) will map that physical address. We would have to find out
+// some way to pass flags such as WRITE_COMBINE/NO_CACHE however.
 pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result<usize> {
     //TODO: Abstract with other grant creation
     if size == 0 {
-        Ok(0)
-    } else {
-        let contexts = context::contexts();
-        let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
-        let context = context_lock.read();
-
-        let mut grants = context.grants.write();
-
-        let from_address = (physical_address/4096) * 4096;
-        let offset = physical_address - from_address;
-        let full_size = ((offset + size + 4095)/4096) * 4096;
-        let mut to_address = crate::USER_GRANT_OFFSET;
+        return Ok(DANGLING);
+    }
+    if size % PAGE_SIZE != 0 || physical_address % PAGE_SIZE != 0 {
+        return Err(Error::new(EINVAL));
+    }
+    // TODO: Enforce size being a multiple of the page size, fail otherwise.
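// Worked example of the stricter contract (addresses hypothetical): for a
// one-page device region at physical 0xFEBF_0000,
//
//     physmap(0xFEBF_0000, 4096, PHYSMAP_WRITE)   // Ok(chosen virtual address)
//     physmap(0xFEBF_0001, 4096, PHYSMAP_WRITE)   // Err(EINVAL): unaligned
//
// The kernel no longer rounds unaligned requests for the caller, and a zero
// size maps nothing, returning the DANGLING sentinel instead.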
- let mut page_flags = PageFlags::new().user(true); - if flags.contains(PHYSMAP_WRITE) { - page_flags = page_flags.write(true); - } - if flags.contains(PHYSMAP_WRITE_COMBINE) { - page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true); - } - #[cfg(target_arch = "x86_64")] // TODO: AARCH64 - if flags.contains(PHYSMAP_NO_CACHE) { - page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); - } + let contexts = context::contexts(); + let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context_lock.read(); - // TODO: Make this faster than Sonic himself by using le superpowers of BTreeSet + let mut grants = context.grants.write(); - for grant in grants.iter() { - let start = grant.start_address().data(); - if to_address + full_size < start { - break; - } + let dst_address = grants.find_free(size).ok_or(Error::new(ENOMEM))?; - let pages = (grant.size() + 4095) / 4096; - let end = start + pages * 4096; - to_address = end; - } + let mut page_flags = PageFlags::new().user(true); + if flags.contains(PHYSMAP_WRITE) { + page_flags = page_flags.write(true); + } + if flags.contains(PHYSMAP_WRITE_COMBINE) { + page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true); + } + #[cfg(target_arch = "x86_64")] // TODO: AARCH64 + if flags.contains(PHYSMAP_NO_CACHE) { + page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); + } - grants.insert(Grant::physmap( - PhysicalAddress::new(from_address), - VirtualAddress::new(to_address), - full_size, - page_flags - )); + grants.insert(Grant::physmap( + PhysicalAddress::new(physical_address), + dst_address.start_address(), + size, + page_flags, + )); - Ok(to_address + offset) - } + Ok(dst_address.start_address().data()) } pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { enforce_root()?; diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index d85a19b7..0f9f4408 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,8 +25,8 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; -use self::data::{ExecMemRange, Map, SigAction, Stat, TimeSpec}; -use self::error::{Error, Result, ENOSYS}; +use self::data::{CloneInfo, ExecMemRange, Map, SigAction, Stat, TimeSpec}; +use self::error::{Error, Result, ENOSYS, EINVAL}; use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags}; use self::number::*; @@ -134,6 +134,13 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_CLONE => { let b = CloneFlags::from_bits_truncate(b); + let info = if b.contains(CloneFlags::CLONE_VM) { + if d < core::mem::size_of::() { + return Err(Error::new(EINVAL)); + } + Some(&validate_slice(c as *const CloneInfo, 1)?[0]) + } else { None }; + #[cfg(not(target_arch = "x86_64"))] { //TODO: CLONE_STACK @@ -144,10 +151,11 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u #[cfg(target_arch = "x86_64")] { let old_rsp = stack.iret.rsp; + // TODO: Unify CLONE_STACK and CLONE_VM. 
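// Hedged sketch of the userspace side (argument order as dispatched above:
// b = flags, c = pointer to a CloneInfo, d = its size; the wrapper name is
// illustrative, not a defined API):
//
//     let info = CloneInfo { target_stack: stack_top, target_sigstack: !0 };
//     let child = clone_raw(CLONE_VM | CLONE_STACK,
//                           &info as *const CloneInfo as usize,
//                           mem::size_of::<CloneInfo>())?;
//
// The kernel only checks that CLONE_VM comes with d >= size_of::<CloneInfo>();
// target_sigstack == !0 means "no signal stack registered yet".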
if b.contains(flag::CLONE_STACK) { - stack.iret.rsp = c; + stack.iret.rsp = info.as_ref().ok_or(Error::new(EINVAL))?.target_stack; } - let ret = clone(b, bp).map(ContextId::into); + let ret = clone(b, bp, info).map(ContextId::into); stack.iret.rsp = old_rsp; ret } diff --git a/src/syscall/process.rs b/src/syscall/process.rs index a6e670f4..3df81647 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -27,7 +27,7 @@ use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, T use crate::{ptrace, syscall}; use crate::scheme::FileHandle; use crate::start::usermode; -use crate::syscall::data::{ExecMemRange, SigAction, Stat}; +use crate::syscall::data::{CloneInfo, ExecMemRange, SigAction, Stat}; use crate::syscall::error::*; use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags, CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM, @@ -37,7 +37,7 @@ use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, use crate::syscall::ptrace_event; use crate::syscall::validate::{validate_slice, validate_slice_mut}; -pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { +pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result { let ppid; let pid; { @@ -61,6 +61,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { let cwd; let files; let actions; + let old_sigstack; // Copy from old process { @@ -78,6 +79,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { ens = context.ens; sigmask = context.sigmask; umask = context.umask; + old_sigstack = context.sigstack; // Uncomment to disable threads on different CPUs //TODO: fix memory allocation races when this is removed @@ -360,6 +362,12 @@ pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { context.files = files; context.actions = actions; + + if flags.contains(CLONE_VM) { + context.sigstack = info.and_then(|info| (info.target_sigstack != !0).then(|| info.target_sigstack)); + } else { + context.sigstack = old_sigstack; + } } } @@ -747,24 +755,23 @@ pub fn setpgid(pid: ContextId, pgid: ContextId) -> Result { } pub fn sigaction(sig: usize, act_opt: Option<&SigAction>, oldact_opt: Option<&mut SigAction>, restorer: usize) -> Result { - if sig > 0 && sig <= 0x7F { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let mut actions = context.actions.write(); - - if let Some(oldact) = oldact_opt { - *oldact = actions[sig].0; - } + if sig == 0 || sig > 0x7F { + return Err(Error::new(EINVAL)); + } + let contexts = context::contexts(); + let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context_lock.read(); + let mut actions = context.actions.write(); - if let Some(act) = act_opt { - actions[sig] = (*act, restorer); - } + if let Some(oldact) = oldact_opt { + *oldact = actions[sig].0; + } - Ok(0) - } else { - Err(Error::new(EINVAL)) + if let Some(act) = act_opt { + actions[sig] = (*act, restorer); } + + Ok(0) } pub fn sigprocmask(how: usize, mask_opt: Option<&[u64; 2]>, oldmask_opt: Option<&mut [u64; 2]>) -> Result { -- GitLab From 23f49414bd007c2aba6ed7b7b9a08f6019641ed7 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 10 Jun 2022 11:43:50 +0200 Subject: [PATCH 06/44] Fix phys offset, lock grants correctly. 
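
ACPI tables are reached through the direct physical map, not the kernel
image, so turning an SDT's physical address into a virtual one must add
PHYS_OFFSET rather than KERNEL_OFFSET. The one-liner this patch repeats in
get_sdt() and init():

    // paddr -> vaddr through the physmap
    let sdt = unsafe { &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt) };

The grants fix replaces Arc::get_mut, which demands unique ownership of the
Arc and so failed spuriously while the grants were still shared, with an
ordinary RwLock write guard.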
--- src/acpi/mod.rs | 12 ++++++------ src/context/memory.rs | 3 ++- src/syscall/process.rs | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index 45c384df..d65696e8 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -30,7 +30,7 @@ mod rsdp; pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt { { - let page = Page::containing_address(VirtualAddress::new(sdt_address + crate::KERNEL_OFFSET)); + let page = Page::containing_address(VirtualAddress::new(sdt_address + crate::PHYS_OFFSET)); if active_table.translate_page(page).is_none() { let frame = Frame::containing_address(PhysicalAddress::new(sdt_address)); let result = active_table.map_to(page, frame, PageFlags::new()); @@ -38,15 +38,15 @@ pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'stat } } - let sdt = unsafe { &*((sdt_address + crate::KERNEL_OFFSET) as *const Sdt) }; + let sdt = unsafe { &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt) }; // Map extra SDT frames if required { - let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096 + crate::KERNEL_OFFSET)); - let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize + crate::KERNEL_OFFSET)); + let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096 + crate::PHYS_OFFSET)); + let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize + crate::PHYS_OFFSET)); for page in Page::range_inclusive(start_page, end_page) { if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::KERNEL_OFFSET)); + let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::PHYS_OFFSET)); let result = active_table.map_to(page, frame, PageFlags::new()); result.flush(); } @@ -125,7 +125,7 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O rxsdt.map_all(active_table); for sdt_address in rxsdt.iter() { - let sdt = &*((sdt_address + crate::KERNEL_OFFSET) as *const Sdt); + let sdt = &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt); let signature = get_sdt_signature(sdt); if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { diff --git a/src/context/memory.rs b/src/context/memory.rs index 802c49bd..244d51e1 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -94,9 +94,10 @@ impl UserGrants { // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). pub fn find_free(&self, size: usize) -> Option { // Get first available hole, but do reserve the page starting from zero as most compiled - // language cannot handle null pointers safely even if they do point to valid memory. If an + // languages cannot handle null pointers safely even if they point to valid memory. If an // application absolutely needs to map the 0th page, they will have to do so explicitly via // MAP_FIXED/MAP_FIXED_NOREPLACE. + // TODO: Allow explicitly allocating guard pages? 
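// Worked example (hole layout hypothetical): with free holes
// { 0x0: 16 KiB, 0x10_0000: 32 KiB } and a 16 KiB request, the hole at zero
// is rejected because reserving the null page leaves only 12 KiB usable,
// so the search below settles on the hole at 0x10_0000.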
let (hole_start, hole_size) = self.holes.iter().find(|(hole_offset, hole_size)| size <= if hole_offset.data() == 0 { hole_size.saturating_sub(PAGE_SIZE) } else { **hole_size })?; // Create new region diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 3df81647..243ca48a 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -191,8 +191,8 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> // If not cloning virtual memory, use fmap to re-obtain every grant where possible if !flags.contains(CLONE_VM) { - let grants = Arc::get_mut(&mut grants).ok_or(Error::new(EBUSY))?.get_mut(); - let old_grants = mem::take(grants); + let mut grants = grants.write(); + let old_grants = mem::take(&mut *grants); // TODO: Find some way to do this without having to allocate. @@ -1018,7 +1018,7 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { for (index, page) in grant.pages().enumerate() { let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); - unsafe { ((frame.start_address().data() + crate::KERNEL_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } } context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); -- GitLab From 31c4bc8a1cf04a5592327c18cfd6e2e7de958466 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 12 Jun 2022 11:42:28 +0200 Subject: [PATCH 07/44] Remove kernel support for fmap_old and funmap_old. --- src/scheme/memory.rs | 12 ------- src/scheme/user.rs | 83 -------------------------------------------- src/syscall/debug.rs | 12 ------- src/syscall/fs.rs | 39 --------------------- src/syscall/mod.rs | 18 ---------- 5 files changed, 164 deletions(-) diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 7ae887fa..f5f29a27 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -66,18 +66,6 @@ impl Scheme for MemoryScheme { fn fmap(&self, _id: usize, map: &Map) -> Result { Self::fmap_anonymous(map) } - fn fmap_old(&self, id: usize, map: &OldMap) -> Result { - if map.flags.contains(MapFlags::MAP_FIXED) { - // not supported for fmap, which lacks the address argument. 
- return Err(Error::new(EINVAL)); - } - self.fmap(id, &Map { - offset: map.offset, - size: map.size, - flags: map.flags, - address: 0, - }) - } fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { Ok(0) diff --git a/src/scheme/user.rs b/src/scheme/user.rs index e2a3cd2d..65433d20 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -381,59 +381,6 @@ impl Scheme for UserScheme { inner.call(SYS_FEVENT, file, flags.bits(), 0).map(EventFlags::from_bits_truncate) } - fn fmap_old(&self, file: usize, map: &OldMap) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - - let (pid, uid, gid, context_lock, desc) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - // TODO: Faster, cleaner mechanism to get descriptor - let scheme = inner.scheme_id.load(Ordering::SeqCst); - let mut desc_res = Err(Error::new(EBADF)); - for context_file_opt in context.files.read().iter() { - if let Some(context_file) = context_file_opt { - let (context_scheme, context_number) = { - let desc = context_file.description.read(); - (desc.scheme, desc.number) - }; - if context_scheme == scheme && context_number == file { - desc_res = Ok(context_file.clone()); - break; - } - } - } - let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) - }; - - let address = inner.capture(map)?; - - let id = inner.next_id.fetch_add(1, Ordering::SeqCst); - - inner.fmap.lock().insert(id, (context_lock, desc, Map { - offset: map.offset, - size: map.size, - flags: map.flags, - address: 0, - })); - - let result = inner.call_inner(Packet { - id, - pid: pid.into(), - uid, - gid, - a: SYS_FMAP_OLD, - b: file, - c: address, - d: mem::size_of::() - }); - - let _ = inner.release(address); - - result - } - fn fmap(&self, file: usize, map: &Map) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; @@ -482,36 +429,6 @@ impl Scheme for UserScheme { result } - fn funmap_old(&self, grant_address: usize) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address_opt = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let mut grants = context.grants.write(); - let funmap = &mut grants.funmap; - let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back(); - - let grant_address = VirtualAddress::new(grant_address); - - if let Some((&grant, &user_base)) = entry { - if grant_address >= grant.end_address() { - return Err(Error::new(EINVAL)); - } - funmap.remove(&grant); - let user = Region::new(user_base, grant.size()); - Some(grant.rebase(user, grant_address).data()) - } else { - None - } - }; - if let Some(user_address) = address_opt { - inner.call(SYS_FUNMAP_OLD, user_address, 0, 0) - } else { - Err(Error::new(EINVAL)) - } - } - fn funmap(&self, grant_address: usize, size: usize) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; let address_opt = { diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 5458998e..ced9eec4 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -106,14 +106,6 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - c, d ), - SYS_FMAP_OLD => format!( - "fmap_old({}, {:?})", - b, - validate_slice( - c as *const OldMap, - d/mem::size_of::() - ), - ), SYS_FMAP => format!( "fmap({}, {:?})", b, @@ -122,10 +114,6 @@ pub 
fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - d/mem::size_of::() ), ), - SYS_FUNMAP_OLD => format!( - "funmap_old({:#X})", - b - ), SYS_FUNMAP => format!( "funmap({:#X}, {:#X})", b, diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index 642a80af..cf833dc0 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -469,45 +469,6 @@ pub fn fstat(fd: FileHandle, stat: &mut Stat) -> Result { scheme.fstat(description.number, stat) } -pub fn funmap_old(virtual_address: usize) -> Result { - if virtual_address == 0 { - Ok(0) - } else { - let mut desc_opt = None; - - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.write(); - - if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - let mut grant = grants.take(®ion).unwrap(); - desc_opt = grant.desc_opt.take(); - grant.unmap(); - } - } - - if let Some(file_ref) = desc_opt { - let scheme_id = { file_ref.desc.description.read().scheme }; - - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - let res = scheme.funmap_old(virtual_address); - - let _ = file_ref.desc.close(); - - res - } else { - Err(Error::new(EFAULT)) - } - } -} - pub fn funmap(virtual_address: usize, length: usize) -> Result { if virtual_address == 0 || length == 0 { return Ok(0); diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index 0f9f4408..2f4f7b28 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -85,24 +85,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_FCNTL => fcntl(fd, c, d), SYS_FRENAME => frename(fd, validate_str(c as *const u8, d)?), SYS_FUNMAP => funmap(b, c), - SYS_FMAP_OLD => { - { - let contexts = crate::context::contexts(); - let current = contexts.current().unwrap(); - let current = current.read(); - println!("{:?} using deprecated fmap(...) call", *current.name.read()); - } - file_op(a, fd, c, d) - }, - SYS_FUNMAP_OLD => { - { - let contexts = crate::context::contexts(); - let current = contexts.current().unwrap(); - let current = current.read(); - println!("{:?} using deprecated funmap(...) call", *current.name.read()); - } - funmap_old(b) - }, _ => file_op(a, fd, c, d) } } -- GitLab From de28cc4918bcf738ec99e06a06db584032d1e956 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 16 Aug 2021 13:40:53 +0200 Subject: [PATCH 08/44] Add a Scheme supertrait for kernel-only methods. 
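
Storing Arc<dyn KernelScheme> instead of Arc<dyn Scheme> in the scheme list
leaves room for kernel-internal hooks without widening the public Scheme
trait. A minimal sketch of the pattern (toy type; this assumes, as in the
syscall crate, that every Scheme method has a default implementation):

    pub trait KernelScheme: Scheme + Send + Sync + 'static {}

    struct NullScheme;
    impl Scheme for NullScheme {}
    impl KernelScheme for NullScheme {}

For now the supertrait carries no methods; every in-tree scheme just gains
an empty impl.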
--- src/scheme/acpi.rs | 1 + src/scheme/debug.rs | 1 + src/scheme/event.rs | 1 + src/scheme/initfs.rs | 1 + src/scheme/irq.rs | 1 + src/scheme/itimer.rs | 1 + src/scheme/live.rs | 1 + src/scheme/memory.rs | 1 + src/scheme/mod.rs | 12 +++++++----- src/scheme/pipe.rs | 1 + src/scheme/proc.rs | 1 + src/scheme/root.rs | 1 + src/scheme/serio.rs | 1 + src/scheme/sys/mod.rs | 1 + src/scheme/time.rs | 1 + src/scheme/user.rs | 1 + 16 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs index 41e731b6..fd0929ee 100644 --- a/src/scheme/acpi.rs +++ b/src/scheme/acpi.rs @@ -288,3 +288,4 @@ impl Scheme for AcpiScheme { Ok(0) } } +impl crate::scheme::KernelScheme for AcpiScheme {} diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs index 0aa6ee74..9fd1739c 100644 --- a/src/scheme/debug.rs +++ b/src/scheme/debug.rs @@ -165,3 +165,4 @@ impl Scheme for DebugScheme { Ok(0) } } +impl crate::scheme::KernelScheme for DebugScheme {} diff --git a/src/scheme/event.rs b/src/scheme/event.rs index 3ae5015c..c9cf6632 100644 --- a/src/scheme/event.rs +++ b/src/scheme/event.rs @@ -71,3 +71,4 @@ impl Scheme for EventScheme { queues_mut().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for EventScheme {} diff --git a/src/scheme/initfs.rs b/src/scheme/initfs.rs index 4c7f3a59..c881cbdf 100644 --- a/src/scheme/initfs.rs +++ b/src/scheme/initfs.rs @@ -273,3 +273,4 @@ impl Scheme for InitFsScheme { Ok(0) } } +impl crate::scheme::KernelScheme for InitFsScheme {} diff --git a/src/scheme/irq.rs b/src/scheme/irq.rs index 09d4e5cf..dcf01726 100644 --- a/src/scheme/irq.rs +++ b/src/scheme/irq.rs @@ -371,3 +371,4 @@ impl Scheme for IrqScheme { Ok(0) } } +impl crate::scheme::KernelScheme for IrqScheme {} diff --git a/src/scheme/itimer.rs b/src/scheme/itimer.rs index 11bc5593..1c8f27d2 100644 --- a/src/scheme/itimer.rs +++ b/src/scheme/itimer.rs @@ -106,3 +106,4 @@ impl Scheme for ITimerScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for ITimerScheme {} diff --git a/src/scheme/live.rs b/src/scheme/live.rs index 6cc69638..9ccb4b04 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -202,3 +202,4 @@ impl Scheme for DiskScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for DiskScheme {} diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index f5f29a27..4dd65c49 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -85,3 +85,4 @@ impl Scheme for MemoryScheme { Ok(0) } } +impl crate::scheme::KernelScheme for MemoryScheme {} diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index f8f1a1bd..d781d6a6 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -107,7 +107,7 @@ impl<'a> Iterator for SchemeIter<'a> { /// Scheme list type pub struct SchemeList { - map: BTreeMap>, + map: BTreeMap>, names: BTreeMap, SchemeId>>, next_ns: usize, next_id: usize @@ -201,7 +201,7 @@ impl SchemeList { Ok(to) } - pub fn iter(&self) -> ::alloc::collections::btree_map::Iter> { + pub fn iter(&self) -> ::alloc::collections::btree_map::Iter> { self.map.iter() } @@ -212,11 +212,11 @@ impl SchemeList { } /// Get the nth scheme. 
- pub fn get(&self, id: SchemeId) -> Option<&Arc> { + pub fn get(&self, id: SchemeId) -> Option<&Arc> { self.map.get(&id) } - pub fn get_name(&self, ns: SchemeNamespace, name: &str) -> Option<(SchemeId, &Arc)> { + pub fn get_name(&self, ns: SchemeNamespace, name: &str) -> Option<(SchemeId, &Arc)> { if let Some(names) = self.names.get(&ns) { if let Some(&id) = names.get(name) { return self.get(id).map(|scheme| (id, scheme)); @@ -227,7 +227,7 @@ impl SchemeList { /// Create a new scheme. pub fn insert(&mut self, ns: SchemeNamespace, name: &str, scheme_fn: F) -> Result - where F: Fn(SchemeId) -> Arc + where F: Fn(SchemeId) -> Arc { if let Some(names) = self.names.get(&ns) { if names.contains_key(name) { @@ -298,3 +298,5 @@ pub fn schemes() -> RwLockReadGuard<'static, SchemeList> { pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { SCHEMES.call_once(init_schemes).write() } + +pub trait KernelScheme: Scheme + Send + Sync + 'static {} diff --git a/src/scheme/pipe.rs b/src/scheme/pipe.rs index d9e3fdca..ab7cb163 100644 --- a/src/scheme/pipe.rs +++ b/src/scheme/pipe.rs @@ -264,3 +264,4 @@ impl Drop for PipeWrite { self.condition.notify(); } } +impl crate::scheme::KernelScheme for PipeScheme {} diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index cb2fc921..e3365e24 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -824,3 +824,4 @@ impl Scheme for ProcScheme { Ok(0) } } +impl crate::scheme::KernelScheme for ProcScheme {} diff --git a/src/scheme/root.rs b/src/scheme/root.rs index 248c4b58..ff2a13f7 100644 --- a/src/scheme/root.rs +++ b/src/scheme/root.rs @@ -344,3 +344,4 @@ impl Scheme for RootScheme { Ok(0) } } +impl crate::scheme::KernelScheme for RootScheme {} diff --git a/src/scheme/serio.rs b/src/scheme/serio.rs index 0a747423..ba0169d1 100644 --- a/src/scheme/serio.rs +++ b/src/scheme/serio.rs @@ -162,3 +162,4 @@ impl Scheme for SerioScheme { Ok(0) } } +impl crate::scheme::KernelScheme for SerioScheme {} diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index 6c2c6f01..82cea67e 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -170,3 +170,4 @@ impl Scheme for SysScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for SysScheme {} diff --git a/src/scheme/time.rs b/src/scheme/time.rs index bc0143bd..a5f92864 100644 --- a/src/scheme/time.rs +++ b/src/scheme/time.rs @@ -117,3 +117,4 @@ impl Scheme for TimeScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for TimeScheme {} diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 65433d20..3ba08f48 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -528,3 +528,4 @@ impl Scheme for UserScheme { inner.call(SYS_CLOSE, file, 0, 0) } } +impl crate::scheme::KernelScheme for UserScheme {} -- GitLab From 37f9b292f18f15c1271732dcab09cf80a3beab91 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 12 Jun 2022 14:21:02 +0200 Subject: [PATCH 09/44] Add kfmap to fix properly reobtaining grants. --- src/context/memory.rs | 2 +- src/scheme/mod.rs | 9 +++- src/scheme/user.rs | 103 ++++++++++++++++++++++------------------- src/syscall/process.rs | 84 +++++++++++++-------------------- 4 files changed, 97 insertions(+), 101 deletions(-) diff --git a/src/context/memory.rs b/src/context/memory.rs index 244d51e1..6fd567cb 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -125,7 +125,7 @@ impl UserGrants { // ... 
but it already exists
        if flags.contains(MapFlags::MAP_FIXED_NOREPLACE) {
-            println!("grant: conflicts with: {:#x} - {:#x}", grant.start_address().data(), grant.end_address().data());
+            println!("grant: {:#x} conflicts with: {:#x} - {:#x}", address.data(), grant.start_address().data(), grant.end_address().data());
            return Err(Error::new(EEXIST));
        } else if flags.contains(MapFlags::MAP_FIXED) {
            // TODO: Overwrite existing grant
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index d781d6a6..885c8e0f 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -16,6 +16,7 @@ use alloc::{
 use core::sync::atomic::AtomicUsize;
 use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard};
+use crate::context::Context;
 use crate::syscall::error::*;
 use crate::syscall::scheme::Scheme;
@@ -299,4 +300,10 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> {
     SCHEMES.call_once(init_schemes).write()
 }
-pub trait KernelScheme: Scheme + Send + Sync + 'static {}
+pub trait KernelScheme: Scheme + Send + Sync + 'static {
+    #[allow(unused_variables)]
+    fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc<RwLock<Context>>) -> Result<usize> {
+        log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes");
+        Err(Error::new(ENOSYS))
+    }
+}
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index 3ba08f48..55c7f200 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -152,10 +152,6 @@ impl UserInner {
         let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round();
         let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?;
-        /*if !dst_region.intersect(Region::new(VirtualAddress::new(0x39d000), 1)).is_empty() {
-            dbg!(dst_region);
-        }*/
-
         //TODO: Use syscall_head and syscall_tail to avoid leaking data
         grants.insert(Grant::map_inactive(
             src_region.start_address(),
@@ -233,6 +229,9 @@ impl UserInner {
                 _ => println!("Unknown scheme -> kernel message {}", packet.a)
             }
         } else {
+            // The motivation for doing this here, instead of within the fmap handler, is that we
+            // can operate on an inactive table. This reduces the number of page table reloads
+            // from two (context switch + active TLB flush) to one (context switch).
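+            // (On a successful reply, the grant is mapped into the target context that was
+            // recorded when the request was submitted, which may differ from the context
+            // currently running; hence the inactive-table machinery.)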
if let Some((context_weak, desc, map)) = self.fmap.lock().remove(&packet.id) { if let Ok(address) = Error::demux(packet.a) { if address % PAGE_SIZE > 0 { @@ -274,6 +273,51 @@ impl UserInner { pub fn fsync(&self) -> Result { Ok(0) } + + fn fmap_inner(&self, file: usize, map: &Map, context_lock: &Arc>) -> Result { + let (pid, uid, gid, context_weak, desc) = { + let context = context_lock.read(); + // TODO: Faster, cleaner mechanism to get descriptor + let scheme = self.scheme_id.load(Ordering::SeqCst); + let mut desc_res = Err(Error::new(EBADF)); + for context_file_opt in context.files.read().iter() { + if let Some(context_file) = context_file_opt { + let (context_scheme, context_number) = { + let desc = context_file.description.read(); + (desc.scheme, desc.number) + }; + if context_scheme == scheme && context_number == file { + desc_res = Ok(context_file.clone()); + break; + } + } + } + let desc = desc_res?; + (context.id, context.euid, context.egid, Arc::downgrade(context_lock), desc) + }; + drop(context_lock); + + let address = self.capture(map)?; + + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + + self.fmap.lock().insert(id, (context_weak, desc, *map)); + + let result = self.call_inner(Packet { + id, + pid: pid.into(), + uid, + gid, + a: SYS_FMAP, + b: file, + c: address, + d: mem::size_of::() + }); + + let _ = self.release(address); + + result + } } /// `UserInner` has to be wrapped @@ -384,49 +428,7 @@ impl Scheme for UserScheme { fn fmap(&self, file: usize, map: &Map) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let (pid, uid, gid, context_lock, desc) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - // TODO: Faster, cleaner mechanism to get descriptor - let scheme = inner.scheme_id.load(Ordering::SeqCst); - let mut desc_res = Err(Error::new(EBADF)); - for context_file_opt in context.files.read().iter() { - if let Some(context_file) = context_file_opt { - let (context_scheme, context_number) = { - let desc = context_file.description.read(); - (desc.scheme, desc.number) - }; - if context_scheme == scheme && context_number == file { - desc_res = Ok(context_file.clone()); - break; - } - } - } - let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) - }; - - let address = inner.capture(map)?; - - let id = inner.next_id.fetch_add(1, Ordering::SeqCst); - - inner.fmap.lock().insert(id, (context_lock, desc, *map)); - - let result = inner.call_inner(Packet { - id, - pid: pid.into(), - uid, - gid, - a: SYS_FMAP, - b: file, - c: address, - d: mem::size_of::() - }); - - let _ = inner.release(address); - - result + inner.fmap_inner(file, map, &Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?)) } fn funmap(&self, grant_address: usize, size: usize) -> Result { @@ -528,4 +530,9 @@ impl Scheme for UserScheme { inner.call(SYS_CLOSE, file, 0, 0) } } -impl crate::scheme::KernelScheme for UserScheme {} +impl crate::scheme::KernelScheme for UserScheme { + fn kfmap(&self, number: usize, map: &Map, target_context: &Arc>) -> Result { + let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; + inner.fmap_inner(number, map, target_context) + } +} diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 243ca48a..090b7ca9 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -189,55 +189,25 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> } } - // If 
not cloning virtual memory, use fmap to re-obtain every grant where possible - if !flags.contains(CLONE_VM) { - let mut grants = grants.write(); - let old_grants = mem::take(&mut *grants); - - // TODO: Find some way to do this without having to allocate. - - // TODO: Check that the current process is not allowed to serve any scheme this logic - // could interfere with. Deadlocks would otherwise seem inevitable. - - for mut grant in old_grants.into_iter() { - let region = *grant.region(); - let address = region.start_address().data(); - let size = region.size(); - - let new_grant = if let Some(ref mut file_ref) = grant.desc_opt.take() { - // TODO: Technically this is redundant as the grants are already secret_cloned. - // Maybe grants with fds can be excluded from that step? - grant.unmap(); - - let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() }; - let scheme_arc = match crate::scheme::schemes().get(scheme) { - Some(s) => Arc::clone(s), - None => continue, - }; - let map = crate::syscall::data::Map { - address, - size, - offset: file_ref.offset, - flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE, - }; - - let ptr = match scheme_arc.fmap(number, &map) { - Ok(new_range) => new_range as *mut u8, - Err(_) => continue, - }; - - // This will eventually be freed from the parent context after move_to is - // called. - context::contexts().current().ok_or(Error::new(ESRCH))? - .read().grants.write() - .take(&Region::new(VirtualAddress::new(ptr as usize), map.size)) - .ok_or(Error::new(EFAULT))? - } else { - grant + let maps_to_reobtain = if flags.contains(CLONE_VM) { + Vec::new() + } else { + grants.read().iter().filter_map(|grant| grant.desc_opt.as_ref().and_then(|file_ref| { + let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() }; + let scheme_arc = match crate::scheme::schemes().get(scheme) { + Some(s) => Arc::downgrade(s), + None => return None, }; - grants.insert(new_grant); - } - } + let map = crate::syscall::data::Map { + address: grant.start_address().data(), + size: grant.size(), + offset: file_ref.offset, + flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE, + }; + + Some((scheme_arc, number, map)) + })).collect() + }; // If vfork, block the current process // This has to be done after the operations that may require context switches @@ -252,7 +222,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> } // Set up new process - { + let new_context_lock = { let mut contexts = context::contexts_mut(); let context_lock = contexts.new_context()?; let mut context = context_lock.write(); @@ -277,7 +247,9 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> context.cpu_id = Some(pid.into() % crate::cpu_count()); } - context.status = context::Status::Runnable; + // Start as blocked. This is to ensure the context is never switched before any grants + // that have to be remapped, are mapped. 
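+            // (The context is unblocked again at the very end of clone, once every
+            // file-backed grant has been re-obtained through kfmap below.)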
+ context.status = context::Status::Blocked; context.vfork = vfork; @@ -299,7 +271,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process"); let mut new_grants = UserGrants::new(); - for old_grant in grants.read().iter() { + for old_grant in grants.read().iter().filter(|g| g.desc_opt.is_none()) { new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable)); } context.grants = Arc::new(RwLock::new(new_grants)); @@ -368,7 +340,17 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> } else { context.sigstack = old_sigstack; } + + Arc::clone(context_lock) + }; + for (scheme_weak, number, map) in maps_to_reobtain { + let scheme = match scheme_weak.upgrade() { + Some(s) => s, + None => continue, + }; + let _ = scheme.kfmap(number, &map, &new_context_lock); } + new_context_lock.write().unblock(); } if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() { -- GitLab From 6e5015dcab718a21c5cfe494c7c8a5ca78e1c4a4 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Thu, 16 Jun 2022 14:51:23 +0200 Subject: [PATCH 10/44] WIP: Add necessary interfaces for setuid/setgid. --- Cargo.toml | 4 +- src/arch/x86_64/interrupt/handler.rs | 8 +- src/arch/x86_64/paging/mapper.rs | 11 +- src/context/memory.rs | 10 +- src/lib.rs | 2 + src/ptrace.rs | 84 ++++------- src/scheme/proc.rs | 215 ++++++++++++++++++++++----- src/syscall/process.rs | 6 +- 8 files changed, 225 insertions(+), 115 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 82ead039..2bd3aa7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,9 +44,7 @@ raw-cpuid = "10.2.0" x86 = { version = "0.47.0", default-features = false } [features] -# TODO: Fix multicore -#default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] -default = ["acpi", "graphical_debug", "serial_debug"] +default = ["acpi", "multi_core", "graphical_debug", "serial_debug"] acpi = [] doc = [] graphical_debug = [] diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index d8fa21f7..ddbef828 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -150,8 +150,6 @@ impl InterruptStack { /// Loads all registers from a struct used by the proc: /// scheme to read/write registers. pub fn load(&mut self, all: &IntRegisters) { - // TODO: Which of these should be allowed to change? 
-
         self.preserved.r15 = all.r15;
         self.preserved.r14 = all.r14;
         self.preserved.r13 = all.r13;
@@ -168,9 +166,11 @@ impl InterruptStack {
         self.scratch.rcx = all.rcx;
         self.scratch.rax = all.rax;
         self.iret.rip = all.rip;
+        self.iret.rsp = all.rsp;
+
+        // CS and SS are immutable
-        // These should probably be restricted
-        // self.iret.cs = all.cs;
+        // TODO: RFLAGS should be restricted before being changeable
         // self.iret.rflags = all.eflags;
     }
     /// Enables the "Trap Flag" in the FLAGS register, causing the CPU
diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs
index babefcca..ecc2d0b2 100644
--- a/src/arch/x86_64/paging/mapper.rs
+++ b/src/arch/x86_64/paging/mapper.rs
@@ -171,17 +171,18 @@ impl<'table> Mapper<'table> {
     }
 
     pub fn translate_page(&self, page: Page) -> Option<Frame> {
-        self.p4().next_table(page.p4_index())
-            .and_then(|p3| p3.next_table(page.p3_index()))
-            .and_then(|p2| p2.next_table(page.p2_index()))
-            .and_then(|p1| p1[page.p1_index()].pointed_frame())
+        self.translate_page_and_flags(page).map(|(frame, _)| frame)
     }
 
     pub fn translate_page_flags(&self, page: Page) -> Option<PageFlags<RmmA>> {
+        self.translate_page_and_flags(page).map(|(_, flags)| flags)
+    }
+    pub fn translate_page_and_flags(&self, page: Page) -> Option<(Frame, PageFlags<RmmA>)> {
         self.p4().next_table(page.p4_index())
             .and_then(|p3| p3.next_table(page.p3_index()))
             .and_then(|p2| p2.next_table(page.p2_index()))
-            .and_then(|p1| Some(p1[page.p1_index()].flags()))
+            .map(|p1| &p1[page.p1_index()])
+            .and_then(|entry| Some((entry.pointed_frame()?, entry.flags())))
     }
 
     /// Translate a virtual address to a physical one
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 6fd567cb..619e93e6 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -168,20 +168,20 @@ impl UserGrants {
             }
         }
     }
-    fn unreserve(&mut self, grant: &Region) {
+    fn unreserve(holes: &mut BTreeMap<VirtualAddress, usize>, grant: &Region) {
        // The size of any possible hole directly after the to-be-freed region.
-        let exactly_after_size = self.holes.remove(&grant.end_address());
+        let exactly_after_size = holes.remove(&grant.end_address());
        // There was a range that began exactly prior to the to-be-freed region, so simply
        // increment the size such that it occupies the grant too. If, in addition, there was a
        // hole directly after the grant, include it too in the size.
-        if let Some((hole_offset, hole_size)) = self.holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) {
+        if let Some((hole_offset, hole_size)) = holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) {
            *hole_size = grant.end_address().data() - hole_offset.data() + exactly_after_size.unwrap_or(0);
        } else {
            // There was no free region directly before the to-be-freed region, however we will
            // now unconditionally insert a new free region where the grant was, and add that extra
            // size if there was something after it.
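            // (E.g. freeing [0x3000, 0x5000) between holes [0x1000, 0x3000) and
            // [0x5000, 0x6000) leaves the single coalesced hole [0x1000, 0x6000).)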
- self.holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); + holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); } } pub fn insert(&mut self, grant: Grant) { @@ -193,7 +193,7 @@ impl UserGrants { } pub fn take(&mut self, region: &Region) -> Option { let grant = self.inner.take(region)?; - self.unreserve(region); + Self::unreserve(&mut self.holes, region); Some(grant) } pub fn iter(&self) -> impl Iterator + '_ { diff --git a/src/lib.rs b/src/lib.rs index 1fbfbe8b..80ba7463 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ #![deny(unused_must_use)] #![feature(allocator_api)] +#![feature(array_chunks)] #![feature(asm_const, asm_sym)] // TODO: Relax requirements of most asm invocations #![cfg_attr(target_arch = "aarch64", feature(llvm_asm))] // TODO: Rewrite using asm! #![feature(concat_idents)] @@ -53,6 +54,7 @@ #![feature(lang_items)] #![feature(naked_functions)] #![feature(ptr_internals)] +#![feature(slice_ptr_get, slice_ptr_len)] #![feature(thread_local)] #![no_std] diff --git a/src/ptrace.rs b/src/ptrace.rs index 7f7711a1..7327170e 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -2,6 +2,8 @@ //! handling should go here, unless they closely depend on the design //! of the scheme. +use rmm::Arch; + use crate::{ arch::{ interrupt::InterruptStack, @@ -21,6 +23,7 @@ use crate::{ flag::*, ptrace_event }, + CurrentRmmArch as RmmA, }; use alloc::{ @@ -445,66 +448,41 @@ pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> // |_| |_|\___|_| |_| |_|\___/|_| \__, | // |___/ -pub fn with_context_memory(context: &mut Context, offset: VirtualAddress, len: usize, f: F) -> Result<()> -where F: FnOnce(*mut u8) -> Result<()> -{ - // As far as I understand, mapping any regions following - // USER_TMP_MISC_OFFSET is safe because no other memory location - // is used after it. In the future it might be necessary to define - // a maximum amount of pages that can be mapped in one batch, - // which could be used to either internally retry `read`/`write` - // in `proc:/mem`, or return a partial read/write. - let start = Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET)); - - let mut active_page_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut target_page_table = unsafe { - InactivePageTable::from_address(context.arch.get_page_utable()) - }; - - // Find the physical frames for all pages - let mut frames = Vec::new(); +// Returns an iterator which splits [start, start + len) into an iterator of possibly trimmed +// pages. +fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator { + // TODO: Define this elsewhere! + #[cfg(target_arch = "x86_64")] + const KERNEL_SPLIT_START: usize = crate::PML4_SIZE * 256; - { - let mapper = target_page_table.mapper(); - - let mut inner = || -> Result<()> { - let start = Page::containing_address(offset); - let end = Page::containing_address(VirtualAddress::new(offset.data() + len - 1)); - for page in Page::range_inclusive(start, end) { - frames.push(( - mapper.translate_page(page).ok_or(Error::new(EFAULT))?, - mapper.translate_page_flags(page).ok_or(Error::new(EFAULT))? - )); - } - Ok(()) - }; - inner()?; + // Ensure no pages can overlap with kernel memory. 
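+    // (Out-of-range lengths are clamped rather than rejected: a span reaching into
+    // kernel space simply yields fewer chunks, so callers observe a short read or
+    // write instead of an error. Worked example: start=0x0ffe, len=0x3004 yields
+    // (0x0ffe, 0x2), (0x1000, 0x1000), (0x2000, 0x1000), (0x3000, 0x1000),
+    // (0x4000, 0x2).)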
+ if start.saturating_add(len) > KERNEL_SPLIT_START { + len = KERNEL_SPLIT_START.saturating_sub(start); } - // Map all the physical frames into linear pages - let pages = frames.len(); - let mut page = start; - let flush_all = PageFlushAll::new(); - for (frame, mut flags) in frames { - flags = flags.execute(false).write(true); - flush_all.consume(active_page_table.map_to(page, frame, flags)); + let first_len = core::cmp::min(len, PAGE_SIZE - start % PAGE_SIZE); + let first = Some((start, first_len)).filter(|(_, len)| *len > 0); + start += first_len; + len -= first_len; - page = page.next(); - } + let last_len = len % PAGE_SIZE; + len -= last_len; + let last = Some((start + len, last_len)).filter(|(_, len)| *len > 0); - flush_all.flush(); + first.into_iter().chain((start..start + len).step_by(PAGE_SIZE).map(|off| (off, PAGE_SIZE))).chain(last) +} - let res = f((start.start_address().data() + offset.data() % PAGE_SIZE) as *mut u8); +pub fn context_memory(context: &mut Context, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - // Unmap all the pages (but allow no deallocation!) - let mut page = start; - let flush_all = PageFlushAll::new(); - for _ in 0..pages { - flush_all.consume(active_page_table.unmap_return(page, true).0); - page = page.next(); - } + page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe { + // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the + // possible exception of an unaligned head/tail. - flush_all.flush(); + //log::info!("ADDR {:p} LEN {:#0x}", page as *const u8, len); - res + let frame = table.mapper().translate_page(Page::containing_address(VirtualAddress::new(addr)))?; + let start = RmmA::phys_to_virt(frame.start_address()).data() + addr % crate::memory::PAGE_SIZE; + Some(core::ptr::slice_from_raw_parts_mut(start as *mut u8, len)) + }) } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index e3365e24..76bff3cc 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,7 @@ use crate::{ - arch::paging::VirtualAddress, - context::{self, Context, ContextId, Status}, + arch::paging::{ActivePageTable, InactivePageTable, mapper::{Mapper, PageFlushAll}, Page, VirtualAddress}, + context::{self, Context, ContextId, Status, memory::{Grant, page_flags, Region}}, + memory::PAGE_SIZE, ptrace, scheme::{AtomicSchemeId, SchemeId}, syscall::{ @@ -19,6 +20,8 @@ use crate::{ use alloc::{ boxed::Box, collections::BTreeMap, + string::{String, ToString}, + sync::Arc, vec::Vec, }; use core::{ @@ -31,6 +34,14 @@ use core::{ }; use spin::RwLock; +fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result { + let byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset)); + let next_offset = offset.saturating_add(byte_count); + dst[..byte_count].copy_from_slice(&src[*offset..next_offset]); + *offset = next_offset; + Ok(byte_count) +} + fn with_context(pid: ContextId, callback: F) -> Result where F: FnOnce(&Context) -> Result, @@ -99,22 +110,27 @@ enum RegsKind { #[derive(Clone, Copy, PartialEq, Eq)] enum Operation { Memory, + Grants, Regs(RegsKind), Trace, Static(&'static str), Name, Sigstack, + Attr(Attr), + Files, +} +#[derive(Clone, Copy, PartialEq, Eq)] +enum Attr { + Uid, + Gid, + // TODO: namespace, tid, etc. 
} impl Operation { fn needs_child_process(self) -> bool { - match self { - Self::Memory => true, - Self::Regs(_) => true, - Self::Trace => true, - Self::Static(_) => false, - Self::Name => false, - Self::Sigstack => false, - } + matches!(self, Self::Memory | Self::Grants | Self::Regs(_) | Self::Trace | Self::Files) + } + fn needs_root(self) -> bool { + matches!(self, Self::Attr(_)) } } struct MemData { @@ -248,6 +264,7 @@ impl Scheme for ProcScheme { let operation = match parts.next() { Some("mem") => Operation::Memory, + Some("grants") => Operation::Grants, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), @@ -255,13 +272,16 @@ impl Scheme for ProcScheme { Some("exe") => Operation::Static("exe"), Some("name") => Operation::Name, Some("sigstack") => Operation::Sigstack, + Some("uid") => Operation::Attr(Attr::Uid), + Some("gid") => Operation::Attr(Attr::Gid), + Some("files") => Operation::Files, _ => return Err(Error::new(EINVAL)) }; let contexts = context::contexts(); let target = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let data; + let mut data; { let target = target.read(); @@ -303,6 +323,20 @@ impl Scheme for ProcScheme { None => return Err(Error::new(EPERM)), } } + } else if operation.needs_root() && (uid != 0 || gid != 0) { + return Err(Error::new(EPERM)); + } + + if matches!(operation, Operation::Files) { + data = OperationData::Static(StaticData::new({ + use core::fmt::Write; + + let mut data = String::new(); + for index in target.files.read().iter().enumerate().filter_map(|(idx, val)| val.as_ref().map(|_| idx)) { + write!(data, "{}\n", index).unwrap(); + } + data.into_bytes().into_boxed_slice() + })); } }; @@ -407,14 +441,23 @@ impl Scheme for ProcScheme { let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; let mut context = context.write(); - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - buf.copy_from_slice(validate::validate_slice(ptr, buf.len())?); - Ok(()) - })?; + let mut bytes_read = 0; - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_to_nonoverlapping(dst_slice.as_mut_ptr(), dst_slice.len()); + } + bytes_read += chunk.len(); + } + + data.offset = VirtualAddress::new(data.offset.data() + bytes_read); + Ok(bytes_read) }, + // TODO: Allow reading process mappings? 
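+            // (Reads of the grants file currently fail with EBADF; the write path
+            // below is the only implemented grants operation so far.)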
+ Operation::Grants => return Err(Error::new(EBADF)), + Operation::Regs(kind) => { union Output { float: FloatRegisters, @@ -526,19 +569,22 @@ impl Scheme for ProcScheme { // Return read events Ok(read * mem::size_of::()) } - Operation::Name => match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.read() { - name => { - let to_copy = cmp::min(buf.len(), name.len()); - buf[..to_copy].copy_from_slice(&name.as_bytes()[..to_copy]); - Ok(to_copy) - } + Operation::Name => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.read().as_bytes(), &mut 0), + Operation::Sigstack => read_from(buf, &context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes(), &mut 0), + Operation::Attr(attr) => { + let src_buf = match (attr, &*Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?).read()) { + (Attr::Uid, context) => context.euid.to_string(), + (Attr::Gid, context) => context.egid.to_string(), + }.into_bytes(); + + read_from(buf, &src_buf, &mut 0) } - Operation::Sigstack => match context::contexts().current().ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes() { - sigstack => { - let to_copy = cmp::min(buf.len(), sigstack.len()); - buf[..to_copy].copy_from_slice(&sigstack[..to_copy]); - Ok(to_copy) - } + Operation::Files => { + let mut handles = self.handles.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + let data = handle.data.static_data().expect("operations can't change"); + + read_from(buf, &data.buf, &mut data.offset) } } } @@ -571,14 +617,89 @@ impl Scheme for ProcScheme { let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; let mut context = context.write(); - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - validate::validate_slice_mut(ptr, buf.len())?.copy_from_slice(buf); - Ok(()) - })?; + let mut bytes_written = 0; - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_from_nonoverlapping(src_slice.as_ptr(), src_slice.len()); + } + bytes_written += chunk.len(); + } + + data.offset = VirtualAddress::new(data.offset.data() + bytes_written); + Ok(bytes_written) }, + Operation::Grants => { + // FIXME: Forbid upgrading external mappings. + + let pid = self.handles.read() + .get(&id).ok_or(Error::new(EBADF))? + .info.pid; + + let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + // Update grant mappings, like mprotect but allowed to target other contexts. 
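+                // (Wire format, per the parsing below: exactly three native-endian
+                // usizes, namely base, size, then the MapFlags bits, written in a
+                // single call; success returns 3 * size_of::<usize>() bytes.)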
+ let base = chunks.next().ok_or(Error::new(EINVAL))?; + let size = chunks.next().ok_or(Error::new(EINVAL))?; + let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; + let region = Region::new(VirtualAddress::new(base), size); + + if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::PML4_SIZE * 256 { + return Err(Error::new(EINVAL)); + } + + let is_inactive = pid != context::context_id(); + + let callback = |context: &mut Context| { + let mut inactive = is_inactive.then(|| unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }); + + let mut grants = context.grants.write(); + + let conflicting = grants.conflicts(region).map(|g| *g.region()).collect::>(); + for conflicting_region in conflicting { + let whole_grant = grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; + let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; + + if let Some(before) = before_opt { + grants.insert(before); + } + if let Some(after) = after_opt { + grants.insert(after); + } + + let res = if let Some(ref mut inactive) = inactive { + current.unmap_inactive(inactive) + } else { + current.unmap() + }; + if res.file_desc.is_some() { + drop(grants); + return Err(Error::new(EBUSY)); + } + + // TODO: Partial free if grant is mapped externally. + } + + if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { + let base = VirtualAddress::new(base); + + if let Some(ref mut inactive) = inactive { + grants.insert(Grant::zeroed_inactive(Page::containing_address(base), size / PAGE_SIZE, page_flags(flags), inactive).unwrap()); + } else { + grants.insert(Grant::map(base, size, page_flags(flags))); + } + } + Ok(()) + }; + + if is_inactive { + with_context_mut(pid, callback)?; + } else { + try_stop_context(pid, callback)?; + } + Ok(3 * mem::size_of::()) + } Operation::Regs(kind) => match kind { RegsKind::Float => { if buf.len() < mem::size_of::() { @@ -727,15 +848,26 @@ impl Scheme for ProcScheme { }, Operation::Name => { let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?.into_boxed_str(); - *context::contexts().current().ok_or(Error::new(ESRCH))?.read().name.write() = utf8; + *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.write() = utf8; Ok(buf.len()) } Operation::Sigstack => { let bytes = <[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?; let sigstack = usize::from_ne_bytes(bytes); - context::contexts().current().ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack); + context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack); Ok(buf.len()) } + Operation::Attr(attr) => { + let context_lock = Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?); + let id = core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?.parse::().map_err(|_| Error::new(EINVAL))?; + + match attr { + Attr::Uid => context_lock.write().euid = id, + Attr::Gid => context_lock.write().egid = id, + } + Ok(buf.len()) + } + Operation::Files => return Err(Error::new(EBADF)), } } @@ -768,6 +900,7 @@ impl Scheme for ProcScheme { let path = format!("proc:{}/{}", handle.info.pid.into(), match handle.info.operation { Operation::Memory => "mem", + Operation::Grants => "grants", Operation::Regs(RegsKind::Float) => "regs/float", Operation::Regs(RegsKind::Int) => "regs/int", 
Operation::Regs(RegsKind::Env) => "regs/env", @@ -775,12 +908,12 @@ impl Scheme for ProcScheme { Operation::Static(path) => path, Operation::Name => "name", Operation::Sigstack => "sigstack", + Operation::Attr(Attr::Uid) => "uid", + Operation::Attr(Attr::Gid) => "gid", + Operation::Files => "files", }); - let len = cmp::min(path.len(), buf.len()); - buf[..len].copy_from_slice(&path.as_bytes()[..len]); - - Ok(len) + read_from(buf, &path.as_bytes(), &mut 0) } fn fstat(&self, id: usize, stat: &mut Stat) -> Result { diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 090b7ca9..51eecfe5 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -260,7 +260,9 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> #[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))] unsafe { context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize; + x86::bits64::segmentation::swapgs(); context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; + x86::bits64::segmentation::swapgs(); } if flags.contains(CloneFlags::CLONE_VM) { @@ -1116,10 +1118,6 @@ pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize // TODO: Reuse in place if the file table is not shared. drop(context); - for file_slot in old_files.iter_mut().filter(|file_opt| file_opt.as_ref().map_or(false, |file| file.cloexec)) { - let file = file_slot.take().expect("iterator filter requires file slot to be occupied, not None"); - let _ = file.close(); - } let mut context = current_context_lock.write(); context.files = Arc::new(RwLock::new(old_files)); -- GitLab From 563121596de8edd6fa589c5ab70c54638f2b7649 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 17 Jun 2022 12:37:03 +0200 Subject: [PATCH 11/44] Fix running on multi_core. Turns out the problem all along was that the ActivePageTable was never dropped in usermode_bootstrap. So as soon as any other hardware thread tried to do page table business, it deadlocked! --- src/arch/x86_64/consts.rs | 6 ++---- src/context/memory.rs | 4 ++-- src/ptrace.rs | 8 ++------ src/scheme/proc.rs | 2 +- src/syscall/process.rs | 21 +++++++++++++-------- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index 33432e82..1656c8c2 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -38,7 +38,5 @@ /// Offset to user image pub const USER_OFFSET: usize = 0; - /// Offset for usage in other temporary pages - // TODO: Currently used for ptrace but should be removed or replaced with a kernel address. - pub const USER_TMP_MISC_OFFSET: usize = USER_OFFSET + PML4_SIZE; - pub const USER_TMP_MISC_PML4: usize = (USER_TMP_MISC_OFFSET & PML4_MASK)/PML4_SIZE; + /// End offset of the user image, i.e. 
kernel start + pub const USER_END_OFFSET: usize = 256 * PML4_SIZE; diff --git a/src/context/memory.rs b/src/context/memory.rs index 619e93e6..7461da61 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -68,7 +68,7 @@ impl UserGrants { pub fn new() -> Self { Self { inner: BTreeSet::new(), - holes: core::iter::once((VirtualAddress::new(0), crate::PML4_SIZE * 256)).collect::>(), + holes: core::iter::once((VirtualAddress::new(0), crate::USER_END_OFFSET)).collect::>(), funmap: BTreeMap::new(), } } @@ -114,7 +114,7 @@ impl UserGrants { let mut requested = Region::new(address, size); if - requested.end_address().data() > crate::PML4_SIZE * 256 // There are 256 PML4 entries reserved for userspace + requested.end_address().data() > crate::USER_END_OFFSET || address.data() % PAGE_SIZE != 0 { // ... but it was invalid diff --git a/src/ptrace.rs b/src/ptrace.rs index 7327170e..2234aa6a 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -451,13 +451,9 @@ pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> // Returns an iterator which splits [start, start + len) into an iterator of possibly trimmed // pages. fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator { - // TODO: Define this elsewhere! - #[cfg(target_arch = "x86_64")] - const KERNEL_SPLIT_START: usize = crate::PML4_SIZE * 256; - // Ensure no pages can overlap with kernel memory. - if start.saturating_add(len) > KERNEL_SPLIT_START { - len = KERNEL_SPLIT_START.saturating_sub(start); + if start.saturating_add(len) > crate::USER_END_OFFSET { + len = crate::USER_END_OFFSET.saturating_sub(start); } let first_len = core::cmp::min(len, PAGE_SIZE - start % PAGE_SIZE); diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 76bff3cc..70bc0778 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -645,7 +645,7 @@ impl Scheme for ProcScheme { let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; let region = Region::new(VirtualAddress::new(base), size); - if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::PML4_SIZE * 256 { + if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); } diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 51eecfe5..dc85d707 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -87,6 +87,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> cpu_id_opt = context.cpu_id; } + // TODO: Fill with newest registers. arch = context.arch.clone(); if let Some(ref fx) = context.kfx { @@ -115,6 +116,9 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> // // (base pointer - start of stack) - one offset = stack_base - stack.as_ptr() as usize - mem::size_of::(); // Add clone ret + // FIXME: This is incredibly UB, making Rust think the current stack being + // copied is simply a regular immutable slice. This part should either be + // written in assembly or have clone moved to userspace. 
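+            // (The aliasing is the core problem: the live kernel stack is still
+            // mutably reachable from this very call frame while `stack.clone()`
+            // reads it as an ordinary immutable slice.)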
let mut new_stack = stack.clone(); unsafe { @@ -352,7 +356,7 @@ pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> }; let _ = scheme.kfmap(number, &map, &new_context_lock); } - new_context_lock.write().unblock(); + new_context_lock.write().status = context::Status::Runnable; } if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() { @@ -997,12 +1001,14 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { const LOAD_BASE: usize = 0; let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), ((data.len()+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true)); - let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + { + let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - for (index, page) in grant.pages().enumerate() { - let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; - let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); - unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + for (index, page) in grant.pages().enumerate() { + let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; + let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); + unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + } } context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); @@ -1012,8 +1018,7 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { #[cfg(target_arch = "x86_64")] unsafe { let start = ((LOAD_BASE + 0x18) as *mut usize).read(); - // Start with the (probably) ELF executable loaded, without any stack the ability to load - // sections to arbitrary addresses. + // Start with the (probably) ELF executable loaded, without any stack. usermode(start, 0, 0, 0); } } -- GitLab From 283ada82a03057a4e1b7a0f049b19e2cd629e58a Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 4 Jul 2022 10:42:04 +0200 Subject: [PATCH 12/44] WIP: Remove SYS_CLONE (to be done in userspace). 
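The deadlock fixed in the previous patch is worth spelling out, since the same RAII pattern recurs throughout the kernel: the commit message suggests ActivePageTable acquires a global page-table lock on construction and releases it only on drop, so holding it across the diverging usermode() call leaks the lock forever. Below is a minimal standalone sketch of the bug and of the block-scoping fix applied in usermode_bootstrap; std::sync::Mutex stands in for the kernel's own locking, and all names are illustrative, not the kernel's actual API.

    use std::sync::Mutex;

    // Stand-in for the lock that ActivePageTable::new() appears to take.
    static PAGE_TABLE_LOCK: Mutex<()> = Mutex::new(());

    fn jump_to_usermode() -> ! {
        loop {} // stand-in for the real, diverging `usermode(...)` call
    }

    fn bootstrap_deadlocks() -> ! {
        let _guard = PAGE_TABLE_LOCK.lock().unwrap();
        // ... copy the bootstrap image ...
        jump_to_usermode() // _guard is never dropped; every later lock() blocks forever
    }

    fn bootstrap_fixed() -> ! {
        {
            let _guard = PAGE_TABLE_LOCK.lock().unwrap();
            // ... copy the bootstrap image ...
        } // _guard dropped here, releasing the lock before control leaves the kernel
        jump_to_usermode()
    }

The fix is purely about scope: closing the inner block runs the destructor before the function diverges.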
--- src/allocator/mod.rs | 3 +- src/arch/x86_64/interrupt/syscall.rs | 20 - src/arch/x86_64/paging/mapper.rs | 24 +- src/arch/x86_64/paging/mod.rs | 8 +- src/context/context.rs | 26 +- src/context/list.rs | 15 +- src/context/memory.rs | 316 ++++++--------- src/debugger.rs | 8 +- src/ptrace.rs | 10 +- src/scheme/live.rs | 3 +- src/scheme/memory.rs | 22 +- src/scheme/mod.rs | 2 +- src/scheme/proc.rs | 164 +++++--- src/scheme/sys/context.rs | 20 +- src/scheme/user.rs | 23 +- src/syscall/debug.rs | 3 +- src/syscall/driver.rs | 15 +- src/syscall/fs.rs | 10 +- src/syscall/mod.rs | 34 +- src/syscall/process.rs | 561 ++------------------------- 20 files changed, 379 insertions(+), 908 deletions(-) diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs index dfc618a4..0617843c 100644 --- a/src/allocator/mod.rs +++ b/src/allocator/mod.rs @@ -1,3 +1,4 @@ +use rmm::Flusher; use crate::paging::{ActivePageTable, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; #[cfg(not(feature="slab"))] @@ -13,7 +14,7 @@ mod linked_list; mod slab; unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) { - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1)); diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 70fd2a60..803a6d8d 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -160,23 +160,3 @@ interrupt_stack!(syscall, |stack| { syscall::syscall(scratch.rax, stack.preserved.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack) }) }); - -#[naked] -pub unsafe extern "C" fn clone_ret() { - core::arch::asm!(concat!( - // The address of this instruction is injected by `clone` in process.rs, on - // top of the stack syscall->inner in this file, which is done using the rbp - // register we save there. - // - // The top of our stack here is the address pointed to by rbp, which is: - // - // - the previous rbp - // - the return location - // - // Our goal is to return from the parent function, inner, so we restore - // rbp... - "pop rbp\n", - // ...and we return to the address at the top of the stack - "ret\n", - ), options(noreturn)); -} diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index ecc2d0b2..95f404b8 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -1,13 +1,15 @@ use super::{linear_phys_to_virt, Page, PAGE_SIZE, PageFlags, PhysicalAddress, VirtualAddress}; + +use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::{allocate_frames, deallocate_frames, Enomem, Frame}; use super::RmmA; use super::table::{Table, Level4}; -pub use rmm::{PageFlush, PageFlushAll}; +pub use rmm::{Flusher, PageFlush, PageFlushAll}; pub struct Mapper<'table> { - p4: &'table mut Table, + pub(in super) p4: &'table mut Table, } impl core::fmt::Debug for Mapper<'_> { @@ -192,3 +194,21 @@ impl<'table> Mapper<'table> { .map(|frame| PhysicalAddress::new(frame.start_address().data() + offset)) } } + +pub struct InactiveFlusher { _inner: () } +impl InactiveFlusher { + // TODO: cpu id + pub fn new() -> Self { Self { _inner: () } } +} + +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. 
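+        // (Each consumed flush is deliberately ignored: this table is not the
+        // active one on the current CPU, and the single IPI sent from Drop below
+        // amortizes the remote-TLB shootdown over the whole batch.)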
+ unsafe { flush.ignore(); } + } +} +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); + } +} diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index cf732b93..aca613ea 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -14,6 +14,7 @@ use self::table::{Level4, Table}; pub use rmm::{ Arch as RmmArch, + Flusher, PageFlags, PhysicalAddress, TableKind, @@ -112,7 +113,7 @@ unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; let end = start + size; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(start)); let end_page = Page::containing_address(VirtualAddress::new(end - 1)); for page in Page::range_inclusive(start_page, end_page) { @@ -288,6 +289,11 @@ impl ActivePageTable { pub unsafe fn address(&self) -> usize { RmmA::table().data() } + pub fn mapper<'a>(&'a mut self) -> Mapper<'a> { + Mapper { + p4: self.p4, + } + } } impl Drop for ActivePageTable { diff --git a/src/context/context.rs b/src/context/context.rs index 614318a2..b351ee53 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -16,13 +16,13 @@ use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::UserGrants; +use crate::context::memory::{AddrSpace, new_addrspace, UserGrants}; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; use crate::syscall::data::SigAction; -use crate::syscall::error::{Result, Error, ENOMEM}; +use crate::syscall::error::{Result, Error, ENOMEM, ESRCH}; use crate::syscall::flag::{SIG_DFL, SigActionFlags}; /// Unique identifier for a context (i.e. `pid`). @@ -226,8 +226,9 @@ pub struct Context { pub ksig: Option<(arch::Context, Option>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, - /// User grants - pub grants: Arc>, + /// Address space containing a page table lock, and grants. Normally this will have a value, + /// but can be None while the context is being reaped. 
+ pub addr_space: Option>>, /// The name of the context pub name: Arc>>, /// The current working directory @@ -307,7 +308,7 @@ impl Context { let syscall_head = AlignedBox::try_zeroed()?; let syscall_tail = AlignedBox::try_zeroed()?; - Ok(Context { + let mut this = Context { id, pgid: id, ppid: ContextId::from(0), @@ -336,7 +337,7 @@ impl Context { kstack: None, ksig: None, ksig_restore: false, - grants: Arc::new(RwLock::new(UserGrants::default())), + addr_space: None, name: Arc::new(RwLock::new(String::new().into_boxed_str())), cwd: Arc::new(RwLock::new(String::new())), files: Arc::new(RwLock::new(Vec::new())), @@ -351,7 +352,9 @@ impl Context { regs: None, ptrace_stop: false, sigstack: None, - }) + }; + this.set_addr_space(new_addrspace()?.1); + Ok(this) } /// Make a relative path absolute @@ -520,4 +523,13 @@ impl Context { None } } + + pub fn addr_space(&self) -> Result<&Arc>> { + self.addr_space.as_ref().ok_or(Error::new(ESRCH)) + } + pub fn set_addr_space(&mut self, addr_space: Arc>) { + assert!(!self.running); + self.arch.set_page_utable(addr_space.read().frame.utable.start_address().data()); + self.addr_space = Some(addr_space); + } } diff --git a/src/context/list.rs b/src/context/list.rs index f1a9b541..dc115364 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -7,7 +7,7 @@ use core::sync::atomic::Ordering; use crate::paging::{ActivePageTable, TableKind}; use spin::RwLock; -use crate::syscall::error::{Result, Error, EAGAIN}; +use crate::syscall::error::{Result, Error, EAGAIN, ENOMEM}; use super::context::{Context, ContextId}; /// Context list type @@ -79,7 +79,11 @@ impl ContextList { let context_lock = self.new_context()?; { let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) }; + let mut fx = unsafe { + let ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; + if ptr.is_null() { return Err(Error::new(ENOMEM)); } + Box::from_raw(ptr) + }; for b in fx.iter_mut() { *b = 0; } @@ -100,13 +104,6 @@ impl ContextList { context.arch.set_context_handle(); } - let mut new_tables = super::memory::setup_new_utable()?; - new_tables.take(); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - context.arch.set_fx(fx.as_ptr() as usize); context.arch.set_stack(stack.as_ptr() as usize + offset); context.kfx = Some(fx); diff --git a/src/context/memory.rs b/src/context/memory.rs index 7461da61..95e57f57 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -5,7 +5,8 @@ use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; use core::intrinsics; use core::ops::Deref; -use spin::Mutex; +use core::sync::atomic; +use spin::{Mutex, RwLock}; use syscall::{ flag::MapFlags, error::*, @@ -14,9 +15,8 @@ use rmm::Arch as _; use crate::arch::paging::PAGE_SIZE; use crate::context::file::FileDescriptor; -use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::Frame; -use crate::paging::mapper::PageFlushAll; +use crate::paging::mapper::{Flusher, InactiveFlusher, Mapper, PageFlushAll}; use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, TableKind, VirtualAddress}; /// Round down to the nearest multiple of page size @@ -47,6 +47,76 @@ impl Drop for UnmapResult { } } +int_like!(PtId, usize); + +static ADDRSPACES: 
RwLock>>> = RwLock::new(BTreeMap::new()); +static NEXT_PTID: atomic::AtomicUsize = atomic::AtomicUsize::new(1); + +pub fn new_addrspace() -> Result<(PtId, Arc>)> { + let id = PtId::from(NEXT_PTID.fetch_add(1, atomic::Ordering::Relaxed)); + let arc = Arc::try_new(RwLock::new(AddrSpace::new(id)?)).map_err(|_| Error::new(ENOMEM))?; + ADDRSPACES.write().insert(id, Arc::clone(&arc)); + Ok((id, arc)) +} +pub fn addrspace(id: PtId) -> Option>> { + ADDRSPACES.read().get(&id).map(Arc::clone) +} + +#[derive(Debug)] +pub struct AddrSpace { + pub frame: Tables, + pub grants: UserGrants, + pub id: PtId, +} +impl AddrSpace { + /// Attempt to clone an existing address space so that all mappings are copied (CoW). + // TODO: Actually use CoW! + pub fn try_clone(&self) -> Result<(PtId, Arc>)> { + let (id, mut new) = new_addrspace()?; + + // TODO: Abstract away this. + let (mut inactive, mut active); + + // TODO: aarch64 + let mut this_mapper = if self.frame.utable.start_address().data() == unsafe { x86::controlregs::cr3() } as usize { + active = unsafe { ActivePageTable::new(rmm::TableKind::User) }; + active.mapper() + } else { + inactive = unsafe { InactivePageTable::from_address(self.frame.utable.start_address().data()) }; + inactive.mapper() + }; + let mut new_mapper = unsafe { InactivePageTable::from_address(new.read().frame.utable.start_address().data()) }; + + for grant in self.grants.iter() { + // TODO: Fail if there are borrowed grants, rather than simply ignoring them? + if !grant.is_owned() { continue; } + + let new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?; + + for page in new_grant.pages() { + // FIXME: ENOMEM is wrong here, it cannot fail. + let current_frame = this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *const u8; + let new_frame = new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *mut u8; + + // TODO: Replace this with CoW + unsafe { + new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); + } + } + + new.write().grants.insert(new_grant); + } + Ok((id, new)) + } + pub fn new(id: PtId) -> Result { + Ok(Self { + grants: UserGrants::new(), + frame: setup_new_utable()?, + id, + }) + } +} + #[derive(Debug)] pub struct UserGrants { inner: BTreeSet, @@ -406,7 +476,7 @@ impl Grant { pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(to); let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); @@ -429,40 +499,10 @@ impl Grant { desc_opt: None, } } - - pub fn map(to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = active_table - .map(page, flags) - .expect("TODO: handle ENOMEM in Grant::map"); - flush_all.consume(result); - } - - flush_all.flush(); - - Grant { - region: Region { - start: to, - size, - }, - flags, - mapped: true, - owned: true, - desc_opt: None, - } - } - pub fn zeroed_inactive(dst: Page, page_count: usize, 
flags: PageFlags, table: &mut InactivePageTable) -> Result { - let mut inactive_mapper = table.mapper(); - + pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags, mapper: &mut Mapper, mut flusher: impl Flusher) -> Result { for page in Page::range_exclusive(dst, dst.next_by(page_count)) { - let flush = inactive_mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?; - unsafe { flush.ignore(); } + let flush = mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?; + flusher.consume(flush); } Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None }) } @@ -487,8 +527,6 @@ impl Grant { unsafe { inactive_flush.ignore(); } } - ipi(IpiKind::Tlb, IpiTarget::Other); - Grant { region: Region { start: dst, @@ -501,97 +539,22 @@ impl Grant { } } - /// This function should only be used in clone! - pub(crate) fn secret_clone(&self, inactive_table: &mut InactivePageTable) -> Grant { - assert!(self.mapped); - - let active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut inactive_mapper = inactive_table.mapper(); - - for page in self.pages() { - //TODO: One function to do both? - let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let old_frame = active_table.translate_page(page).expect("grant references unmapped memory"); - - let frame = if self.owned { - // TODO: CoW paging - let new_frame = crate::memory::allocate_frames(1) - .expect("TODO: handle ENOMEM in Grant::secret_clone"); - - unsafe { - // We might as well use self.start_address() directly, but if we were to - // introduce SMAP it would help to only move to/from kernel memory, and we are - // copying physical frames anyway. - let src_pointer = RmmA::phys_to_virt(old_frame.start_address()).data() as *const u8; - let dst_pointer = RmmA::phys_to_virt(new_frame.start_address()).data() as *mut u8; - dst_pointer.copy_from_nonoverlapping(src_pointer, PAGE_SIZE); - } - - new_frame - } else { - old_frame - }; - - let flush = inactive_mapper.map_to(page, frame, flags); - // SAFETY: This happens within an inactive table. 
- unsafe { flush.ignore() } - } - - Grant { - region: Region { - start: self.region.start, - size: self.region.size, - }, - flags: self.flags, - mapped: true, - owned: self.owned, - desc_opt: self.desc_opt.clone() - } - } - pub fn flags(&self) -> PageFlags { self.flags } - pub fn unmap(mut self) -> UnmapResult { - assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(self.start_address().kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let (result, frame) = active_table.unmap_return(page, false); - if self.owned { - //TODO: make sure this frame can be safely freed, physical use counter - crate::memory::deallocate_frames(frame, 1); - } - flush_all.consume(result); - } - - flush_all.flush(); - - self.mapped = false; - - // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap - UnmapResult { file_desc: self.desc_opt.take() } - } - - pub fn unmap_inactive(mut self, other_table: &mut InactivePageTable) -> UnmapResult { + pub fn unmap(mut self, mapper: &mut Mapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); for page in self.pages() { - let (result, frame) = other_table.mapper().unmap_return(page, false); + let (result, frame) = mapper.unmap_return(page, false); if self.owned { //TODO: make sure this frame can be safely freed, physical use counter crate::memory::deallocate_frames(frame, 1); } - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } + flusher.consume(result); } - ipi(IpiKind::Tlb, IpiTarget::Other); - self.mapped = false; // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap @@ -636,34 +599,6 @@ impl Grant { Some((before_grant, self, after_grant)) } - pub fn move_to_address_space(&mut self, new_start: Page, new_page_table: &mut InactivePageTable, flags: PageFlags, flush_all: &mut PageFlushAll) -> Grant { - assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut new_mapper = new_page_table.mapper(); - let keep_parents = false; - - for (i, page) in self.pages().enumerate() { - unsafe { - let (flush, frame) = active_table.unmap_return(page, keep_parents); - flush_all.consume(flush); - - let flush = new_mapper.map_to(new_start.next_by(i), frame, flags); - flush.ignore(); - } - } - - let was_owned = core::mem::replace(&mut self.owned, false); - self.mapped = false; - - Self { - region: Region::new(new_start.start_address(), self.region.size), - flags, - mapped: true, - owned: was_owned, - desc_opt: self.desc_opt.clone(), - } - } } impl Deref for Grant { @@ -704,79 +639,68 @@ impl Drop for Grant { pub const DANGLING: usize = 1 << (usize::BITS - 2); -pub struct NewTables { +#[derive(Debug)] +pub struct Tables { #[cfg(target_arch = "aarch64")] - pub new_ktable: InactivePageTable, - pub new_utable: InactivePageTable, + pub ktable: Frame, - taken: bool, -} -impl NewTables { - pub fn take(&mut self) { - self.taken = true; - } + pub utable: Frame, } -impl Drop for NewTables { +impl Drop for Tables { fn drop(&mut self) { - if self.taken { return } - - unsafe { - use crate::memory::deallocate_frames; - deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_utable.address())), 1); + use crate::memory::deallocate_frames; + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1); - #[cfg(target_arch = "aarch64")] - 
deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.new_ktable.address())), 1); - } + #[cfg(target_arch = "aarch64")] + deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.ktable.start_address().data())), 1); } } /// Allocates a new identically mapped ktable and empty utable (same memory on x86_64). -pub fn setup_new_utable() -> Result { - let mut new_utable = unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) }; +pub fn setup_new_utable() -> Result { + let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; - let mut new_ktable = if cfg!(target_arch = "aarch64") { - unsafe { InactivePageTable::new(crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?) } - } else { - unsafe { InactivePageTable::from_address(new_utable.address()) } - }; + // TODO: There is only supposed to be one ktable, right? Use a global variable to store the + // ktable (or access it from a control register) on architectures which have ktables, or obtain + // it from *any* utable on architectures which do not. + #[cfg(target_arch = "aarch64")] + let new_ktable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - // Copy kernel image mapping - { - let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PML4].flags(); + #[cfg(target_arch = "aarch64")] + let ktable = &new_ktable; - new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags); - } + #[cfg(not(target_arch = "aarch64"))] + let ktable = &new_utable; - // Copy kernel heap mapping - { - let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags(); + let mut new_mapper = unsafe { InactivePageTable::from_address(ktable.start_address().data()) }; - new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); - } + let mut copy_mapping = |p4_no| { + let frame = active_ktable.p4()[p4_no].pointed_frame().expect("kernel image not mapped"); + let flags = active_ktable.p4()[p4_no].flags(); + + new_mapper.mapper().p4_mut()[p4_no].set(frame, flags); + }; + // TODO: Just copy all 256 mappings? + + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); + + // Copy kernel heap mapping + copy_mapping(crate::KERNEL_HEAP_PML4); // Copy physmap mapping - { - let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped"); - let flags = active_ktable.p4()[crate::PHYS_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags); - } + copy_mapping(crate::PHYS_PML4); + // Copy kernel percpu (similar to TLS) mapping. 
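+    // NOTE: copy_mapping only copies top-level (PML4) entries, so these kernel
+    // regions are shared rather than duplicated: the lower-level tables stay common
+    // to all address spaces, which is what lets the kernel keep running immediately
+    // after a page-table switch.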
- { - let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags); - } + copy_mapping(crate::KERNEL_PERCPU_PML4); - Ok(NewTables { - taken: false, - new_utable, + Ok(Tables { + utable: new_utable, #[cfg(target_arch = "aarch64")] - new_ktable, + ktable: new_ktable, }) } diff --git a/src/debugger.rs b/src/debugger.rs index e49edf9a..6157f243 100644 --- a/src/debugger.rs +++ b/src/debugger.rs @@ -19,11 +19,11 @@ pub unsafe fn debugger() { if let Some((a, b, c, d, e, f)) = context.syscall { println!("syscall: {}", crate::syscall::debug::format_call(a, b, c, d, e, f)); } - { - let grants = context.grants.read(); - if ! grants.is_empty() { + if let Some(ref addr_space) = context.addr_space { + let addr_space = addr_space.read(); + if ! addr_space.grants.is_empty() { println!("grants:"); - for grant in grants.iter() { + for grant in addr_space.grants.iter() { let region = grant.region(); println!( " virt 0x{:016x}:0x{:016x} size 0x{:08x} {}", diff --git a/src/ptrace.rs b/src/ptrace.rs index 2234aa6a..dc76ee4d 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -190,7 +190,11 @@ pub fn is_traced(pid: ContextId) -> bool { /// Trigger a notification to the event: scheme fn proc_trigger_event(file_id: usize, flags: EventFlags) { - event::trigger(proc::PROC_SCHEME_ID.load(Ordering::SeqCst), file_id, flags); + if let Some(scheme_id) = proc::PROC_SCHEME_ID.get() { + event::trigger(*scheme_id, file_id, flags); + } else { + log::warn!("Failed to trigger proc event: scheme never initialized"); + } } /// Dispatch an event to any tracer tracing `self`. This will cause @@ -471,6 +475,10 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator impl Iterator> + '_ { let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + // TODO: Iterate over grants instead to avoid yielding None too many times. What if + // context_memory is used for an entire process's address space, where the stack is at the very + // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then + // onwards. page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe { // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the // possible exception of an unaligned head/tail. 
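For context, page_aligned_chunks (used by context_memory above) splits an arbitrary
[start, start + len) byte range into chunks that never cross a page boundary, so each
chunk can be read through a single translated frame. A minimal sketch of that splitting
logic, reconstructed from the signature and the head/tail comment above (the actual body
in src/ptrace.rs is not shown in this hunk and may differ):

    const PAGE_SIZE: usize = 4096; // assumption: 4 KiB pages, as on x86_64

    fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator<Item = (usize, usize)> {
        // Unaligned head: from `start` up to the next page boundary, or the whole
        // range if it ends before that boundary.
        let head_len = core::cmp::min(len, PAGE_SIZE - start % PAGE_SIZE);
        let head = Some((start, head_len)).filter(|&(_, l)| l > 0);
        start += head_len;
        len -= head_len;

        // The remainder starts page-aligned; every chunk is a full page except
        // possibly the unaligned tail.
        let mut off = 0;
        let rest = core::iter::from_fn(move || {
            if off >= len {
                return None;
            }
            let l = core::cmp::min(PAGE_SIZE, len - off);
            let chunk = (start + off, l);
            off += l;
            Some(chunk)
        });

        head.into_iter().chain(rest)
    }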
diff --git a/src/scheme/live.rs b/src/scheme/live.rs index 9ccb4b04..b5d1e177 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -5,6 +5,7 @@ use alloc::collections::BTreeMap; use core::{slice, str}; use core::sync::atomic::{AtomicUsize, Ordering}; use spin::RwLock; +use rmm::Flusher; use syscall::data::Stat; use syscall::error::*; @@ -55,7 +56,7 @@ impl DiskScheme { let virt = phys + crate::PHYS_OFFSET; unsafe { let mut active_table = ActivePageTable::new(TableKind::Kernel); - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(virt)); let end_page = Page::containing_address(VirtualAddress::new(virt + size - 1)); for page in Page::range_inclusive(start_page, end_page) { diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 4dd65c49..44c7d165 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,7 +1,7 @@ use crate::context; use crate::context::memory::{page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; -use crate::paging::{ActivePageTable, VirtualAddress}; +use crate::paging::{ActivePageTable, mapper::PageFlushAll, Page, VirtualAddress}; use crate::syscall::data::{Map, OldMap, StatVfs}; use crate::syscall::error::*; use crate::syscall::flag::MapFlags; @@ -23,25 +23,11 @@ impl MemoryScheme { let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let region = addr_space.grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - { - // Make sure it's *absolutely* not mapped already - // TODO: Keep track of all allocated memory so this isn't necessary - - let active_table = unsafe { ActivePageTable::new(rmm::TableKind::User) }; - - for page in region.pages() { - if let Some(flags) = active_table.translate_page_flags(page).filter(|flags| flags.has_present()) { - println!("page at {:#x} was already mapped, flags: {:?}", page.start_address().data(), flags); - return Err(Error::new(EEXIST)) - } - } - } - - grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); + addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())?); Ok(region.start_address().data()) } diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 885c8e0f..03113568 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -301,7 +301,7 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { } pub trait KernelScheme: Scheme + Send + Sync + 'static { - #[allow(unused_arguments)] + #[allow(unused_variables)] fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc>) -> Result { log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes"); Err(Error::new(ENOSYS)) diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 70bc0778..31222a37 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,6 @@ use crate::{ - arch::paging::{ActivePageTable, InactivePageTable, mapper::{Mapper, PageFlushAll}, Page, VirtualAddress}, - context::{self, Context, ContextId, Status, memory::{Grant, page_flags, Region}}, + arch::paging::{ActivePageTable, Flusher, InactivePageTable, 
mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress}, + context::{self, Context, ContextId, Status, memory::{addrspace, Grant, new_addrspace, PtId, page_flags, Region}}, memory::PAGE_SIZE, ptrace, scheme::{AtomicSchemeId, SchemeId}, @@ -32,7 +32,7 @@ use core::{ str, sync::atomic::{AtomicUsize, Ordering}, }; -use spin::RwLock; +use spin::{Once, RwLock}; fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result { let byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset)); @@ -68,7 +68,7 @@ where } fn try_stop_context(pid: ContextId, mut callback: F) -> Result where - F: FnMut(&mut Context) -> Result, + F: FnOnce(&mut Context) -> Result, { if pid == context::context_id() { return Err(Error::new(EBADF)); @@ -118,6 +118,8 @@ enum Operation { Sigstack, Attr(Attr), Files, + AddrSpace { id: PtId }, + CurrentAddrSpace, } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -216,7 +218,7 @@ impl Handle { } } -pub static PROC_SCHEME_ID: AtomicSchemeId = AtomicSchemeId::default(); +pub static PROC_SCHEME_ID: Once = Once::new(); pub struct ProcScheme { next_id: AtomicUsize, @@ -231,7 +233,7 @@ pub enum Access { impl ProcScheme { pub fn new(scheme_id: SchemeId) -> Self { - PROC_SCHEME_ID.store(scheme_id, Ordering::SeqCst); + PROC_SCHEME_ID.call_once(|| scheme_id); Self { next_id: AtomicUsize::new(0), @@ -246,6 +248,11 @@ impl ProcScheme { access: Access::Restricted, } } + fn new_handle(&self, handle: Handle) -> Result { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + let _ = self.handles.write().insert(id, handle); + Ok(id) + } } impl Scheme for ProcScheme { @@ -264,7 +271,8 @@ impl Scheme for ProcScheme { let operation = match parts.next() { Some("mem") => Operation::Memory, - Some("grants") => Operation::Grants, + Some("addrspace") => Operation::AddrSpace { id: context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?.read().id }, + Some("current-addrspace") => Operation::CurrentAddrSpace, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), @@ -340,9 +348,16 @@ impl Scheme for ProcScheme { } }; - let id = self.next_id.fetch_add(1, Ordering::SeqCst); + let id = self.new_handle(Handle { + info: Info { + flags, + pid, + operation, + }, + data, + })?; - if let Operation::Trace { .. } = operation { + if let Operation::Trace = operation { if !ptrace::try_new_session(pid, id) { // There is no good way to handle id being occupied for nothing // here, is there? @@ -355,44 +370,41 @@ impl Scheme for ProcScheme { } } - self.handles.write().insert(id, Handle { - info: Info { - flags, - pid, - operation, - }, - data, - }); Ok(id) } - /// Using dup for `proc:` simply opens another operation on the same PID - /// ```rust,ignore - /// let trace = syscall::open("proc:1234/trace")?; - /// - /// // let regs = syscall::open("proc:1234/regs/int")?; - /// let regs = syscall::dup(trace, "regs/int")?; - /// ``` + /// Dup is currently used to implement clone() and execve(). 
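+    /// For an `addrspace` handle, `buf` selects how the new handle's address space
+    /// relates to the old one, roughly (hypothetical userspace sketch, not part of
+    /// this patch):
+    /// ```rust,ignore
+    /// let cur = syscall::open("proc:current/addrspace", O_RDWR)?;
+    /// // b"empty" => fresh space, b"shared" => same space, b"exclusive" => deep copy
+    /// let child_space = syscall::dup(cur, b"exclusive")?;
+    /// ```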
fn dup(&self, old_id: usize, buf: &[u8]) -> Result { let info = { let handles = self.handles.read(); let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; - handle.info - }; - - let buf_str = str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?; - - let mut path = format!("{}/", info.pid.into()); - path.push_str(buf_str); - let (uid, gid) = { - let contexts = context::contexts(); - let context = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context.read(); - (context.euid, context.egid) + handle.info }; - self.open(&path, info.flags, uid, gid) + self.new_handle(match info.operation { + Operation::AddrSpace { id } => { + let new_ptid = match buf { + // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But + // in that case, what scheme? + b"empty" => new_addrspace()?.0, + // Reuse same ID. + b"shared" => id, + b"exclusive" => addrspace(id).ok_or(Error::new(EBADFD))?.read().try_clone()?.0, + + _ => return Err(Error::new(EINVAL)), + }; + Handle { + info: Info { + flags: 0, + pid: info.pid, + operation: Operation::AddrSpace { id: new_ptid }, + }, + data: OperationData::Other, + } + } + _ => return Err(Error::new(EINVAL)), + }) } fn seek(&self, id: usize, pos: isize, whence: usize) -> Result { @@ -421,6 +433,7 @@ impl Scheme for ProcScheme { }; match info.operation { + Operation::Grants => return Err(Error::new(ENOSYS)), Operation::Static(_) => { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; @@ -455,8 +468,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_read); Ok(bytes_read) }, - // TODO: Allow reading process mappings? - Operation::Grants => return Err(Error::new(EBADF)), + Operation::AddrSpace { .. } => return Err(Error::new(EBADF)), Operation::Regs(kind) => { union Output { @@ -586,6 +598,14 @@ impl Scheme for ProcScheme { read_from(buf, &data.buf, &mut data.offset) } + // TODO: Replace write() with SYS_DUP_FORWARD. + // TODO: Find a better way to switch address spaces, since they also require switching + // the instruction and stack pointer. Maybe remove `/regs` altogether and replace it + // with `/ctx` + Operation::CurrentAddrSpace => { + //read_from(buf, &usize::to_ne_bytes(id.into()), &mut 0) + Ok(0) + } } } @@ -606,6 +626,7 @@ impl Scheme for ProcScheme { }; match info.operation { + Operation::Grants => Err(Error::new(ENOSYS)), Operation::Static(_) => Err(Error::new(EBADF)), Operation::Memory => { // Won't context switch, don't worry about the locks @@ -631,7 +652,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_written); Ok(bytes_written) }, - Operation::Grants => { + Operation::AddrSpace { .. } => { // FIXME: Forbid upgrading external mappings. 
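                // In effect this implements an munmap/mmap-style request against the
                // target address space: conflicting grants are split and unmapped
                // below, and the range is remapped with zeroed pages whenever any
                // PROT_* flag is requested.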
let pid = self.handles.read() @@ -649,51 +670,52 @@ impl Scheme for ProcScheme { return Err(Error::new(EINVAL)); } - let is_inactive = pid != context::context_id(); + let is_active = pid == context::context_id(); let callback = |context: &mut Context| { - let mut inactive = is_inactive.then(|| unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }); + let (mut inactive, mut active); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let conflicting = grants.conflicts(region).map(|g| *g.region()).collect::>(); + let (mut mapper, mut flusher) = if is_active { + active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); + (active.0.mapper(), &mut active.1 as &mut dyn Flusher) + } else { + inactive = (unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }, InactiveFlusher::new()); + (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) + }; + + let conflicting = addr_space.grants.conflicts(region).map(|g| *g.region()).collect::>(); for conflicting_region in conflicting { - let whole_grant = grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; + let whole_grant = addr_space.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; if let Some(before) = before_opt { - grants.insert(before); + addr_space.grants.insert(before); } if let Some(after) = after_opt { - grants.insert(after); + addr_space.grants.insert(after); } - let res = if let Some(ref mut inactive) = inactive { - current.unmap_inactive(inactive) - } else { - current.unmap() - }; + let res = current.unmap(&mut mapper, &mut flusher); + if res.file_desc.is_some() { - drop(grants); return Err(Error::new(EBUSY)); } - // TODO: Partial free if grant is mapped externally. + // TODO: Partial free if grant is mapped externally, or fail and force + // userspace to do it. } if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - let base = VirtualAddress::new(base); + let base = Page::containing_address(VirtualAddress::new(base)); - if let Some(ref mut inactive) = inactive { - grants.insert(Grant::zeroed_inactive(Page::containing_address(base), size / PAGE_SIZE, page_flags(flags), inactive).unwrap()); - } else { - grants.insert(Grant::map(base, size, page_flags(flags))); - } + addr_space.grants.insert(Grant::zeroed(base, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?); } Ok(()) }; - if is_inactive { + if is_active { with_context_mut(pid, callback)?; } else { try_stop_context(pid, callback)?; @@ -868,6 +890,24 @@ impl Scheme for ProcScheme { Ok(buf.len()) } Operation::Files => return Err(Error::new(EBADF)), + Operation::CurrentAddrSpace { .. 
} => { + let mut iter = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + let id = iter.next().ok_or(Error::new(EINVAL))?; + let sp = iter.next().ok_or(Error::new(EINVAL))?; + let ip = iter.next().ok_or(Error::new(EINVAL))?; + + let space = addrspace(PtId::from(id)).ok_or(Error::new(EINVAL))?; + + try_stop_context(info.pid, |context| unsafe { + let regs = &mut ptrace::regs_for_mut(context).ok_or(Error::new(ESRCH))?.iret; + regs.rip = ip; + regs.rsp = sp; + + context.set_addr_space(space); + Ok(()) + })?; + Ok(3 * mem::size_of::()) + } } } @@ -911,6 +951,8 @@ impl Scheme for ProcScheme { Operation::Attr(Attr::Uid) => "uid", Operation::Attr(Attr::Gid) => "gid", Operation::Files => "files", + Operation::AddrSpace { .. } => "addrspace", + Operation::CurrentAddrSpace => "current-addrspace", }); read_from(buf, &path.as_bytes(), &mut 0) diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 1a776a27..8602e9b1 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -28,11 +28,15 @@ pub fn resource() -> Result> { let mut stat_string = String::new(); // TODO: All user programs must have some grant in order for executable memory to even // exist, but is this a good indicator of whether it is user or kernel? - if context.grants.read().is_empty() { - stat_string.push('K'); + stat_string.push(if let Ok(addr_space) = context.addr_space() { + if addr_space.read().grants.is_empty() { + 'K' + } else { + 'U' + } } else { - stat_string.push('U'); - } + 'R' + }); match context.status { context::Status::Runnable => { stat_string.push('R'); @@ -79,9 +83,11 @@ pub fn resource() -> Result> { if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for grant in context.grants.read().iter() { - if grant.is_owned() { - memory += grant.size(); + if let Ok(addr_space) = context.addr_space() { + for grant in addr_space.read().grants.iter() { + if grant.is_owned() { + memory += grant.size(); + } } } diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 55c7f200..c87e694e 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -13,7 +13,7 @@ use crate::event; use crate::paging::{PAGE_SIZE, InactivePageTable, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; use crate::sync::{WaitQueue, WaitMap}; -use crate::syscall::data::{Map, OldMap, Packet, Stat, StatVfs, TimeSpec}; +use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; use crate::syscall::error::*; use crate::syscall::flag::{EventFlags, EVENT_READ, O_NONBLOCK, MapFlags, PROT_READ, PROT_WRITE}; use crate::syscall::number::*; @@ -145,15 +145,15 @@ impl UserInner { let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); let src_address = round_down_pages(address); let offset = address - src_address; let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round(); - let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; + let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; //TODO: Use syscall_head and syscall_tail to avoid leaking data - grants.insert(Grant::map_inactive( + addr_space.grants.insert(Grant::map_inactive( src_region.start_address(), dst_region.start_address(), src_region.size(), @@ -166,7 +166,6 @@ impl UserInner { } pub fn release(&self, address: usize) -> Result<()> { - //dbg!(address); 
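        // DANGLING is a sentinel base address (defined in context::memory) that no
        // real mapping ever backs, so there is nothing to unmap for it.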
if address == DANGLING { return Ok(()); } @@ -174,13 +173,13 @@ impl UserInner { let mut context = context_lock.write(); let mut other_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let region = match grants.contains(VirtualAddress::new(address)).map(Region::from) { + let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, None => return Err(Error::new(EFAULT)), }; - grants.take(®ion).unwrap().unmap_inactive(&mut other_table); + addr_space.grants.take(®ion).unwrap().unmap(&mut other_table.mapper(), crate::paging::mapper::InactiveFlusher::new()); Ok(()) } @@ -242,8 +241,8 @@ impl UserInner { if let Ok(grant_address) = res { if let Some(context_lock) = context_weak.upgrade() { let context = context_lock.read(); - let mut grants = context.grants.write(); - grants.funmap.insert( + let mut addr_space = context.addr_space()?.write(); + addr_space.grants.funmap.insert( Region::new(grant_address, map.size), VirtualAddress::new(address) ); @@ -437,8 +436,8 @@ impl Scheme for UserScheme { let contexts = context::contexts(); let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); - let funmap = &mut grants.funmap; + let mut addr_space = context.addr_space()?.write(); + let funmap = &mut addr_space.grants.funmap; let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back(); let grant_address = VirtualAddress::new(grant_address); diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index ced9eec4..993575f8 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -1,8 +1,7 @@ use core::{ascii, mem}; use alloc::string::String; -use alloc::vec::Vec; -use super::data::{OldMap, Map, Stat, TimeSpec}; +use super::data::{Map, Stat, TimeSpec}; use super::flag::*; use super::number::*; use super::validate::*; diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index f1fc77f2..103602a8 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -88,9 +88,9 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let dst_address = grants.find_free(size).ok_or(Error::new(ENOMEM))?; + let dst_address = addr_space.grants.find_free(size).ok_or(Error::new(ENOMEM))?; let mut page_flags = PageFlags::new().user(true); if flags.contains(PHYSMAP_WRITE) { @@ -104,7 +104,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); } - grants.insert(Grant::physmap( + addr_space.grants.insert(Grant::physmap( PhysicalAddress::new(physical_address), dst_address.start_address(), size, @@ -113,6 +113,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) Ok(dst_address.start_address().data()) } +// TODO: Remove this syscall, funmap makes it redundant. 
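+// physmap inserts a Grant covering the requested physical range into the calling
+// context's address space; enforce_root keeps it root-only, since mapping arbitrary
+// physical memory would bypass all memory protection.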
pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { enforce_root()?; inner_physmap(physical_address, size, flags) @@ -126,10 +127,12 @@ pub fn inner_physunmap(virtual_address: usize) -> Result { let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - grants.take(®ion).unwrap().unmap(); + if let Some(region) = addr_space.grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { + use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind}; + + addr_space.grants.take(®ion).unwrap().unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new()); return Ok(0); } diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index cf833dc0..21fc6922 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -2,7 +2,6 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::str; -use core::sync::atomic::Ordering; use spin::RwLock; use crate::context::file::{FileDescriptor, FileDescription}; @@ -482,11 +481,11 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result { let requested = Region::new(virtual_address, length); { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); let context = context_lock.read(); - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); + let grants = &mut addr_space.grants; let conflicting: Vec = grants.conflicts(requested).map(Region::from).collect(); @@ -507,9 +506,10 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result { if let Some(after) = after { grants.insert(after); } + use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind}; // Remove irrelevant region - grant.unmap(); + grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new()); } } diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index 2f4f7b28..e25cf044 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,9 +25,9 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; -use self::data::{CloneInfo, ExecMemRange, Map, SigAction, Stat, TimeSpec}; +use self::data::{Map, SigAction, Stat, TimeSpec}; use self::error::{Error, Result, ENOSYS, EINVAL}; -use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags}; +use self::flag::{MapFlags, PhysmapFlags, WaitFlags}; use self::number::*; use crate::context::ContextId; @@ -112,36 +112,6 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into), SYS_GETPPID => getppid().map(ContextId::into), - SYS_EXEC => exec(validate_slice(b as *const ExecMemRange, c)?, d, e), - SYS_CLONE => { - let b = CloneFlags::from_bits_truncate(b); - - let info = if b.contains(CloneFlags::CLONE_VM) { - if d < core::mem::size_of::() { - return Err(Error::new(EINVAL)); - } - Some(&validate_slice(c as *const CloneInfo, 1)?[0]) - } else { None }; - - #[cfg(not(target_arch = "x86_64"))] - { - //TODO: CLONE_STACK - let ret = clone(b, bp).map(ContextId::into); - ret - } - - #[cfg(target_arch = "x86_64")] - { - let old_rsp = stack.iret.rsp; - // TODO: Unify CLONE_STACK and CLONE_VM. 
- if b.contains(flag::CLONE_STACK) { - stack.iret.rsp = info.as_ref().ok_or(Error::new(EINVAL))?.target_stack; - } - let ret = clone(b, bp, info).map(ContextId::into); - stack.iret.rsp = old_rsp; - ret - } - }, SYS_EXIT => exit((b & 0xFF) << 8), SYS_KILL => kill(ContextId::from(b), c), SYS_WAITPID => waitpid(ContextId::from(b), c, WaitFlags::from_bits_truncate(d)).map(ContextId::into), diff --git a/src/syscall/process.rs b/src/syscall/process.rs index dc85d707..418f464e 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -1,380 +1,27 @@ use alloc::{ boxed::Box, - collections::BTreeSet, - string::String, sync::Arc, vec::Vec, }; -use core::alloc::{GlobalAlloc, Layout}; -use core::convert::TryFrom; -use core::ops::DerefMut; -use core::{intrinsics, mem, str}; -use crate::context::file::{FileDescription, FileDescriptor}; +use core::mem; use spin::{RwLock, RwLockWriteGuard}; -use crate::context::{Context, ContextId, WaitpidKey}; -use crate::context::memory::{Grant, Region, NewTables, page_flags, setup_new_utable, UserGrants}; +use crate::context::{Context, ContextId, memory, WaitpidKey}; use crate::context; -#[cfg(not(feature="doc"))] -use crate::elf::{self, program_header}; use crate::interrupt; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::{allocate_frames, Frame, PhysicalAddress}; -use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, TableKind, VirtualAddress, PAGE_SIZE}; -use crate::{ptrace, syscall}; -use crate::scheme::FileHandle; +use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; +use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, RmmArch, TableKind, VirtualAddress, PAGE_SIZE}; +use crate::ptrace; use crate::start::usermode; -use crate::syscall::data::{CloneInfo, ExecMemRange, SigAction, Stat}; +use crate::syscall::data::SigAction; use crate::syscall::error::*; -use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags, - CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM, - MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE, - PTRACE_STOP_EXIT, SigActionFlags, SIG_BLOCK, SIG_DFL, SIG_SETMASK, SIG_UNBLOCK, - SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; +use crate::syscall::flag::{wifcontinued, wifstopped, MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, + PTRACE_STOP_EXIT, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; use crate::syscall::ptrace_event; -use crate::syscall::validate::{validate_slice, validate_slice_mut}; - -pub fn clone(flags: CloneFlags, stack_base: usize, info: Option<&CloneInfo>) -> Result { - let ppid; - let pid; - { - let pgid; - let ruid; - let rgid; - let rns; - let euid; - let egid; - let ens; - let umask; - let sigmask; - let mut cpu_id_opt = None; - let arch; - let vfork; - let mut kfx_opt = None; - let mut kstack_opt = None; - let mut offset = 0; - let mut grants; - let name; - let cwd; - let files; - let actions; - let old_sigstack; - - // Copy from old process - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - ppid = context.id; - pgid = context.pgid; - ruid = context.ruid; - rgid = context.rgid; - rns = context.rns; - euid = context.euid; - egid = context.egid; - ens = context.ens; - sigmask = context.sigmask; - umask = context.umask; - old_sigstack = 
context.sigstack; - - // Uncomment to disable threads on different CPUs - //TODO: fix memory allocation races when this is removed - if flags.contains(CLONE_VM) { - cpu_id_opt = context.cpu_id; - } - - // TODO: Fill with newest registers. - arch = context.arch.clone(); - - if let Some(ref fx) = context.kfx { - let new_fx = unsafe { - let new_fx_ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)); - if new_fx_ptr.is_null() { - // FIXME: It's mildly ironic that the only place where clone can fail with - // ENOMEM, is when copying 1024 bytes to merely store vector registers. - // Although in order to achieve full kernel-panic immunity, we'll need to - // completely phase out all usage of liballoc data structures, and use our - // own library/port liballoc, since panicking on OOM is not good for a - // kernel. - return Err(Error::new(ENOMEM)); - } - new_fx_ptr.copy_from_nonoverlapping(fx.as_ptr(), fx.len()); - Box::from_raw(new_fx_ptr as *mut [u8; 1024]) - }; - kfx_opt = Some(new_fx); - } - - #[cfg(target_arch = "x86_64")] - { - if let Some(ref stack) = context.kstack { - // Get the relative offset to the return address of the function - // obtaining `stack_base`. - // - // (base pointer - start of stack) - one - offset = stack_base - stack.as_ptr() as usize - mem::size_of::(); // Add clone ret - // FIXME: This is incredibly UB, making Rust think the current stack being - // copied is simply a regular immutable slice. This part should either be - // written in assembly or have clone moved to userspace. - let mut new_stack = stack.clone(); - - unsafe { - // Set clone's return value to zero. This is done because - // the clone won't return like normal, which means the value - // would otherwise never get set. - if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) { - (*regs).scratch.rax = 0; - } - - // Change the return address of the child (previously - // syscall) to the arch-specific clone_ret callback - let func_ptr = new_stack.as_mut_ptr().add(offset); - *(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize; - } - - kstack_opt = Some(new_stack); - } - } - - #[cfg(not(target_arch = "x86_64"))] - { - if let Some(ref stack) = context.kstack { - offset = stack_base - stack.as_ptr() as usize; - let mut new_stack = stack.clone(); - - kstack_opt = Some(new_stack); - } - } - - grants = Arc::clone(&context.grants); - - if flags.contains(CLONE_VM) { - name = Arc::clone(&context.name); - } else { - name = Arc::new(RwLock::new(context.name.read().clone())); - } - - if flags.contains(CLONE_FS) { - cwd = Arc::clone(&context.cwd); - } else { - cwd = Arc::new(RwLock::new(context.cwd.read().clone())); - } - - if flags.contains(CLONE_FILES) { - files = Arc::clone(&context.files); - } else { - files = Arc::new(RwLock::new(context.files.read().clone())); - } - - if flags.contains(CLONE_SIGHAND) { - actions = Arc::clone(&context.actions); - } else { - actions = Arc::new(RwLock::new(context.actions.read().clone())); - } - } - - // If not cloning files, dup to get a new number from scheme - // This has to be done outside the context lock to prevent deadlocks - if !flags.contains(CLONE_FILES) { - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let new_file_opt = if let Some(ref file) = *file_opt { - Some(FileDescriptor { - description: Arc::clone(&file.description), - cloexec: file.cloexec, - }) - } else { - None - }; - - *file_opt = new_file_opt; - } - } - - let maps_to_reobtain = if flags.contains(CLONE_VM) { - Vec::new() - } else 
{ - grants.read().iter().filter_map(|grant| grant.desc_opt.as_ref().and_then(|file_ref| { - let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() }; - let scheme_arc = match crate::scheme::schemes().get(scheme) { - Some(s) => Arc::downgrade(s), - None => return None, - }; - let map = crate::syscall::data::Map { - address: grant.start_address().data(), - size: grant.size(), - offset: file_ref.offset, - flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE, - }; - - Some((scheme_arc, number, map)) - })).collect() - }; - - // If vfork, block the current process - // This has to be done after the operations that may require context switches - if flags.contains(CLONE_VFORK) { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - context.block("vfork"); - vfork = true; - } else { - vfork = false; - } - - // Set up new process - let new_context_lock = { - let mut contexts = context::contexts_mut(); - let context_lock = contexts.new_context()?; - let mut context = context_lock.write(); - - pid = context.id; - - context.pgid = pgid; - context.ppid = ppid; - context.ruid = ruid; - context.rgid = rgid; - context.rns = rns; - context.euid = euid; - context.egid = egid; - context.ens = ens; - context.sigmask = sigmask; - context.umask = umask; - - //TODO: Better CPU balancing - if let Some(cpu_id) = cpu_id_opt { - context.cpu_id = Some(cpu_id); - } else { - context.cpu_id = Some(pid.into() % crate::cpu_count()); - } - - // Start as blocked. This is to ensure the context is never switched before any grants - // that have to be remapped, are mapped. - context.status = context::Status::Blocked; - - context.vfork = vfork; - - context.arch = arch; - - // This is needed because these registers may have changed after this context was - // switched to, but before this was called. - #[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))] - unsafe { - context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize; - x86::bits64::segmentation::swapgs(); - context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; - x86::bits64::segmentation::swapgs(); - } - - if flags.contains(CloneFlags::CLONE_VM) { - // Reuse same CR3, same grants, everything. - context.grants = grants; - } else { - // TODO: Handle ENOMEM - let mut new_tables = setup_new_utable().expect("failed to allocate new page tables for cloned process"); - - let mut new_grants = UserGrants::new(); - for old_grant in grants.read().iter().filter(|g| g.desc_opt.is_none()) { - new_grants.insert(old_grant.secret_clone(&mut new_tables.new_utable)); - } - context.grants = Arc::new(RwLock::new(new_grants)); - - drop(grants); - - new_tables.take(); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - } - - if let Some(fx) = kfx_opt.take() { - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); - } - - // Set kernel stack - if let Some(stack) = kstack_opt.take() { - context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kstack = Some(stack); - #[cfg(target_arch = "aarch64")] - { - context.arch.set_lr(interrupt::syscall::clone_ret as usize); - } - } - - // TODO: Clone ksig? - - #[cfg(target_arch = "aarch64")] - { - if let Some(stack) = &mut context.kstack { - unsafe { - // stack_base contains a pointer to InterruptStack. 
Get its offset from - // stack_base itself - let istack_offset = *(stack_base as *const u64) - stack_base as u64; - - // Get the top of the new process' stack - let new_sp = stack.as_mut_ptr().add(offset); - - // Update the pointer to the InterruptStack to reflect the new process' - // stack. (Without this the pointer would be InterruptStack on the parent - // process' stack). - *(new_sp as *mut u64) = new_sp as u64 + istack_offset; - - // Update tpidr_el0 in the new process' InterruptStack - let mut interrupt_stack = &mut *(stack.as_mut_ptr().add(offset + istack_offset as usize) as *mut crate::arch::interrupt::InterruptStack); - interrupt_stack.iret.tpidr_el0 = tcb_addr; - } - } - } - - - context.name = name; - - context.cwd = cwd; - - context.files = files; - - context.actions = actions; - - if flags.contains(CLONE_VM) { - context.sigstack = info.and_then(|info| (info.target_sigstack != !0).then(|| info.target_sigstack)); - } else { - context.sigstack = old_sigstack; - } - - Arc::clone(context_lock) - }; - for (scheme_weak, number, map) in maps_to_reobtain { - let scheme = match scheme_weak.upgrade() { - Some(s) => s, - None => continue, - }; - let _ = scheme.kfmap(number, &map, &new_context_lock); - } - new_context_lock.write().status = context::Status::Runnable; - } - - if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() { - // Freeze the clone, allow ptrace to put breakpoints - // to it before it starts - let contexts = context::contexts(); - let context = contexts.get(pid).expect("Newly created context doesn't exist??"); - let mut context = context.write(); - context.ptrace_stop = true; - } - - // Race to pick up the new process! - ipi(IpiKind::Switch, IpiTarget::Other); - - let _ = unsafe { context::switch() }; - - Ok(pid) -} +use crate::syscall::validate::validate_slice_mut; fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> { // NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the @@ -383,27 +30,23 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu // remaining references to the grants, where there are in fact none. However, if either one is // reaped before, then that reference will disappear, and no leak will occur. // - // By removing the reference to the grants when the context will no longer be used, this + // By removing the reference to the address space when the context will no longer be used, this // problem will never occur. + let addr_space_arc = match context.addr_space.take() { + Some(a) => a, + None => return context, + }; - // FIXME, UNOPTIMIZED: Right now, this will allocate memory in order to store the new empty - // grants, which may not even be used (only in fexec I think). We should turn grants into an - // `Option`, and only reinitialize it there. 
- let mut grants_arc = mem::take(&mut context.grants); - - if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) { - let mut grants_guard = grants_lock_mut.get_mut(); - - let grants = mem::replace(&mut *grants_guard, UserGrants::default()); - for grant in grants.into_iter() { + if let Ok(addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) { + for grant in addr_space.grants.into_iter() { let unmap_result = if reaping { log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant); let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - grant.unmap_inactive(&mut new_table) + grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new()) } else { - grant.unmap() + grant.unmap(&mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()) }; if unmap_result.file_desc.is_some() { @@ -418,14 +61,6 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu context } -struct ExecFile(FileHandle); - -impl Drop for ExecFile { - fn drop(&mut self) { - let _ = syscall::close(self.0); - } -} - pub fn exit(status: usize) -> ! { ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); @@ -436,16 +71,10 @@ pub fn exit(status: usize) -> ! { Arc::clone(&context_lock) }; - let mut close_files = Vec::new(); + let mut close_files; let pid = { let mut context = context_lock.write(); - { - let mut lock = context.files.write(); - if Arc::strong_count(&context.files) == 1 { - mem::swap(lock.deref_mut(), &mut close_files); - } - } - context.files = Arc::new(RwLock::new(Vec::new())); + close_files = Arc::try_unwrap(mem::take(&mut context.files)).map_or_else(|_| Vec::new(), RwLock::into_inner); context.id }; @@ -669,7 +298,7 @@ pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result { let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(address)); let end_page = Page::containing_address(VirtualAddress::new(end_address)); @@ -999,19 +628,31 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! 
{ assert!(!data.is_empty()); const LOAD_BASE: usize = 0; - let grant = context::memory::Grant::map(VirtualAddress::new(LOAD_BASE), ((data.len()+PAGE_SIZE-1)/PAGE_SIZE)*PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true)); { let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; + let grant = context::memory::Grant::zeroed(Page::containing_address(VirtualAddress::new(LOAD_BASE)), (data.len()+PAGE_SIZE-1)/PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true), &mut active_table, PageFlushAll::new()).expect("failed to allocate memory for bootstrap"); + + for (index, page) in grant.pages().enumerate() { let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; - let frame = active_table.translate_page(page).expect("expected mapped init memory to have a corresponding frame"); - unsafe { ((frame.start_address().data() + crate::PHYS_OFFSET) as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); } + + let physaddr = active_table.translate_page(page) + .expect("expected mapped init memory to have a corresponding frame") + .start_address(); + + unsafe { + (RmmA::phys_to_virt(physaddr).data() as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); + } } + context::contexts().current() + .expect("expected a context to exist when executing init") + .read().addr_space() + .expect("expected bootstrap context to have an address space") + .write().grants.insert(grant); } - - context::contexts().current().expect("expected a context to exist when executing init").read().grants.write().insert(grant); + log::info!("Usermode bootstrap"); drop(data); @@ -1022,127 +663,3 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { usermode(start, 0, 0, 0); } } - -pub fn exec(memranges: &[ExecMemRange], instruction_ptr: usize, stack_ptr: usize) -> Result { - // TODO: rlimit? - if memranges.len() > 1024 { - return Err(Error::new(EINVAL)); - } - - let mut new_grants = UserGrants::new(); - - { - let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); - - // Linux will always destroy other threads immediately if one of them executes execve(2). - // At the moment the Redox kernel is ignorant of threads, other than them sharing files, - // memory, etc. We fail with EBUSY if any resources that are being replaced, are shared. - - let mut old_grants = Arc::try_unwrap(mem::take(&mut current_context_lock.write().grants)).map_err(|_| Error::new(EBUSY))?.into_inner(); - // TODO: Allow multiple contexts which share the file table, to have one of them run exec? - let mut old_files = Arc::try_unwrap(mem::take(&mut current_context_lock.write().files)).map_err(|_| Error::new(EBUSY))?.into_inner(); - - // FIXME: Handle leak in case of ENOMEM. - let mut new_tables = setup_new_utable()?; - - let mut flush = PageFlushAll::new(); - - // FIXME: This is to the extreme, but fetch with atomic volatile? 
- for memrange in memranges.iter().copied() { - let old_address = if memrange.old_address == !0 { None } else { Some(memrange.old_address) }; - - if memrange.address % PAGE_SIZE != 0 || old_address.map_or(false, |a| a % PAGE_SIZE != 0) || memrange.size % PAGE_SIZE != 0 { - return Err(Error::new(EINVAL)); - } - if memrange.size == 0 { continue } - - let new_start = Page::containing_address(VirtualAddress::new(memrange.address)); - let flags = MapFlags::from_bits(memrange.flags).ok_or(Error::new(EINVAL))?; - let page_count = memrange.size / PAGE_SIZE; - let flags = page_flags(flags); - - if let Some(old_address) = old_address { - let old_start = VirtualAddress::new(memrange.old_address); - - let entire_region = Region::new(old_start, memrange.size); - - // TODO: This will do one B-Tree search for each memrange. If a process runs exec - // and keeps every range the way it is, then this would be O(n log n)! - loop { - let region = match old_grants.conflicts(entire_region).next().map(|g| *g.region()) { - Some(r) => r, - None => break, - }; - let owned = old_grants.take(®ion).expect("cannot fail"); - let (before, mut current, after) = owned.extract(region).expect("cannot fail"); - - if let Some(before) = before { old_grants.insert(before); } - if let Some(after) = after { old_grants.insert(after); } - - new_grants.insert(current.move_to_address_space(new_start, &mut new_tables.new_utable, flags, &mut flush)); - } - } else { - new_grants.insert(Grant::zeroed_inactive(new_start, page_count, flags, &mut new_tables.new_utable)?); - } - } - - { - unsafe { flush.ignore(); } - - new_tables.take(); - - let mut context = current_context_lock.write(); - context.grants = Arc::new(RwLock::new(new_grants)); - - let old_utable = context.arch.get_page_utable(); - let old_frame = Frame::containing_address(PhysicalAddress::new(old_utable)); - - context.arch.set_page_utable(unsafe { new_tables.new_utable.address() }); - - #[cfg(target_arch = "x86_64")] - unsafe { x86::controlregs::cr3_write(new_tables.new_utable.address() as u64); } - - for old_grant in old_grants.into_iter() { - old_grant.unmap_inactive(&mut unsafe { InactivePageTable::from_address(old_utable) }); - } - crate::memory::deallocate_frames(old_frame, 1); - - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { new_tables.new_ktable.address() }); - - context.actions = Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])); - let was_vfork = mem::replace(&mut context.vfork, false); - - // TODO: Reuse in place if the file table is not shared. - drop(context); - - let mut context = current_context_lock.write(); - - context.files = Arc::new(RwLock::new(old_files)); - let ppid = context.ppid; - drop(context); - - // TODO: Should this code be preserved as is? 
- if was_vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if !context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - } - - unsafe { usermode(instruction_ptr, stack_ptr, 0, 0); } -} -- GitLab From b50495bfa5f2c40fa1bb1498c0e468d7d9ec67c3 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Tue, 5 Jul 2022 12:26:30 +0200 Subject: [PATCH 13/44] WIP: Support clone in userspace Everything seems to work for the most part, but now there are tons of daemons which rely on syscall::clone, which is now implemented in relibc :( --- src/context/arch/x86_64.rs | 2 +- src/context/context.rs | 18 ++- src/context/memory.rs | 10 +- src/ptrace.rs | 6 +- src/scheme/mod.rs | 11 +- src/scheme/proc.rs | 283 ++++++++++++++++++++++++++----------- 6 files changed, 237 insertions(+), 93 deletions(-) diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index c3677296..09d0a79d 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -35,7 +35,7 @@ pub struct Context { /// Base pointer rbp: usize, /// Stack pointer - rsp: usize, + pub(crate) rsp: usize, /// FSBASE. /// /// NOTE: Same fsgsbase behavior as with gsbase. diff --git a/src/context/context.rs b/src/context/context.rs index b351ee53..34e11b8c 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -12,7 +12,7 @@ use core::{ }; use spin::RwLock; -use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; +use crate::arch::{interrupt::InterruptStack, paging::{PAGE_SIZE, RmmA, RmmArch}}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; @@ -250,6 +250,11 @@ pub struct Context { /// else than SIG_DFL, otherwise signals will not be delivered. Userspace is responsible for /// setting this. pub sigstack: Option, + /// An even hackier way to pass the return entry point and stack pointer to new contexts while + /// implementing clone. Before a context has returned to userspace, its IntRegisters cannot be + /// set since there is no interrupt stack (unless the kernel stack is copied, but that is in my + /// opinion hackier and less efficient than this (and UB to do in Rust)). + pub clone_entry: Option<[usize; 2]>, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -352,6 +357,7 @@ impl Context { regs: None, ptrace_stop: false, sigstack: None, + clone_entry: None, }; this.set_addr_space(new_addrspace()?.1); Ok(this) @@ -528,8 +534,14 @@ impl Context { self.addr_space.as_ref().ok_or(Error::new(ESRCH)) } pub fn set_addr_space(&mut self, addr_space: Arc>) { - assert!(!self.running); - self.arch.set_page_utable(addr_space.read().frame.utable.start_address().data()); + let physaddr = addr_space.read().frame.utable.start_address(); + if self.running { + unsafe { + RmmA::set_table(physaddr); + } + } + + self.arch.set_page_utable(physaddr.data()); self.addr_space = Some(addr_space); } } diff --git a/src/context/memory.rs b/src/context/memory.rs index 95e57f57..1c729583 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -77,8 +77,7 @@ impl AddrSpace { // TODO: Abstract away this. 
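        // Choose how to reach this address space's page tables: through the active
        // table if it is the one currently loaded (is_current() compares the utable
        // frame against RmmA::table()), or through a temporarily mapped inactive
        // table otherwise.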
let (mut inactive, mut active); - // TODO: aarch64 - let mut this_mapper = if self.frame.utable.start_address().data() == unsafe { x86::controlregs::cr3() } as usize { + let mut this_mapper = if self.is_current() { active = unsafe { ActivePageTable::new(rmm::TableKind::User) }; active.mapper() } else { @@ -95,8 +94,8 @@ impl AddrSpace { for page in new_grant.pages() { // FIXME: ENOMEM is wrong here, it cannot fail. - let current_frame = this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *const u8; - let new_frame = new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address().data() as *mut u8; + let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address()) }.data() as *const u8; + let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address()) }.data() as *mut u8; // TODO: Replace this with CoW unsafe { @@ -115,6 +114,9 @@ impl AddrSpace { id, }) } + pub fn is_current(&self) -> bool { + self.frame.utable.start_address() == unsafe { RmmA::table() } + } } #[derive(Debug)] diff --git a/src/ptrace.rs b/src/ptrace.rs index dc76ee4d..302646e8 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -13,7 +13,7 @@ use crate::{ } }, common::unique::Unique, - context::{self, signal, Context, ContextId}, + context::{self, signal, Context, ContextId, memory::AddrSpace}, event, scheme::proc, sync::WaitCondition, @@ -472,8 +472,8 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator impl Iterator> + '_ { - let mut table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; +pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let mut table = unsafe { InactivePageTable::from_address(addrspace.frame.utable.start_address().data()) }; // TODO: Iterate over grants instead to avoid yielding None too many times. 
What if // context_memory is used for an entire process's address space, where the stack is at the very diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 03113568..4e89661b 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -16,7 +16,7 @@ use alloc::{ use core::sync::atomic::AtomicUsize; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use crate::context::Context; +use crate::context::{Context, memory::AddrSpace, file::FileDescriptor}; use crate::syscall::error::*; use crate::syscall::scheme::Scheme; @@ -300,10 +300,17 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { SCHEMES.call_once(init_schemes).write() } +#[allow(unused_variables)] pub trait KernelScheme: Scheme + Send + Sync + 'static { - #[allow(unused_variables)] fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc>) -> Result { log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes"); Err(Error::new(ENOSYS)) } + + fn as_filetable(&self, number: usize) -> Result>>>> { + Err(Error::new(EBADF)) + } + fn as_addrspace(&self, number: usize) -> Result>> { + Err(Error::new(EBADF)) + } } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 31222a37..2d0c3ccc 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,9 +1,9 @@ use crate::{ arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress}, - context::{self, Context, ContextId, Status, memory::{addrspace, Grant, new_addrspace, PtId, page_flags, Region}}, + context::{self, Context, ContextId, Status, file::FileDescriptor, memory::{AddrSpace, Grant, new_addrspace, PtId, page_flags, Region}}, memory::PAGE_SIZE, ptrace, - scheme::{AtomicSchemeId, SchemeId}, + scheme::{self, AtomicSchemeId, FileHandle, KernelScheme, SchemeId}, syscall::{ FloatRegisters, IntRegisters, @@ -107,19 +107,22 @@ enum RegsKind { Int, Env, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone)] enum Operation { - Memory, - Grants, + Memory { addrspace: Arc> }, Regs(RegsKind), Trace, Static(&'static str), Name, + Cwd, Sigstack, Attr(Attr), - Files, - AddrSpace { id: PtId }, + Filetable { filetable: Arc>>> }, + AddrSpace { addrspace: Arc> }, CurrentAddrSpace, + CurrentFiletable, + // TODO: Any better interface to access newly created contexts? Openat? + OpenViaDup, } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -128,10 +131,10 @@ enum Attr { // TODO: namespace, tid, etc. } impl Operation { - fn needs_child_process(self) -> bool { - matches!(self, Self::Memory | Self::Grants | Self::Regs(_) | Self::Trace | Self::Files) + fn needs_child_process(&self) -> bool { + matches!(self, Self::Memory { .. } | Self::Regs(_) | Self::Trace | Self::Filetable { .. } | Self::AddrSpace { .. } | Self::CurrentAddrSpace | Self::CurrentFiletable) } - fn needs_root(self) -> bool { + fn needs_root(&self) -> bool { matches!(self, Self::Attr(_)) } } @@ -186,7 +189,7 @@ impl OperationData { } } -#[derive(Clone, Copy)] +#[derive(Clone)] struct Info { pid: ContextId, flags: usize, @@ -255,34 +258,29 @@ impl ProcScheme { } } -impl Scheme for ProcScheme { - fn open(&self, path: &str, flags: usize, uid: u32, gid: u32) -> Result { - let mut parts = path.splitn(2, '/'); - let pid_str = parts.next() - .ok_or(Error::new(ENOENT))?; - - let pid = if pid_str == "current" { - context::context_id() - } else if self.access == Access::Restricted { - return Err(Error::new(EACCES)); - } else { - ContextId::from(pid_str.parse().map_err(|_| Error::new(ENOENT))?) 
- }; +fn current_addrspace() -> Result>> { + Ok(Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?)) +} - let operation = match parts.next() { - Some("mem") => Operation::Memory, - Some("addrspace") => Operation::AddrSpace { id: context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?.read().id }, +impl ProcScheme { + fn open_inner(&self, pid: ContextId, operation_str: Option<&str>, flags: usize, uid: u32, gid: u32) -> Result { + let operation = match operation_str { + Some("mem") => Operation::Memory { addrspace: current_addrspace()? }, + Some("addrspace") => Operation::AddrSpace { addrspace: current_addrspace()? }, + Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::contexts().current().ok_or(Error::new(ESRCH))?.read().files) }, Some("current-addrspace") => Operation::CurrentAddrSpace, + Some("current-filetable") => Operation::CurrentFiletable, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), Some("trace") => Operation::Trace, Some("exe") => Operation::Static("exe"), Some("name") => Operation::Name, + Some("cwd") => Operation::Cwd, Some("sigstack") => Operation::Sigstack, Some("uid") => Operation::Attr(Attr::Uid), Some("gid") => Operation::Attr(Attr::Gid), - Some("files") => Operation::Files, + Some("open_via_dup") => Operation::OpenViaDup, _ => return Err(Error::new(EINVAL)) }; @@ -295,7 +293,7 @@ impl Scheme for ProcScheme { let target = target.read(); data = match operation { - Operation::Memory => OperationData::Memory(MemData::default()), + Operation::Memory { .. } => OperationData::Memory(MemData::default()), Operation::Trace => OperationData::Trace(TraceData::default()), Operation::Static(_) => OperationData::Static(StaticData::new( target.name.read().clone().into() @@ -335,7 +333,7 @@ impl Scheme for ProcScheme { return Err(Error::new(EPERM)); } - if matches!(operation, Operation::Files) { + if matches!(operation, Operation::Filetable { .. }) { data = OperationData::Static(StaticData::new({ use core::fmt::Write; @@ -352,7 +350,7 @@ impl Scheme for ProcScheme { info: Info { flags, pid, - operation, + operation: operation.clone(), }, data, })?; @@ -372,6 +370,26 @@ impl Scheme for ProcScheme { Ok(id) } +} + +impl Scheme for ProcScheme { + fn open(&self, path: &str, flags: usize, uid: u32, gid: u32) -> Result { + let mut parts = path.splitn(2, '/'); + let pid_str = parts.next() + .ok_or(Error::new(ENOENT))?; + + let pid = if pid_str == "current" { + context::context_id() + } else if pid_str == "new" { + inherit_context()? + } else if self.access == Access::Restricted { + return Err(Error::new(EACCES)); + } else { + ContextId::from(pid_str.parse().map_err(|_| Error::new(ENOENT))?) + }; + + self.open_inner(pid, parts.next(), flags, uid, gid) + } /// Dup is currently used to implement clone() and execve(). 
fn dup(&self, old_id: usize, buf: &[u8]) -> Result { @@ -379,18 +397,41 @@ impl Scheme for ProcScheme { let handles = self.handles.read(); let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; - handle.info + handle.info.clone() }; self.new_handle(match info.operation { - Operation::AddrSpace { id } => { - let new_ptid = match buf { + Operation::OpenViaDup => { + let (uid, gid) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read() { + context => (context.euid, context.egid), + }; + return self.open_inner(info.pid, Some(core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?).filter(|s| !s.is_empty()), O_RDWR | O_CLOEXEC, uid, gid); + }, + + Operation::Filetable { filetable } => { + // TODO: Maybe allow userspace to either copy or transfer recently dupped file + // descriptors between file tables. + if buf != b"copy" { + return Err(Error::new(EINVAL)); + } + let new_filetable = Arc::try_new(RwLock::new(filetable.read().iter().cloned().collect::>())).map_err(|_| Error::new(ENOMEM))?; + + Handle { + info: Info { + flags: 0, + pid: info.pid, + operation: Operation::Filetable { filetable: new_filetable }, + }, + data: OperationData::Other, + } + } + Operation::AddrSpace { addrspace } => { + let (new_addrspace, is_mem) = match buf { // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But // in that case, what scheme? - b"empty" => new_addrspace()?.0, - // Reuse same ID. - b"shared" => id, - b"exclusive" => addrspace(id).ok_or(Error::new(EBADFD))?.read().try_clone()?.0, + b"empty" => (new_addrspace()?.1, false), + b"exclusive" => (addrspace.read().try_clone()?.1, false), + b"mem" => (Arc::clone(&addrspace), true), _ => return Err(Error::new(EINVAL)), }; @@ -398,9 +439,9 @@ impl Scheme for ProcScheme { info: Info { flags: 0, pid: info.pid, - operation: Operation::AddrSpace { id: new_ptid }, + operation: if is_mem { Operation::Memory { addrspace: new_addrspace } } else { Operation::AddrSpace { addrspace: new_addrspace } }, }, - data: OperationData::Other, + data: if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Other }, } } _ => return Err(Error::new(EINVAL)), @@ -429,11 +470,10 @@ impl Scheme for ProcScheme { let info = { let handles = self.handles.read(); let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - handle.info + handle.info.clone() }; match info.operation { - Operation::Grants => return Err(Error::new(ENOSYS)), Operation::Static(_) => { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; @@ -444,19 +484,15 @@ impl Scheme for ProcScheme { data.offset += len; Ok(len) }, - Operation::Memory => { + Operation::Memory { addrspace } => { // Won't context switch, don't worry about the locks let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; let data = handle.data.mem_data().expect("operations can't change"); - let contexts = context::contexts(); - let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); - let mut bytes_read = 0; - for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; unsafe { @@ -468,6 +504,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_read); 
Ok(bytes_read) }, + // TODO: Support querying which grants exist and where Operation::AddrSpace { .. } => return Err(Error::new(EBADF)), Operation::Regs(kind) => { @@ -582,6 +619,7 @@ impl Scheme for ProcScheme { Ok(read * mem::size_of::()) } Operation::Name => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.read().as_bytes(), &mut 0), + Operation::Cwd => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().cwd.read().as_bytes(), &mut 0), Operation::Sigstack => read_from(buf, &context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes(), &mut 0), Operation::Attr(attr) => { let src_buf = match (attr, &*Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?).read()) { @@ -591,7 +629,7 @@ impl Scheme for ProcScheme { read_from(buf, &src_buf, &mut 0) } - Operation::Files => { + Operation::Filetable { .. } => { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; let data = handle.data.static_data().expect("operations can't change"); @@ -602,10 +640,8 @@ impl Scheme for ProcScheme { // TODO: Find a better way to switch address spaces, since they also require switching // the instruction and stack pointer. Maybe remove `/regs` altogether and replace it // with `/ctx` - Operation::CurrentAddrSpace => { - //read_from(buf, &usize::to_ne_bytes(id.into()), &mut 0) - Ok(0) - } + Operation::CurrentAddrSpace | Operation::CurrentFiletable => return Err(Error::new(EBADF)), + Operation::OpenViaDup => return Err(Error::new(EBADF)), } } @@ -622,25 +658,20 @@ impl Scheme for ProcScheme { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; handle.continue_ignored_children(); - handle.info + handle.info.clone() }; match info.operation { - Operation::Grants => Err(Error::new(ENOSYS)), Operation::Static(_) => Err(Error::new(EBADF)), - Operation::Memory => { + Operation::Memory { addrspace } => { // Won't context switch, don't worry about the locks let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; let data = handle.data.mem_data().expect("operations can't change"); - let contexts = context::contexts(); - let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); - let mut bytes_written = 0; - for chunk_opt in ptrace::context_memory(&mut *context, data.offset, buf.len()) { + for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; unsafe { @@ -652,7 +683,7 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_written); Ok(bytes_written) }, - Operation::AddrSpace { .. } => { + Operation::AddrSpace { addrspace } => { // FIXME: Forbid upgrading external mappings. 
let pid = self.handles.read() @@ -670,18 +701,19 @@ impl Scheme for ProcScheme { return Err(Error::new(EINVAL)); } - let is_active = pid == context::context_id(); + let mut addrspace = addrspace.write(); + let is_active = addrspace.is_current(); - let callback = |context: &mut Context| { + let callback = |addr_space: &mut AddrSpace| { let (mut inactive, mut active); - let mut addr_space = context.addr_space()?.write(); + //let mut addr_space = context.addr_space()?.write(); let (mut mapper, mut flusher) = if is_active { active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); (active.0.mapper(), &mut active.1 as &mut dyn Flusher) } else { - inactive = (unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }, InactiveFlusher::new()); + inactive = (unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }, InactiveFlusher::new()); (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) }; @@ -714,12 +746,17 @@ impl Scheme for ProcScheme { } Ok(()) }; + callback(&mut *addrspace)?; - if is_active { + // TODO: Set some "in use" flag every time an address space is switched to. This + // way, we know what hardware threads are using any given page table, which we need + // to know while doing TLB shootdown. + + /*if is_active { with_context_mut(pid, callback)?; } else { try_stop_context(pid, callback)?; - } + }*/ Ok(3 * mem::size_of::()) } Operation::Regs(kind) => match kind { @@ -868,11 +905,17 @@ impl Scheme for ProcScheme { Ok(mem::size_of::()) }, + // TODO: Deduplicate name and cwd Operation::Name => { let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?.into_boxed_str(); *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.write() = utf8; Ok(buf.len()) } + Operation::Cwd => { + let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?; + *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().cwd.write() = utf8; + Ok(buf.len()) + } Operation::Sigstack => { let bytes = <[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?; let sigstack = usize::from_ne_bytes(bytes); @@ -889,25 +932,49 @@ impl Scheme for ProcScheme { } Ok(buf.len()) } - Operation::Files => return Err(Error::new(EBADF)), + Operation::Filetable { .. } => return Err(Error::new(EBADF)), + Operation::CurrentFiletable => { + let filetable_fd = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + let (hopefully_this_scheme, number) = extract_scheme_number(filetable_fd)?; + + let mut filetable = hopefully_this_scheme.as_filetable(number)?; + + try_stop_context(info.pid, |context| { + context.files = filetable; + Ok(()) + })?; + Ok(mem::size_of::()) + } Operation::CurrentAddrSpace { .. } => { + println!("Setting current address space! 
({} {})", info.pid.into(), context::context_id().into()); + let mut iter = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); - let id = iter.next().ok_or(Error::new(EINVAL))?; + let addrspace_fd = iter.next().ok_or(Error::new(EINVAL))?; let sp = iter.next().ok_or(Error::new(EINVAL))?; let ip = iter.next().ok_or(Error::new(EINVAL))?; - let space = addrspace(PtId::from(id)).ok_or(Error::new(EINVAL))?; + let (hopefully_this_scheme, number) = extract_scheme_number(addrspace_fd)?; + let space = hopefully_this_scheme.as_addrspace(number)?; - try_stop_context(info.pid, |context| unsafe { - let regs = &mut ptrace::regs_for_mut(context).ok_or(Error::new(ESRCH))?.iret; - regs.rip = ip; - regs.rsp = sp; + let callback = |context: &mut Context| unsafe { + if let Some(saved_regs) = ptrace::regs_for_mut(context) { + saved_regs.iret.rip = ip; + saved_regs.iret.rsp = sp; + } else { + context.clone_entry = Some([ip, sp]); + } context.set_addr_space(space); Ok(()) - })?; + }; + if info.pid == context::context_id() { + with_context_mut(info.pid, callback)?; + } else { + try_stop_context(info.pid, callback)?; + } Ok(3 * mem::size_of::()) } + Operation::OpenViaDup => return Err(Error::new(EBADF)), } } @@ -939,20 +1006,22 @@ impl Scheme for ProcScheme { let handle = handles.get(&id).ok_or(Error::new(EBADF))?; let path = format!("proc:{}/{}", handle.info.pid.into(), match handle.info.operation { - Operation::Memory => "mem", - Operation::Grants => "grants", + Operation::Memory { .. } => "mem", Operation::Regs(RegsKind::Float) => "regs/float", Operation::Regs(RegsKind::Int) => "regs/int", Operation::Regs(RegsKind::Env) => "regs/env", Operation::Trace => "trace", Operation::Static(path) => path, Operation::Name => "name", + Operation::Cwd => "cwd", Operation::Sigstack => "sigstack", Operation::Attr(Attr::Uid) => "uid", Operation::Attr(Attr::Gid) => "gid", - Operation::Files => "files", + Operation::Filetable { .. } => "filetable", Operation::AddrSpace { .. } => "addrspace", Operation::CurrentAddrSpace => "current-addrspace", + Operation::CurrentFiletable => "current-filetable", + Operation::OpenViaDup => "open-via-dup", }); read_from(buf, &path.as_bytes(), &mut 0) @@ -999,4 +1068,58 @@ impl Scheme for ProcScheme { Ok(0) } } -impl crate::scheme::KernelScheme for ProcScheme {} +impl KernelScheme for ProcScheme { + fn as_addrspace(&self, number: usize) -> Result>> { + if let Operation::AddrSpace { ref addrspace } | Operation::Memory { ref addrspace } = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(addrspace)) + } else { + Err(Error::new(EBADF)) + } + } + fn as_filetable(&self, number: usize) -> Result>>>> { + if !matches!(self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation, Operation::Filetable { .. 
}) { + return Err(Error::new(EBADF)); + } + Ok(Arc::clone(&context::contexts().current().ok_or(Error::new(ESRCH))?.read().files)) + } +} +extern "C" fn clone_handler() { + let context_lock = Arc::clone(context::contexts().current().expect("expected the current context to be set in a spawn closure")); + + #[cfg(target_arch = "x86_64")] + unsafe { + let [ip, sp] = context_lock.read().clone_entry.expect("clone_entry must be set"); + let [arg, is_singlestep] = [0; 2]; + + crate::start::usermode(ip, sp, arg, is_singlestep); + } +} + +fn inherit_context() -> Result { + let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); + let new_context_lock = Arc::clone(context::contexts_mut().spawn(clone_handler)?); + + let current_context = current_context_lock.read(); + let mut new_context = new_context_lock.write(); + + new_context.status = Status::Stopped(SIGSTOP); + new_context.euid = current_context.euid; + new_context.egid = current_context.egid; + new_context.ruid = current_context.ruid; + new_context.rgid = current_context.rgid; + new_context.ens = current_context.ens; + new_context.rns = current_context.rns; + new_context.ppid = current_context.id; + + // TODO: More to copy? + + Ok(new_context.id) +} +fn extract_scheme_number(fd: usize) -> Result<(Arc, usize)> { + let (scheme_id, number) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().get_file(FileHandle::from(fd)).ok_or(Error::new(EBADF))?.description.read() { + desc => (desc.scheme, desc.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(ENODEV))?); + + Ok((scheme, number)) +} -- GitLab From cb40eb37920c3037523ccbe9c760f0d68c22778a Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Thu, 7 Jul 2022 10:05:39 +0200 Subject: [PATCH 14/44] Support reading all grants and transferring grants. --- src/context/memory.rs | 91 ++++++++++++++++++----------- src/scheme/proc.rs | 127 ++++++++++++++++++++++++++++++++++------- src/scheme/user.rs | 24 +++++--- src/syscall/debug.rs | 4 -- src/syscall/process.rs | 1 - 5 files changed, 179 insertions(+), 68 deletions(-) diff --git a/src/context/memory.rs b/src/context/memory.rs index 1c729583..fd6315c9 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -70,7 +70,6 @@ pub struct AddrSpace { } impl AddrSpace { /// Attempt to clone an existing address space so that all mappings are copied (CoW). - // TODO: Actually use CoW! pub fn try_clone(&self) -> Result<(PtId, Arc>)> { let (id, mut new) = new_addrspace()?; @@ -87,20 +86,27 @@ impl AddrSpace { let mut new_mapper = unsafe { InactivePageTable::from_address(new.read().frame.utable.start_address().data()) }; for grant in self.grants.iter() { - // TODO: Fail if there are borrowed grants, rather than simply ignoring them? - if !grant.is_owned() { continue; } + if grant.desc_opt.is_some() { continue; } - let new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?; + let new_grant; - for page in new_grant.pages() { - // FIXME: ENOMEM is wrong here, it cannot fail. 
- let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate_page(page).ok_or(Error::new(ENOMEM))?.start_address()) }.data() as *const u8; - let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.mapper().translate_page(page).ok_or(Error::new(ENOMEM))?.start_address()) }.data() as *mut u8; + // TODO: Replace this with CoW + if grant.owned { + new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?; - // TODO: Replace this with CoW - unsafe { - new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); + for page in new_grant.pages() { + let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate_page(page).expect("grant containing unmapped pages").start_address()) }.data() as *const u8; + let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.mapper().translate_page(page).expect("grant containing unmapped pages").start_address()) }.data() as *mut u8; + + unsafe { + new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); + } } + } else { + // TODO: Remove reborrow? In that case, physmapped memory will need to either be + // remapped when cloning, or be backed by a file descriptor (like + // `memory:physical`). + new_grant = Grant::reborrow(&grant, Page::containing_address(grant.start_address()), &mut this_mapper, &mut new_mapper.mapper(), ()); } new.write().grants.insert(new_grant); @@ -508,35 +514,54 @@ impl Grant { } Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None }) } + pub fn borrow(src_base: Page, dst_base: Page, page_count: usize, flags: PageFlags, desc_opt: Option, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, dst_flusher: impl Flusher) -> Grant { + Self::copy_inner(src_base, dst_base, page_count, flags, desc_opt, src_mapper, dst_mapper, (), dst_flusher, false, false) + } + pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, dst_flusher: impl Flusher) -> Grant { + Self::borrow(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), src_grant.desc_opt.clone(), src_mapper, dst_mapper, dst_flusher) + } + pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, src_flusher: impl Flusher, dst_flusher: impl Flusher) -> Grant { + assert!(core::mem::replace(&mut src_grant.mapped, false)); + let desc_opt = src_grant.desc_opt.take(); + + Self::copy_inner(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), desc_opt, src_mapper, dst_mapper, src_flusher, dst_flusher, src_grant.owned, true) + } + + fn copy_inner( + src_base: Page, + dst_base: Page, + page_count: usize, + flags: PageFlags, + desc_opt: Option, + src_mapper: &mut Mapper, + dst_mapper: &mut Mapper, + mut src_flusher: impl Flusher, + mut dst_flusher: impl Flusher, + owned: bool, + unmap: bool, + ) -> Grant { + for index in 0..page_count { + let src_page = src_base.next_by(index); + let frame = if unmap { + let (flush, frame) = src_mapper.unmap_return(src_page, false); + src_flusher.consume(flush); + frame + } else { + src_mapper.translate_page(src_page).expect("grant references unmapped memory") + }; - pub fn map_inactive(src: VirtualAddress, dst: VirtualAddress, size: usize, flags: PageFlags, desc_opt: Option, inactive_table: &mut InactivePageTable) -> Grant { - let active_table = unsafe { 
ActivePageTable::new(src.kind()) }; - let mut inactive_mapper = inactive_table.mapper(); - - let src_start_page = Page::containing_address(src); - let src_end_page = Page::containing_address(VirtualAddress::new(src.data() + size - 1)); - let src_range = Page::range_inclusive(src_start_page, src_end_page); - - let dst_start_page = Page::containing_address(dst); - let dst_end_page = Page::containing_address(VirtualAddress::new(dst.data() + size - 1)); - let dst_range = Page::range_inclusive(dst_start_page, dst_end_page); - - for (src_page, dst_page) in src_range.zip(dst_range) { - let frame = active_table.translate_page(src_page).expect("grant references unmapped memory"); - - let inactive_flush = inactive_mapper.map_to(dst_page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { inactive_flush.ignore(); } + let flush = dst_mapper.map_to(dst_base.next_by(index), frame, flags); + dst_flusher.consume(flush); } Grant { region: Region { - start: dst, - size, + start: dst_base.start_address(), + size: page_count * PAGE_SIZE, }, flags, - mapped: true, - owned: false, + mapped: !unmap, + owned, desc_opt, } } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 2d0c3ccc..1441bf65 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,6 @@ use crate::{ arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress}, - context::{self, Context, ContextId, Status, file::FileDescriptor, memory::{AddrSpace, Grant, new_addrspace, PtId, page_flags, Region}}, + context::{self, Context, ContextId, Status, file::{FileDescription, FileDescriptor}, memory::{AddrSpace, Grant, new_addrspace, PtId, page_flags, Region}}, memory::PAGE_SIZE, ptrace, scheme::{self, AtomicSchemeId, FileHandle, KernelScheme, SchemeId}, @@ -121,8 +121,14 @@ enum Operation { AddrSpace { addrspace: Arc> }, CurrentAddrSpace, CurrentFiletable, - // TODO: Any better interface to access newly created contexts? Openat? + // TODO: Remove this once openat is implemented, or allow openat-via-dup via e.g. the top-level + // directory. OpenViaDup, + // Allows calling fmap directly on a FileDescriptor (as opposed to a FileDescriptor). + // + // TODO: Remove this once cross-scheme links are merged. That would allow acquiring a new + // FD to access the file descriptor behind grants. + GrantHandle { description: Arc> }, } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -166,6 +172,7 @@ enum OperationData { Memory(MemData), Trace(TraceData), Static(StaticData), + Offset(usize), Other, } impl OperationData { @@ -298,6 +305,7 @@ impl ProcScheme { Operation::Static(_) => OperationData::Static(StaticData::new( target.name.read().clone().into() )), + Operation::AddrSpace { .. } => OperationData::Offset(0), _ => OperationData::Other, }; @@ -426,12 +434,19 @@ impl Scheme for ProcScheme { } } Operation::AddrSpace { addrspace } => { - let (new_addrspace, is_mem) = match buf { + let (operation, is_mem) = match buf { // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But // in that case, what scheme? 
- b"empty" => (new_addrspace()?.1, false), - b"exclusive" => (addrspace.read().try_clone()?.1, false), - b"mem" => (Arc::clone(&addrspace), true), + b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()?.1 }, false), + b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.read().try_clone()?.1 }, false), + b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true), + + grant_handle if grant_handle.starts_with(b"grant-") => { + let start_addr = usize::from_str_radix(core::str::from_utf8(&grant_handle[6..]).map_err(|_| Error::new(EINVAL))?, 16).map_err(|_| Error::new(EINVAL))?; + (Operation::GrantHandle { + description: Arc::clone(&addrspace.read().grants.contains(VirtualAddress::new(start_addr)).ok_or(Error::new(EINVAL))?.desc_opt.as_ref().ok_or(Error::new(EINVAL))?.desc.description) + }, false) + } _ => return Err(Error::new(EINVAL)), }; @@ -439,9 +454,9 @@ impl Scheme for ProcScheme { info: Info { flags: 0, pid: info.pid, - operation: if is_mem { Operation::Memory { addrspace: new_addrspace } } else { Operation::AddrSpace { addrspace: new_addrspace } }, + operation, }, - data: if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Other }, + data: if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Offset(0) }, } } _ => return Err(Error::new(EINVAL)), @@ -504,8 +519,35 @@ impl Scheme for ProcScheme { data.offset = VirtualAddress::new(data.offset.data() + bytes_read); Ok(bytes_read) }, - // TODO: Support querying which grants exist and where - Operation::AddrSpace { .. } => return Err(Error::new(EBADF)), + // TODO: Support reading only a specific address range. Maybe using seek? + Operation::AddrSpace { addrspace } => { + let mut handles = self.handles.write(); + let offset = if let OperationData::Offset(ref mut offset) = handles.get_mut(&id).ok_or(Error::new(EBADF))?.data { + offset + } else { + return Err(Error::new(EBADFD)); + }; + + // TODO: Define a struct somewhere? + const RECORD_SIZE: usize = mem::size_of::() * 4; + let start = core::cmp::min(buf.len(), *offset); + let records = buf[start..].array_chunks_mut::(); + + let addrspace = addrspace.read(); + let mut bytes_read = 0; + + for (record_bytes, grant) in records.zip(addrspace.grants.iter()).skip(*offset / RECORD_SIZE) { + let mut qwords = record_bytes.array_chunks_mut::<{mem::size_of::()}>(); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.start_address().data())); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.size())); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.flags().data() | if grant.desc_opt.is_some() { 0x8000_0000 } else { 0 })); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.desc_opt.as_ref().map_or(0, |d| d.offset))); + bytes_read += RECORD_SIZE; + } + + *offset += bytes_read; + Ok(bytes_read) + } Operation::Regs(kind) => { union Output { @@ -641,7 +683,7 @@ impl Scheme for ProcScheme { // the instruction and stack pointer. Maybe remove `/regs` altogether and replace it // with `/ctx` Operation::CurrentAddrSpace | Operation::CurrentFiletable => return Err(Error::new(EBADF)), - Operation::OpenViaDup => return Err(Error::new(EBADF)), + Operation::OpenViaDup | Operation::GrantHandle { .. 
} => return Err(Error::new(EBADF)), } } @@ -695,7 +737,7 @@ impl Scheme for ProcScheme { let base = chunks.next().ok_or(Error::new(EINVAL))?; let size = chunks.next().ok_or(Error::new(EINVAL))?; let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; - let region = Region::new(VirtualAddress::new(base), size); + let src_address = chunks.next(); if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); @@ -707,8 +749,6 @@ impl Scheme for ProcScheme { let callback = |addr_space: &mut AddrSpace| { let (mut inactive, mut active); - //let mut addr_space = context.addr_space()?.write(); - let (mut mapper, mut flusher) = if is_active { active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); (active.0.mapper(), &mut active.1 as &mut dyn Flusher) @@ -717,6 +757,34 @@ impl Scheme for ProcScheme { (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) }; + if let Some(src_address) = src_address { + // Forbid transferring grants to the same address space! + if is_active { return Err(Error::new(EBUSY)); } + + let src_grant = current_addrspace()?.write().grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; + + if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { + return Err(Error::new(EINVAL)); + } + + // TODO: Allow downgrading flags? + + if let Some(grant) = addr_space.grants.conflicts(Region::new(VirtualAddress::new(base), size)).next() { + return Err(Error::new(EEXIST)); + } + + addr_space.grants.insert(Grant::transfer( + src_grant, + Page::containing_address(VirtualAddress::new(base)), + &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, + &mut mapper, + PageFlushAll::new(), + flusher, + )); + return Ok(()); + } + + let region = Region::new(VirtualAddress::new(base), size); let conflicting = addr_space.grants.conflicts(region).map(|g| *g.region()).collect::>(); for conflicting_region in conflicting { let whole_grant = addr_space.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; @@ -939,15 +1007,15 @@ impl Scheme for ProcScheme { let mut filetable = hopefully_this_scheme.as_filetable(number)?; - try_stop_context(info.pid, |context| { + let stopper = if info.pid == context::context_id() { with_context_mut } else { try_stop_context }; + + stopper(info.pid, |context: &mut Context| { context.files = filetable; Ok(()) })?; Ok(mem::size_of::()) } Operation::CurrentAddrSpace { .. } => { - println!("Setting current address space! ({} {})", info.pid.into(), context::context_id().into()); - let mut iter = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); let addrspace_fd = iter.next().ok_or(Error::new(EINVAL))?; let sp = iter.next().ok_or(Error::new(EINVAL))?; @@ -974,7 +1042,7 @@ impl Scheme for ProcScheme { } Ok(3 * mem::size_of::()) } - Operation::OpenViaDup => return Err(Error::new(EBADF)), + Operation::OpenViaDup | Operation::GrantHandle { .. } => return Err(Error::new(EBADF)), } } @@ -1022,6 +1090,8 @@ impl Scheme for ProcScheme { Operation::CurrentAddrSpace => "current-addrspace", Operation::CurrentFiletable => "current-filetable", Operation::OpenViaDup => "open-via-dup", + + Operation::GrantHandle { .. } => return Err(Error::new(EOPNOTSUPP)), }); read_from(buf, &path.as_bytes(), &mut 0) @@ -1067,6 +1137,20 @@ impl Scheme for ProcScheme { } Ok(0) } + // TODO: Support borrowing someone else's memory. 
+ fn fmap(&self, id: usize, map: &syscall::data::Map) -> Result { + let description_lock = match self.handles.read().get(&id) { + Some(Handle { info: Info { operation: Operation::GrantHandle { ref description }, .. }, .. }) => Arc::clone(description), + _ => return Err(Error::new(EBADF)), + }; + let (scheme_id, number) = { + let description = description_lock.read(); + + (description.scheme, description.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); + scheme.fmap(number, map) + } } impl KernelScheme for ProcScheme { fn as_addrspace(&self, number: usize) -> Result>> { @@ -1077,10 +1161,11 @@ impl KernelScheme for ProcScheme { } } fn as_filetable(&self, number: usize) -> Result>>>> { - if !matches!(self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation, Operation::Filetable { .. }) { - return Err(Error::new(EBADF)); + if let Operation::Filetable { ref filetable } = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(filetable)) + } else { + Err(Error::new(EBADF)) } - Ok(Arc::clone(&context::contexts().current().ok_or(Error::new(ESRCH))?.read().files)) } } extern "C" fn clone_handler() { diff --git a/src/scheme/user.rs b/src/scheme/user.rs index c87e694e..96101db6 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -8,9 +8,9 @@ use spin::{Mutex, RwLock}; use crate::context::{self, Context}; use crate::context::file::FileDescriptor; -use crate::context::memory::{DANGLING, page_flags, round_down_pages, Grant, Region, GrantFileRef}; +use crate::context::memory::{DANGLING, page_flags, round_down_pages, round_up_pages, Grant, Region, GrantFileRef}; use crate::event; -use crate::paging::{PAGE_SIZE, InactivePageTable, VirtualAddress}; +use crate::paging::{ActivePageTable, PAGE_SIZE, InactivePageTable, mapper::InactiveFlusher, Page, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; use crate::sync::{WaitQueue, WaitMap}; use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; @@ -123,6 +123,9 @@ impl UserInner { ).map(|addr| addr.data()) } + // TODO: Use an address space Arc over a context Arc. While contexts which share address spaces + // still can access borrowed scheme pages, it would both be cleaner and would handle the case + // where the initial context is closed. fn capture_inner(context_weak: &Weak>, dst_address: usize, address: usize, size: usize, flags: MapFlags, desc_opt: Option) -> Result { // TODO: More abstractions over grant creation! 
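The next hunk reworks capture_inner's page rounding: the caller's possibly unaligned (address, size) span has to be widened to whole pages while remembering how far into the first page the data starts, so the grant covers round_up_pages(offset + size) bytes beginning at round_down_pages(address). A self-contained sketch of just that arithmetic, with a hard-coded PAGE_SIZE and helper names that merely mirror the kernel's:

    const PAGE_SIZE: usize = 4096;

    fn round_down_pages(n: usize) -> usize {
        n - n % PAGE_SIZE
    }

    fn round_up_pages(n: usize) -> usize {
        round_down_pages(n + PAGE_SIZE - 1)
    }

    fn main() {
        // A scheme caller's buffer rarely starts on a page boundary.
        let (address, size) = (0x8000_1234usize, 0x100usize);

        let src_address = round_down_pages(address); // page containing the start
        let offset = address - src_address;          // how far into that page
        let aligned_size = round_up_pages(offset + size); // whole pages to map

        assert_eq!(src_address, 0x8000_1000);
        assert_eq!(offset, 0x234);
        assert_eq!(aligned_size, PAGE_SIZE); // 0x234 + 0x100 still fits in one page
        println!("grant {:#x}..{:#x}", src_address, src_address + aligned_size);
    }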
@@ -148,18 +151,21 @@ impl UserInner { let mut addr_space = context.addr_space()?.write(); let src_address = round_down_pages(address); + let dst_address = round_down_pages(dst_address); let offset = address - src_address; - let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round(); - let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; + let aligned_size = round_up_pages(offset + size); + let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), aligned_size, flags)?; //TODO: Use syscall_head and syscall_tail to avoid leaking data - addr_space.grants.insert(Grant::map_inactive( - src_region.start_address(), - dst_region.start_address(), - src_region.size(), + addr_space.grants.insert(Grant::borrow( + Page::containing_address(VirtualAddress::new(src_address)), + Page::containing_address(dst_region.start_address()), + aligned_size / PAGE_SIZE, page_flags(flags), desc_opt, - &mut new_table, + &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, + &mut new_table.mapper(), + InactiveFlusher::new(), )); Ok(VirtualAddress::new(dst_region.start_address().data() + offset)) diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 993575f8..717e1c3a 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -170,10 +170,6 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - b, validate_slice_mut(c as *mut TimeSpec, 1) ), - SYS_CLONE => format!( - "clone({:?})", - CloneFlags::from_bits(b) - ), SYS_EXIT => format!( "exit({})", b diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 418f464e..1a1e7259 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -652,7 +652,6 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { .expect("expected bootstrap context to have an address space") .write().grants.insert(grant); } - log::info!("Usermode bootstrap"); drop(data); -- GitLab From fa48c7aa979d7232c439f7f91b9053e88a4f1f89 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Thu, 7 Jul 2022 10:16:07 +0200 Subject: [PATCH 15/44] Deduplicate code for transferring/creating grants. --- src/scheme/proc.rs | 105 ++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 1441bf65..b4117d47 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -746,77 +746,66 @@ impl Scheme for ProcScheme { let mut addrspace = addrspace.write(); let is_active = addrspace.is_current(); - let callback = |addr_space: &mut AddrSpace| { - let (mut inactive, mut active); + let (mut inactive, mut active); - let (mut mapper, mut flusher) = if is_active { - active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); - (active.0.mapper(), &mut active.1 as &mut dyn Flusher) - } else { - inactive = (unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }, InactiveFlusher::new()); - (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) - }; - - if let Some(src_address) = src_address { - // Forbid transferring grants to the same address space! 
- if is_active { return Err(Error::new(EBUSY)); } + let (mut mapper, mut flusher) = if is_active { + active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new()); + (active.0.mapper(), &mut active.1 as &mut dyn Flusher) + } else { + inactive = (unsafe { InactivePageTable::from_address(addrspace.frame.utable.start_address().data()) }, InactiveFlusher::new()); + (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher) + }; - let src_grant = current_addrspace()?.write().grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; + let region = Region::new(VirtualAddress::new(base), size); + let conflicting = addrspace.grants.conflicts(region).map(|g| *g.region()).collect::>(); + for conflicting_region in conflicting { + let whole_grant = addrspace.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; + let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; - if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { - return Err(Error::new(EINVAL)); - } + if let Some(before) = before_opt { + addrspace.grants.insert(before); + } + if let Some(after) = after_opt { + addrspace.grants.insert(after); + } - // TODO: Allow downgrading flags? + let res = current.unmap(&mut mapper, &mut flusher); - if let Some(grant) = addr_space.grants.conflicts(Region::new(VirtualAddress::new(base), size)).next() { - return Err(Error::new(EEXIST)); - } - - addr_space.grants.insert(Grant::transfer( - src_grant, - Page::containing_address(VirtualAddress::new(base)), - &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, - &mut mapper, - PageFlushAll::new(), - flusher, - )); - return Ok(()); + if res.file_desc.is_some() { + // We prefer avoiding file operations from within the kernel. If userspace + // updates grants that overlap, it might as well enumerate grants and call + // partial funmap on its own. + return Err(Error::new(EBUSY)); } + } - let region = Region::new(VirtualAddress::new(base), size); - let conflicting = addr_space.grants.conflicts(region).map(|g| *g.region()).collect::>(); - for conflicting_region in conflicting { - let whole_grant = addr_space.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; - let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; - - if let Some(before) = before_opt { - addr_space.grants.insert(before); - } - if let Some(after) = after_opt { - addr_space.grants.insert(after); - } + let base_page = Page::containing_address(VirtualAddress::new(base)); - let res = current.unmap(&mut mapper, &mut flusher); + if let Some(src_address) = src_address { + // Forbid transferring grants to the same address space! + if is_active { return Err(Error::new(EBUSY)); } - if res.file_desc.is_some() { - return Err(Error::new(EBUSY)); - } + let src_grant = current_addrspace()?.write().grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; - // TODO: Partial free if grant is mapped externally, or fail and force - // userspace to do it. 
+ if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { + return Err(Error::new(EINVAL)); } - if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - let base = Page::containing_address(VirtualAddress::new(base)); - - addr_space.grants.insert(Grant::zeroed(base, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?); - } - Ok(()) - }; - callback(&mut *addrspace)?; + // TODO: Allow downgrading flags? + + addrspace.grants.insert(Grant::transfer( + src_grant, + base_page, + &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, + &mut mapper, + PageFlushAll::new(), + flusher, + )); + } else if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { + addrspace.grants.insert(Grant::zeroed(base_page, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?); + } - // TODO: Set some "in use" flag every time an address space is switched to. This + // TODO: Set some "in use" flag every time an address space is switched to? This // way, we know what hardware threads are using any given page table, which we need // to know while doing TLB shootdown. -- GitLab From 240d91f9514bf0b9190ba7b3f72729b5b1fd4508 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Thu, 7 Jul 2022 12:11:58 +0200 Subject: [PATCH 16/44] Set address space/files when closing, not writing. This fixes file descriptor leaks. Suppose relibc is just about to set the address space. For this, it needs to write the address space fd to the selection fd. To avoid having to close them in the kernel, it rather memorizes what the file descriptors refer to internally, and then do the actual operation when they are gone, i.e. when closing. --- src/scheme/mod.rs | 5 --- src/scheme/proc.rs | 83 +++++++++++++++++++++++++++------------------- src/scheme/user.rs | 18 +++++----- 3 files changed, 56 insertions(+), 50 deletions(-) diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 4e89661b..6f66a44e 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -302,11 +302,6 @@ pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { #[allow(unused_variables)] pub trait KernelScheme: Scheme + Send + Sync + 'static { - fn kfmap(&self, number: usize, map: &syscall::data::Map, target_context: &Arc>) -> Result { - log::error!("Returning ENOSYS since kfmap can only be called on UserScheme schemes"); - Err(Error::new(ENOSYS)) - } - fn as_filetable(&self, number: usize) -> Result>>>> { Err(Error::new(EBADF)) } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index b4117d47..05da3cf9 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -120,7 +120,20 @@ enum Operation { Filetable { filetable: Arc>>> }, AddrSpace { addrspace: Arc> }, CurrentAddrSpace, + + // "operations CAN change". The reason we split changing the address space into two handle + // types, is that we would rather want the actual switch to occur when closing, as opposed to + // when writing. This is so that we can actually guarantee that no file descriptors are leaked. + AwaitingAddrSpaceChange { + new: Arc>, + new_sp: usize, + new_ip: usize, + }, + CurrentFiletable, + + AwaitingFiletableChange(Arc>>>), + // TODO: Remove this once openat is implemented, or allow openat-via-dup via e.g. the top-level // directory. OpenViaDup, @@ -530,8 +543,7 @@ impl Scheme for ProcScheme { // TODO: Define a struct somewhere? 
const RECORD_SIZE: usize = mem::size_of::() * 4; - let start = core::cmp::min(buf.len(), *offset); - let records = buf[start..].array_chunks_mut::(); + let records = buf.array_chunks_mut::(); let addrspace = addrspace.read(); let mut bytes_read = 0; @@ -682,8 +694,7 @@ impl Scheme for ProcScheme { // TODO: Find a better way to switch address spaces, since they also require switching // the instruction and stack pointer. Maybe remove `/regs` altogether and replace it // with `/ctx` - Operation::CurrentAddrSpace | Operation::CurrentFiletable => return Err(Error::new(EBADF)), - Operation::OpenViaDup | Operation::GrantHandle { .. } => return Err(Error::new(EBADF)), + _ => return Err(Error::new(EBADF)), } } @@ -996,12 +1007,8 @@ impl Scheme for ProcScheme { let mut filetable = hopefully_this_scheme.as_filetable(number)?; - let stopper = if info.pid == context::context_id() { with_context_mut } else { try_stop_context }; + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingFiletableChange(filetable); - stopper(info.pid, |context: &mut Context| { - context.files = filetable; - Ok(()) - })?; Ok(mem::size_of::()) } Operation::CurrentAddrSpace { .. } => { @@ -1013,25 +1020,11 @@ impl Scheme for ProcScheme { let (hopefully_this_scheme, number) = extract_scheme_number(addrspace_fd)?; let space = hopefully_this_scheme.as_addrspace(number)?; - let callback = |context: &mut Context| unsafe { - if let Some(saved_regs) = ptrace::regs_for_mut(context) { - saved_regs.iret.rip = ip; - saved_regs.iret.rsp = sp; - } else { - context.clone_entry = Some([ip, sp]); - } + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingAddrSpaceChange { new: space, new_sp: sp, new_ip: ip }; - context.set_addr_space(space); - Ok(()) - }; - if info.pid == context::context_id() { - with_context_mut(info.pid, callback)?; - } else { - try_stop_context(info.pid, callback)?; - } Ok(3 * mem::size_of::()) } - Operation::OpenViaDup | Operation::GrantHandle { .. } => return Err(Error::new(EBADF)), + _ => return Err(Error::new(EBADF)), } } @@ -1080,7 +1073,7 @@ impl Scheme for ProcScheme { Operation::CurrentFiletable => "current-filetable", Operation::OpenViaDup => "open-via-dup", - Operation::GrantHandle { .. 
} => return Err(Error::new(EOPNOTSUPP)), + _ => return Err(Error::new(EOPNOTSUPP)), }); read_from(buf, &path.as_bytes(), &mut 0) @@ -1111,18 +1104,38 @@ impl Scheme for ProcScheme { let mut handle = self.handles.write().remove(&id).ok_or(Error::new(EBADF))?; handle.continue_ignored_children(); - if let Operation::Trace = handle.info.operation { - ptrace::close_session(handle.info.pid); + let stop_context = if handle.info.pid == context::context_id() { with_context_mut } else { try_stop_context }; - if handle.info.flags & O_EXCL == O_EXCL { - syscall::kill(handle.info.pid, SIGKILL)?; - } + match handle.info.operation { + Operation::AwaitingAddrSpaceChange { new, new_sp, new_ip } => stop_context(handle.info.pid, |context: &mut Context| unsafe { + if let Some(saved_regs) = ptrace::regs_for_mut(context) { + saved_regs.iret.rip = new_ip; + saved_regs.iret.rsp = new_sp; + } else { + context.clone_entry = Some([new_ip, new_sp]); + } - let contexts = context::contexts(); - if let Some(context) = contexts.get(handle.info.pid) { - let mut context = context.write(); - context.ptrace_stop = false; + context.set_addr_space(new); + Ok(()) + })?, + Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { + context.files = new; + Ok(()) + })?, + Operation::Trace => { + ptrace::close_session(handle.info.pid); + + if handle.info.flags & O_EXCL == O_EXCL { + syscall::kill(handle.info.pid, SIGKILL)?; + } + + let contexts = context::contexts(); + if let Some(context) = contexts.get(handle.info.pid) { + let mut context = context.write(); + context.ptrace_stop = false; + } } + _ => (), } Ok(0) } diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 96101db6..3fce59a5 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -279,9 +279,13 @@ impl UserInner { Ok(0) } - fn fmap_inner(&self, file: usize, map: &Map, context_lock: &Arc>) -> Result { + fn fmap_inner(&self, file: usize, map: &Map) -> Result { let (pid, uid, gid, context_weak, desc) = { + let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); let context = context_lock.read(); + if map.size % PAGE_SIZE != 0 { + log::warn!("Unaligned map size for context {:?}", context.name.try_read().as_deref()); + } // TODO: Faster, cleaner mechanism to get descriptor let scheme = self.scheme_id.load(Ordering::SeqCst); let mut desc_res = Err(Error::new(EBADF)); @@ -298,9 +302,8 @@ impl UserInner { } } let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(context_lock), desc) + (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) }; - drop(context_lock); let address = self.capture(map)?; @@ -433,7 +436,7 @@ impl Scheme for UserScheme { fn fmap(&self, file: usize, map: &Map) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.fmap_inner(file, map, &Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?)) + inner.fmap_inner(file, map) } fn funmap(&self, grant_address: usize, size: usize) -> Result { @@ -535,9 +538,4 @@ impl Scheme for UserScheme { inner.call(SYS_CLOSE, file, 0, 0) } } -impl crate::scheme::KernelScheme for UserScheme { - fn kfmap(&self, number: usize, map: &Map, target_context: &Arc>) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - inner.fmap_inner(number, map, target_context) - } -} +impl crate::scheme::KernelScheme for UserScheme {} -- GitLab From 0b67997c7b171dd1637ede11036c4c1b581aa7e9 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> 
Date: Fri, 8 Jul 2022 12:35:27 +0200
Subject: [PATCH 17/44] Fix "id == current" check in set_addr_space.

---
 src/context/context.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/context/context.rs b/src/context/context.rs
index 34e11b8c..3a008a5f 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -535,7 +535,7 @@ impl Context {
     }
     pub fn set_addr_space(&mut self, addr_space: Arc<RwLock<AddrSpace>>) {
         let physaddr = addr_space.read().frame.utable.start_address();
-        if self.running {
+        if self.id == super::context_id() {
             unsafe {
                 RmmA::set_table(physaddr);
             }
-- 
GitLab


From 59d74689dc8f98965a688c4b4f18349f5d04a071 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Fri, 8 Jul 2022 12:36:36 +0200
Subject: [PATCH 18/44] Copy filetable more efficiently.

---
 src/context/memory.rs | 4 ++--
 src/scheme/proc.rs | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/context/memory.rs b/src/context/memory.rs
index fd6315c9..255723e7 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -251,8 +251,8 @@ impl UserGrants {
         let exactly_after_size = holes.remove(&grant.end_address());
 
         // There was a range that began exactly prior to the to-be-freed region, so simply
-        // increment the size such that it occupies the grant too. If in additional there was a
-        // grant directly after the grant, include it too in the size.
+        // increment the size such that it occupies the grant too. If in addition there was a grant
+        // directly after the grant, include it too in the size.
         if let Some((hole_offset, hole_size)) = holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) {
             *hole_size = grant.end_address().data() - hole_offset.data() + exactly_after_size.unwrap_or(0);
         } else {
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 05da3cf9..28225a68 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -435,7 +435,7 @@ impl Scheme for ProcScheme {
                 if buf != b"copy" {
                     return Err(Error::new(EINVAL));
                 }
-                let new_filetable = Arc::try_new(RwLock::new(filetable.read().iter().cloned().collect::<Vec<_>>())).map_err(|_| Error::new(ENOMEM))?;
+                let new_filetable = Arc::try_new(RwLock::new(filetable.read().clone())).map_err(|_| Error::new(ENOMEM))?;
 
                 Handle {
                     info: Info {
-- 
GitLab


From 549c0233989e2699a4b4efff49eb8f10730ce2ae Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 10 Jul 2022 15:18:42 +0200
Subject: [PATCH 19/44] Return correct bytes written in proc:X/addrspace.

---
 src/scheme/proc.rs | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 28225a68..a4a43ff0 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -820,12 +820,7 @@ impl Scheme for ProcScheme {
                     // way, we know what hardware threads are using any given page table, which we need
                     // to know while doing TLB shootdown.
 
-                    /*if is_active {
-                        with_context_mut(pid, callback)?;
-                    } else {
-                        try_stop_context(pid, callback)?;
-                    }*/
-                    Ok(3 * mem::size_of::<usize>())
+                    Ok((3 + usize::from(src_address.is_some())) * mem::size_of::<usize>())
                 }
                 Operation::Regs(kind) => match kind {
                     RegsKind::Float => {
-- 
GitLab


From 60e3e0af34627da4958e94480ddb170e66a461af Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 10 Jul 2022 15:19:57 +0200
Subject: [PATCH 20/44] Flush less frequently in funmap.
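Previously, funmap constructed a fresh PageFlushAll for every grant it unmapped, paying for a flush per region. The change below hoists a single flusher out of the loop, so each unmap only queues its page flushes and the batch is flushed once at the end. A minimal standalone model of that pattern, with toy types standing in for the kernel's PageFlush/PageFlushAll:

    // Toy stand-ins for the kernel's flusher types: consume() records a queued
    // page flush, and the whole batch is flushed once when the flusher is dropped.
    struct PageFlush;

    struct PageFlushAll {
        pending: usize,
    }

    impl PageFlushAll {
        fn new() -> Self {
            Self { pending: 0 }
        }
        fn consume(&mut self, _flush: PageFlush) {
            self.pending += 1;
        }
    }

    impl Drop for PageFlushAll {
        fn drop(&mut self) {
            // One full TLB flush replaces `pending` individual ones.
            println!("flushing TLB once, covering {} unmaps", self.pending);
        }
    }

    fn unmap_grant(flusher: &mut PageFlushAll) {
        // kernel analogue: grant.unmap(&mut mapper, &mut flusher)
        flusher.consume(PageFlush);
    }

    fn main() {
        let mut flusher = PageFlushAll::new(); // hoisted out of the loop
        for _ in 0..3 {
            unmap_grant(&mut flusher);
        }
    } // a single flush fires here instead of three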
---
 src/syscall/fs.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index 21fc6922..40ea1dd6 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -8,7 +8,7 @@ use crate::context::file::{FileDescriptor, FileDescription};
 use crate::context::memory::Region;
 use crate::context;
 use crate::memory::PAGE_SIZE;
-use crate::paging::VirtualAddress;
+use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind, VirtualAddress};
 use crate::scheme::{self, FileHandle};
 use crate::syscall::data::{Packet, Stat};
 use crate::syscall::error::*;
@@ -479,6 +479,7 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
     let virtual_address = VirtualAddress::new(virtual_address);
     let requested = Region::new(virtual_address, length);
+    let mut flusher = PageFlushAll::new();
 
     {
         let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?);
@@ -506,10 +507,9 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
             if let Some(after) = after {
                 grants.insert(after);
             }
-            use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind};
 
             // Remove irrelevant region
-            grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new());
+            grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, &mut flusher);
         }
     }
-- 
GitLab


From b141cdaad22bf32f2a52f867751db9effa376eb7 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 10 Jul 2022 15:21:33 +0200
Subject: [PATCH 21/44] Fail if funmap's length isn't page size divisible.

---
 src/syscall/fs.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index 40ea1dd6..38e2cd31 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -471,7 +471,7 @@ pub fn fstat(fd: FileHandle, stat: &mut Stat) -> Result<usize> {
 pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
     if virtual_address == 0 || length == 0 {
         return Ok(0);
-    } else if virtual_address % PAGE_SIZE != 0 {
+    } else if virtual_address % PAGE_SIZE != 0 || length % PAGE_SIZE != 0 {
         return Err(Error::new(EINVAL));
     }
 
-- 
GitLab


From 4aea0cfd0c45d7017acc82a6c915170f337d5848 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Mon, 11 Jul 2022 12:39:08 +0200
Subject: [PATCH 22/44] Fix AddrSpace memory leak.
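An address space is shared through an Arc, but simply dropping the last Arc<RwLock<AddrSpace>> cannot free anything: dropping a still-mapped grant panics, which is why set_addr_space below gains a #[must_use] return value. The caller that swaps in a new address space now receives the previous one and unmaps its grants whenever it turns out to hold the last reference. A condensed standalone model of that teardown rule, using std's Arc/RwLock in place of the kernel's spin locks (where into_inner returns the value directly, without poisoning):

    use std::sync::{Arc, RwLock};

    struct AddrSpace {
        grants: Vec<&'static str>,
    }

    fn replace_addr_space(
        slot: &mut Option<Arc<RwLock<AddrSpace>>>,
        new: Arc<RwLock<AddrSpace>>,
    ) {
        let prev = slot.replace(new);

        // Arc::try_unwrap succeeds only for the last reference; a space still
        // shared with other contexts must stay mapped for them.
        if let Some(lock) = prev.and_then(|arc| Arc::try_unwrap(arc).ok()) {
            let prev = lock.into_inner().unwrap();
            for grant in prev.grants {
                // kernel analogue: grant.unmap(&mut table.mapper(), ())
                println!("unmapping grant: {}", grant);
            }
        }
    }

    fn main() {
        let mut slot = Some(Arc::new(RwLock::new(AddrSpace {
            grants: vec!["stack", "heap"],
        })));
        let empty = Arc::new(RwLock::new(AddrSpace { grants: Vec::new() }));
        replace_addr_space(&mut slot, empty); // last reference: both grants unmapped
    }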
---
 src/context/context.rs | 7 ++++---
 src/context/memory.rs | 37 ++++++++++++++++---------------------
 src/scheme/proc.rs | 32 +++++++++++++++++++++-----------
 src/scheme/user.rs | 8 ++++----
 4 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/src/context/context.rs b/src/context/context.rs
index 3a008a5f..082ef9e4 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -359,7 +359,7 @@ impl Context {
             sigstack: None,
             clone_entry: None,
         };
-        this.set_addr_space(new_addrspace()?.1);
+        let _ = this.set_addr_space(new_addrspace()?);
 
         Ok(this)
     }
@@ -533,7 +533,8 @@ impl Context {
     pub fn addr_space(&self) -> Result<&Arc<RwLock<AddrSpace>>> {
         self.addr_space.as_ref().ok_or(Error::new(ESRCH))
     }
-    pub fn set_addr_space(&mut self, addr_space: Arc<RwLock<AddrSpace>>) {
+    #[must_use = "grants must be manually unmapped, otherwise it WILL panic!"]
+    pub fn set_addr_space(&mut self, addr_space: Arc<RwLock<AddrSpace>>) -> Option<Arc<RwLock<AddrSpace>>> {
         let physaddr = addr_space.read().frame.utable.start_address();
         if self.id == super::context_id() {
             unsafe {
@@ -542,6 +543,6 @@ impl Context {
         }
 
         self.arch.set_page_utable(physaddr.data());
-        self.addr_space = Some(addr_space);
+        self.addr_space.replace(addr_space)
     }
 }
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 255723e7..2ce74606 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -35,6 +35,14 @@ pub fn page_flags(flags: MapFlags) -> PageFlags<RmmA> {
         .write(flags.contains(MapFlags::PROT_WRITE))
         //TODO: PROT_READ
 }
+pub fn map_flags(page_flags: PageFlags<RmmA>) -> MapFlags {
+    let mut flags = MapFlags::PROT_READ;
+    if page_flags.has_write() { flags |= MapFlags::PROT_WRITE; }
+    if page_flags.has_execute() { flags |= MapFlags::PROT_EXEC; }
+    // TODO: MAP_SHARED/MAP_PRIVATE (requires that grants keep track of what they borrow and if
+    // they borrow shared or CoW).
+    flags
+}
 
 pub struct UnmapResult {
     pub file_desc: Option<GrantFileRef>,
@@ -47,31 +55,19 @@ impl Drop for UnmapResult {
     }
 }
 
-int_like!(PtId, usize);
-
-static ADDRSPACES: RwLock<BTreeMap<PtId, Arc<RwLock<AddrSpace>>>> = RwLock::new(BTreeMap::new());
-static NEXT_PTID: atomic::AtomicUsize = atomic::AtomicUsize::new(1);
-
-pub fn new_addrspace() -> Result<(PtId, Arc<RwLock<AddrSpace>>)> {
-    let id = PtId::from(NEXT_PTID.fetch_add(1, atomic::Ordering::Relaxed));
-    let arc = Arc::try_new(RwLock::new(AddrSpace::new(id)?)).map_err(|_| Error::new(ENOMEM))?;
-    ADDRSPACES.write().insert(id, Arc::clone(&arc));
-    Ok((id, arc))
-}
-pub fn addrspace(id: PtId) -> Option<Arc<RwLock<AddrSpace>>> {
-    ADDRSPACES.read().get(&id).map(Arc::clone)
+pub fn new_addrspace() -> Result<Arc<RwLock<AddrSpace>>> {
+    Arc::try_new(RwLock::new(AddrSpace::new()?)).map_err(|_| Error::new(ENOMEM))
 }
 
 #[derive(Debug)]
 pub struct AddrSpace {
     pub frame: Tables,
     pub grants: UserGrants,
-    pub id: PtId,
 }
 impl AddrSpace {
     /// Attempt to clone an existing address space so that all mappings are copied (CoW).
-    pub fn try_clone(&self) -> Result<(PtId, Arc<RwLock<Self>>)> {
-        let (id, mut new) = new_addrspace()?;
+    pub fn try_clone(&self) -> Result<Arc<RwLock<Self>>> {
+        let mut new = new_addrspace()?;
 
         // TODO: Abstract away this.
let (mut inactive, mut active); @@ -111,13 +107,12 @@ impl AddrSpace { new.write().grants.insert(new_grant); } - Ok((id, new)) + Ok(new) } - pub fn new(id: PtId) -> Result { + pub fn new() -> Result { Ok(Self { grants: UserGrants::new(), frame: setup_new_utable()?, - id, }) } pub fn is_current(&self) -> bool { @@ -271,7 +266,7 @@ impl UserGrants { } pub fn take(&mut self, region: &Region) -> Option { let grant = self.inner.take(region)?; - Self::unreserve(&mut self.holes, region); + Self::unreserve(&mut self.holes, grant.region()); Some(grant) } pub fn iter(&self) -> impl Iterator + '_ { @@ -560,7 +555,7 @@ impl Grant { size: page_count * PAGE_SIZE, }, flags, - mapped: !unmap, + mapped: true, owned, desc_opt, } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index a4a43ff0..83423f65 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,6 +1,6 @@ use crate::{ arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress}, - context::{self, Context, ContextId, Status, file::{FileDescription, FileDescriptor}, memory::{AddrSpace, Grant, new_addrspace, PtId, page_flags, Region}}, + context::{self, Context, ContextId, Status, file::{FileDescription, FileDescriptor}, memory::{AddrSpace, Grant, new_addrspace, map_flags, page_flags, Region}}, memory::PAGE_SIZE, ptrace, scheme::{self, AtomicSchemeId, FileHandle, KernelScheme, SchemeId}, @@ -8,7 +8,7 @@ use crate::{ FloatRegisters, IntRegisters, EnvRegisters, - data::{PtraceEvent, Stat}, + data::{Map, PtraceEvent, Stat}, error::*, flag::*, scheme::{calc_seek_offset_usize, Scheme}, @@ -450,8 +450,8 @@ impl Scheme for ProcScheme { let (operation, is_mem) = match buf { // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But // in that case, what scheme? - b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()?.1 }, false), - b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.read().try_clone()?.1 }, false), + b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()? }, false), + b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.read().try_clone()? }, false), b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true), grant_handle if grant_handle.starts_with(b"grant-") => { @@ -552,7 +552,7 @@ impl Scheme for ProcScheme { let mut qwords = record_bytes.array_chunks_mut::<{mem::size_of::()}>(); qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.start_address().data())); qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.size())); - qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.flags().data() | if grant.desc_opt.is_some() { 0x8000_0000 } else { 0 })); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(map_flags(grant.flags()).bits() | if grant.desc_opt.is_some() { 0x8000_0000 } else { 0 })); qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.desc_opt.as_ref().map_or(0, |d| d.offset))); bytes_read += RECORD_SIZE; } @@ -739,10 +739,6 @@ impl Scheme for ProcScheme { Operation::AddrSpace { addrspace } => { // FIXME: Forbid upgrading external mappings. - let pid = self.handles.read() - .get(&id).ok_or(Error::new(EBADF))? - .info.pid; - let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); // Update grant mappings, like mprotect but allowed to target other contexts. 
let base = chunks.next().ok_or(Error::new(EINVAL))?; @@ -1110,7 +1106,21 @@ impl Scheme for ProcScheme { context.clone_entry = Some([new_ip, new_sp]); } - context.set_addr_space(new); + let prev_addr_space = context.set_addr_space(new); + + if let Some(prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { + // We are the last reference to the address space; therefore it must be + // unmapped. + + let mut table = unsafe { InactivePageTable::from_address(prev.frame.utable.start_address().data()) }; + + // TODO: Optimize away clearing of page tables? In that case, what about memory + // deallocation? + for grant in prev.grants.into_iter() { + grant.unmap(&mut table.mapper(), ()); + } + } + Ok(()) })?, Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { @@ -1135,7 +1145,7 @@ impl Scheme for ProcScheme { Ok(0) } // TODO: Support borrowing someone else's memory. - fn fmap(&self, id: usize, map: &syscall::data::Map) -> Result { + fn fmap(&self, id: usize, map: &Map) -> Result { let description_lock = match self.handles.read().get(&id) { Some(Handle { info: Info { operation: Operation::GrantHandle { ref description }, .. }, .. }) => Arc::clone(description), _ => return Err(Error::new(EBADF)), diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 3fce59a5..2a6fd212 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -146,10 +146,10 @@ impl UserInner { let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?; let mut context = context_lock.write(); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut addr_space = context.addr_space()?.write(); + let mut new_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }; + let src_address = round_down_pages(address); let dst_address = round_down_pages(dst_address); let offset = address - src_address; @@ -178,14 +178,14 @@ impl UserInner { let context_lock = self.context.upgrade().ok_or(Error::new(ESRCH))?; let mut context = context_lock.write(); - let mut other_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; let mut addr_space = context.addr_space()?.write(); + let mut other_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }; let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, None => return Err(Error::new(EFAULT)), }; - addr_space.grants.take(®ion).unwrap().unmap(&mut other_table.mapper(), crate::paging::mapper::InactiveFlusher::new()); + addr_space.grants.take(®ion).unwrap().unmap(&mut other_table.mapper(), InactiveFlusher::new()); Ok(()) } -- GitLab From db3b834f19b47a425ceca0909a49133bfb486a58 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 11 Jul 2022 13:51:34 +0200 Subject: [PATCH 23/44] Also inherit pgid, umask, sigmask. --- src/scheme/proc.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 83423f65..fc2802ab 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1202,6 +1202,9 @@ fn inherit_context() -> Result { new_context.ens = current_context.ens; new_context.rns = current_context.rns; new_context.ppid = current_context.id; + new_context.pgid = current_context.pgid; + new_context.umask = current_context.umask; + new_context.sigmask = current_context.sigmask; // TODO: More to copy? 
-- GitLab From 94578efd1ea839e84cb52d6d741b7f8523cd8662 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 11 Jul 2022 13:55:21 +0200 Subject: [PATCH 24/44] Use alloc_zeroed when allocating FX. --- src/context/list.rs | 6 ++---- src/context/mod.rs | 10 ++++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/context/list.rs b/src/context/list.rs index dc115364..a7459550 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -80,13 +80,11 @@ impl ContextList { { let mut context = context_lock.write(); let mut fx = unsafe { - let ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; + // TODO: Alignment must match, the following can be UB. Use AlignedBox. + let ptr = crate::ALLOCATOR.alloc_zeroed(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; if ptr.is_null() { return Err(Error::new(ENOMEM)); } Box::from_raw(ptr) }; - for b in fx.iter_mut() { - *b = 0; - } let mut stack = vec![0; 65_536].into_boxed_slice(); let offset = stack.len() - mem::size_of::(); diff --git a/src/context/mod.rs b/src/context/mod.rs index 9268b142..6b80e0fc 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -57,10 +57,12 @@ pub fn init() { let mut contexts = contexts_mut(); let context_lock = contexts.new_context().expect("could not initialize first context"); let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) }; - for b in fx.iter_mut() { - *b = 0; - } + let fx = unsafe { + // TODO: Alignment must match, the following can be UB. Use AlignedBox. + let ptr = crate::ALLOCATOR.alloc_zeroed(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; + assert!(!ptr.is_null(), "failed to allocate FX to kmain!"); + Box::from_raw(ptr) + }; context.arch.set_fx(fx.as_ptr() as usize); context.kfx = Some(fx); -- GitLab From 351d77ad9b469faf236c373ca0c88782602bf69c Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 11 Jul 2022 18:51:05 +0200 Subject: [PATCH 25/44] Improve floating point handling. --- src/context/arch/x86_64.rs | 43 ++++++++------------------------------ src/context/context.rs | 35 +++++++++++++++++++++++++++---- src/context/list.rs | 10 ++------- src/context/mod.rs | 11 ++-------- src/context/switch.rs | 4 ++-- src/memory/mod.rs | 9 +++++++- src/scheme/proc.rs | 2 +- 7 files changed, 55 insertions(+), 59 deletions(-) diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 09d0a79d..3066e503 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -13,6 +13,9 @@ pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; +pub const KFX_SIZE: usize = 512; +pub const KFX_ALIGN: usize = 16; + #[derive(Clone, Debug)] #[repr(C)] pub struct Context { @@ -46,21 +49,11 @@ pub struct Context { /// running. With fsgsbase, this is neither saved nor restored upon every syscall (there is no /// need to!), and thus it must be re-read from the register before copying this struct. pub(crate) gsbase: usize, - /// FX valid? 
- loadable: AbiCompatBool, -} - -#[repr(u8)] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum AbiCompatBool { - False, - True, } impl Context { pub fn new() -> Context { Context { - loadable: AbiCompatBool::False, fx: 0, cr3: 0, rflags: 0, @@ -80,10 +73,7 @@ impl Context { self.cr3 } - pub fn get_fx_regs(&self) -> Option { - if self.loadable == AbiCompatBool::False { - return None; - } + pub fn get_fx_regs(&self) -> FloatRegisters { let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; regs._reserved = 0; let mut new_st = regs.st_space; @@ -92,14 +82,10 @@ impl Context { *st &= !ST_RESERVED; } regs.st_space = new_st; - Some(regs) + regs } - pub fn set_fx_regs(&mut self, mut new: FloatRegisters) -> bool { - if self.loadable == AbiCompatBool::False { - return false; - } - + pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { { let old = unsafe { &*(self.fx as *const FloatRegisters) }; new._reserved = old._reserved; @@ -117,7 +103,6 @@ impl Context { unsafe { *(self.fx as *mut FloatRegisters) = new; } - true } pub fn set_fx(&mut self, address: usize) { @@ -220,20 +205,12 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { // save processor SSE/FPU/AVX state in `prev.fx` pointee fxsave64 [rax] - // set `prev.loadable` to true - mov BYTE PTR [rdi + {off_loadable}], {true} - // compare `next.loadable` with true - cmp BYTE PTR [rsi + {off_loadable}], {true} - je 3f - - fninit - jmp 3f - -2: + // load `next.fx` mov rax, [rsi + {off_fx}] + + // load processor SSE/FPU/AVX state from `next.fx` pointee fxrstor64 [rax] -3: // Save the current CR3, and load the next CR3 if not identical mov rcx, cr3 mov [rdi + {off_cr3}], rcx @@ -292,7 +269,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { off_fx = const(offset_of!(Cx, fx)), off_cr3 = const(offset_of!(Cx, cr3)), off_rflags = const(offset_of!(Cx, rflags)), - off_loadable = const(offset_of!(Cx, loadable)), off_rbx = const(offset_of!(Cx, rbx)), off_r12 = const(offset_of!(Cx, r12)), @@ -308,7 +284,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { MSR_FSBASE = const(x86::msr::IA32_FS_BASE), MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE), - true = const(AbiCompatBool::True as u8), switch_hook = sym crate::context::switch_finish_hook, options(noreturn), ); diff --git a/src/context/context.rs b/src/context/context.rs index 082ef9e4..2c8b2ef9 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -18,6 +18,7 @@ use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; use crate::context::memory::{AddrSpace, new_addrspace, UserGrants}; use crate::ipi::{ipi, IpiKind, IpiTarget}; +use crate::memory::Enomem; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; @@ -219,11 +220,11 @@ pub struct Context { /// The architecture specific context pub arch: arch::Context, /// Kernel FX - used to store SIMD and FPU registers on context switch - pub kfx: Option>, + pub kfx: Option>, /// Kernel stack pub kstack: Option>, /// Kernel signal backup: Registers, Kernel FX, Kernel Stack, Signal number - pub ksig: Option<(arch::Context, Option>, Option>, u8)>, + pub ksig: Option<(arch::Context, Option>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, /// Address space containing a page table lock, and grants. 
Normally this will have a value, @@ -278,14 +279,14 @@ impl AlignedBox { } }; #[inline(always)] - pub fn try_zeroed() -> Result + pub fn try_zeroed() -> Result where T: ValidForZero, { Ok(unsafe { let ptr = crate::ALLOCATOR.alloc_zeroed(Self::LAYOUT); if ptr.is_null() { - return Err(Error::new(ENOMEM))?; + return Err(Enomem)?; } Self { inner: Unique::new_unchecked(ptr.cast()), @@ -307,6 +308,25 @@ impl Drop for AlignedBox { } } } +impl core::ops::Deref for AlignedBox { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.inner.as_ptr() } + } +} +impl core::ops::DerefMut for AlignedBox { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.inner.as_ptr() } + } +} +impl Clone for AlignedBox { + fn clone(&self) -> Self { + let mut new = Self::try_zeroed().unwrap_or_else(|_| alloc::alloc::handle_alloc_error(Self::LAYOUT)); + T::clone_from(&mut new, self); + new + } +} impl Context { pub fn new(id: ContextId) -> Result { @@ -545,4 +565,11 @@ impl Context { self.arch.set_page_utable(physaddr.data()); self.addr_space.replace(addr_space) } + + pub fn init_fx(&mut self) -> Result<(), Enomem> { + let mut fx = AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?; + self.arch.set_fx(fx.as_mut_ptr() as usize); + self.kfx = Some(fx); + Ok(()) + } } diff --git a/src/context/list.rs b/src/context/list.rs index a7459550..7019a14c 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -79,12 +79,8 @@ impl ContextList { let context_lock = self.new_context()?; { let mut context = context_lock.write(); - let mut fx = unsafe { - // TODO: Alignment must match, the following can be UB. Use AlignedBox. - let ptr = crate::ALLOCATOR.alloc_zeroed(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; - if ptr.is_null() { return Err(Error::new(ENOMEM)); } - Box::from_raw(ptr) - }; + context.init_fx()?; + let mut stack = vec![0; 65_536].into_boxed_slice(); let offset = stack.len() - mem::size_of::(); @@ -102,9 +98,7 @@ impl ContextList { context.arch.set_context_handle(); } - context.arch.set_fx(fx.as_ptr() as usize); context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kfx = Some(fx); context.kstack = Some(stack); } Ok(context_lock) diff --git a/src/context/mod.rs b/src/context/mod.rs index 6b80e0fc..a45efdc9 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -57,15 +57,8 @@ pub fn init() { let mut contexts = contexts_mut(); let context_lock = contexts.new_context().expect("could not initialize first context"); let mut context = context_lock.write(); - let fx = unsafe { - // TODO: Alignment must match, the following can be UB. Use AlignedBox. 
- let ptr = crate::ALLOCATOR.alloc_zeroed(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]; - assert!(!ptr.is_null(), "failed to allocate FX to kmain!"); - Box::from_raw(ptr) - }; - - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); + context.init_fx().expect("failed to allocate FX for first context"); + context.status = Status::Runnable; context.running = true; context.cpu_id = Some(crate::cpu_id()); diff --git a/src/context/switch.rs b/src/context/switch.rs index 9d7c38c1..e6411425 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -30,13 +30,13 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { context.arch = ksig.0; if let Some(ref mut kfx) = context.kfx { - kfx.clone_from_slice(&ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); + kfx.copy_from_slice(&*ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); } else { panic!("context::switch: kfx not set with ksig_restore"); } if let Some(ref mut kstack) = context.kstack { - kstack.clone_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); + kstack.copy_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); } else { panic!("context::switch: kstack not set with ksig_restore"); } diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 42dc53bf..e72f5afd 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -10,7 +10,8 @@ use rmm::{ FrameAllocator, FrameCount, }; -use syscall::{PartialAllocStrategy, PhysallocFlags}; +use crate::syscall::flag::{PartialAllocStrategy, PhysallocFlags}; +use crate::syscall::error::{ENOMEM, Error}; /// A memory map area #[derive(Copy, Clone, Debug, Default)] @@ -125,3 +126,9 @@ impl Iterator for FrameIter { #[derive(Debug)] pub struct Enomem; + +impl From for Error { + fn from(_: Enomem) -> Self { + Self::new(ENOMEM) + } +} diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index fc2802ab..5318b995 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -575,7 +575,7 @@ impl Scheme for ProcScheme { // In the rare case of not having floating // point registers uninitiated, return // empty everything. - let fx = context.arch.get_fx_regs().unwrap_or_default(); + let fx = context.kfx.as_ref().map(|_| context.arch.get_fx_regs()).unwrap_or_default(); Ok((Output { float: fx }, mem::size_of::())) })?, RegsKind::Int => try_stop_context(info.pid, |context| match unsafe { ptrace::regs_for(&context) } { -- GitLab From 1cdd462244a1bfe49d8d2bee7440b59e9d696da5 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Tue, 12 Jul 2022 14:09:55 +0200 Subject: [PATCH 26/44] Move the initfs scheme to userspace. 
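
Instead of the kernel parsing an initfs image itself, the bootloader now hands over one contiguous bootstrap image: KernelArgs gains bootstrap_base, bootstrap_size and bootstrap_entry, kstart() wraps them in crate::Bootstrap, and usermode_bootstrap() maps the image into the first address space and jumps to the entry point. The boot environment is still available, now through init_env(). Below is a rough, runnable sketch of the consumer side, assuming (as the live: scheme in this patch does) that the env block is newline-separated name=value pairs; the variable names used in main are hypothetical:

    // Rough sketch of a consumer: schemes such as live: now fetch the env
    // block via crate::init_env() and parse it line by line.
    fn parse_env(env: &[u8]) -> Vec<(&str, &str)> {
        core::str::from_utf8(env)
            .unwrap_or("")
            .lines()
            .filter_map(|line| {
                let mut parts = line.splitn(2, '=');
                Some((parts.next()?, parts.next().unwrap_or("")))
            })
            .collect()
    }

    fn main() {
        // Hypothetical variable names, for illustration only.
        for (name, value) in parse_env(b"DISK_LIVE_ADDR=8000\nDISK_LIVE_SIZE=4000") {
            println!("{} = {}", name, value);
        }
    }
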
--- Cargo.lock | 9 -- Cargo.toml | 1 - src/arch/x86_64/start.rs | 95 +++++++------- src/lib.rs | 47 +++---- src/scheme/initfs.rs | 276 --------------------------------------- src/scheme/live.rs | 2 +- src/scheme/mod.rs | 5 - src/scheme/sys/mod.rs | 2 +- src/syscall/process.rs | 37 ++---- 9 files changed, 74 insertions(+), 400 deletions(-) delete mode 100644 src/scheme/initfs.rs diff --git a/Cargo.lock b/Cargo.lock index 7c81df02..8c23a1fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,7 +76,6 @@ dependencies = [ "log", "memoffset", "raw-cpuid", - "redox-initfs", "redox_syscall", "rmm", "rustc-cfg", @@ -147,14 +146,6 @@ dependencies = [ "bitflags", ] -[[package]] -name = "redox-initfs" -version = "0.1.0" -source = "git+https://gitlab.redox-os.org/redox-os/redox-initfs.git#89b8fb8984cf96c418880b7dcd9ce3d6afc3f71c" -dependencies = [ - "plain", -] - [[package]] name = "redox_syscall" version = "0.2.16" diff --git a/Cargo.toml b/Cargo.toml index 2bd3aa7e..182aac59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,6 @@ slab_allocator = { path = "slab_allocator", optional = true } # FIXME: There is some undefined behavior probably in the kernel, which forces us to use spin 0.9.0 and not 0.9.2. spin = "=0.9.0" rmm = { path = "rmm", default-features = false } -redox-initfs = { git = "https://gitlab.redox-os.org/redox-os/redox-initfs.git", features = ["kernel"], default-features = false } [dependencies.goblin] version = "0.2.1" diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs index f7e5225e..fb4c8a34 100644 --- a/src/arch/x86_64/start.rs +++ b/src/arch/x86_64/start.rs @@ -39,12 +39,12 @@ static BSP_READY: AtomicBool = AtomicBool::new(false); #[repr(packed)] pub struct KernelArgs { - kernel_base: u64, - kernel_size: u64, - stack_base: u64, - stack_size: u64, - env_base: u64, - env_size: u64, + kernel_base: usize, + kernel_size: usize, + stack_base: usize, + stack_size: usize, + env_base: usize, + env_size: usize, /// The base 64-bit pointer to an array of saved RSDPs. It's up to the kernel (and possibly /// userspace), to decide which RSDP to use. The buffer will be a linked list containing a @@ -53,36 +53,26 @@ pub struct KernelArgs { /// This field can be NULL, and if so, the system has not booted with UEFI or in some other way /// retrieved the RSDPs. The kernel or a userspace driver will thus try searching the BIOS /// memory instead. On UEFI systems, BIOS-like searching is not guaranteed to actually work though. - acpi_rsdps_base: u64, + acpi_rsdps_base: usize, /// The size of the RSDPs region. - acpi_rsdps_size: u64, + acpi_rsdps_size: usize, - areas_base: u64, - areas_size: u64, + areas_base: usize, + areas_size: usize, - /// The physical base 64-bit pointer to the contiguous initfs. - initfs_base: u64, - initfs_size: u64, + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: usize, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: usize, + /// Entry point the kernel will jump to. + bootstrap_entry: usize, } /// The entry to Rust, all things must be initialized #[no_mangle] pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! 
{ - let env = { - let args = &*args_ptr; - - let kernel_base = args.kernel_base as usize; - let kernel_size = args.kernel_size as usize; - let stack_base = args.stack_base as usize; - let stack_size = args.stack_size as usize; - let env_base = args.env_base as usize; - let env_size = args.env_size as usize; - let acpi_rsdps_base = args.acpi_rsdps_base; - let acpi_rsdps_size = args.acpi_rsdps_size; - let areas_base = args.areas_base as usize; - let areas_size = args.areas_size as usize; - let initfs_base = args.initfs_base as usize; - let initfs_size = args.initfs_size as usize; + let bootstrap = { + let args = args_ptr.read(); // BSS should already be zero { @@ -90,12 +80,11 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); } - KERNEL_BASE.store(kernel_base, Ordering::SeqCst); - KERNEL_SIZE.store(kernel_size, Ordering::SeqCst); + KERNEL_BASE.store(args.kernel_base, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size, Ordering::SeqCst); // Convert env to slice - let env = slice::from_raw_parts((env_base + crate::PHYS_OFFSET) as *const u8, env_size); - let initfs = slice::from_raw_parts((initfs_base + crate::PHYS_OFFSET) as *const u8, initfs_size); + let env = slice::from_raw_parts((args.env_base + crate::PHYS_OFFSET) as *const u8, args.env_size); // Set up graphical debug #[cfg(feature = "graphical_debug")] @@ -117,12 +106,13 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { }); info!("Redox OS starting..."); - info!("Kernel: {:X}:{:X}", kernel_base, kernel_base + kernel_size); - info!("Stack: {:X}:{:X}", stack_base, stack_base + stack_size); - info!("Env: {:X}:{:X}", env_base, env_base + env_size); - info!("RSDPs: {:X}:{:X}", acpi_rsdps_base, acpi_rsdps_base + acpi_rsdps_size); - info!("Areas: {:X}:{:X}", areas_base, areas_base + areas_size); - info!("Initfs: {:X}:{:X}", initfs_base, initfs_base + initfs_size); + info!("Kernel: {:X}:{:X}", args.kernel_base, args.kernel_base + args.kernel_size); + info!("Stack: {:X}:{:X}", args.stack_base, args.stack_base + args.stack_size); + info!("Env: {:X}:{:X}", args.env_base, args.env_base + args.env_size); + info!("RSDPs: {:X}:{:X}", args.acpi_rsdps_base, args.acpi_rsdps_base + args.acpi_rsdps_size); + info!("Areas: {:X}:{:X}", args.areas_base, args.areas_base + args.areas_size); + info!("Bootstrap: {:X}:{:X}", args.bootstrap_base, args.bootstrap_base + args.bootstrap_size); + info!("Bootstrap entry point: {:X}", args.bootstrap_entry); // Set up GDT before paging gdt::init(); @@ -132,19 +122,19 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { // Initialize RMM crate::arch::rmm::init( - kernel_base, kernel_size, - stack_base, stack_size, - env_base, env_size, - acpi_rsdps_base as usize, acpi_rsdps_size as usize, - areas_base, areas_size, - initfs_base, initfs_size, + args.kernel_base, args.kernel_size, + args.stack_base, args.stack_size, + args.env_base, args.env_size, + args.acpi_rsdps_base, args.acpi_rsdps_size, + args.areas_base, args.areas_size, + args.bootstrap_base, args.bootstrap_size, ); // Initialize paging let (mut active_table, tcb_offset) = paging::init(0); // Set up GDT after paging with TLS - gdt::init_paging(0, tcb_offset, stack_base + stack_size); + gdt::init_paging(0, tcb_offset, args.stack_base + args.stack_size); // Set up IDT idt::init_paging_bsp(); @@ -185,8 +175,8 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! 
{ // Read ACPI tables, starts APs #[cfg(feature = "acpi")] { - acpi::init(&mut active_table, if acpi_rsdps_base != 0 && acpi_rsdps_size > 0 { - Some((acpi_rsdps_base + crate::PHYS_OFFSET as u64, acpi_rsdps_size)) + acpi::init(&mut active_table, if args.acpi_rsdps_base != 0 && args.acpi_rsdps_size > 0 { + Some(((args.acpi_rsdps_base + crate::PHYS_OFFSET) as u64, args.acpi_rsdps_size as u64)) } else { None }); @@ -196,18 +186,21 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { // Initialize all of the non-core devices not otherwise needed to complete initialization device::init_noncore(); - crate::scheme::initfs::init(initfs); - // Stop graphical debug #[cfg(feature = "graphical_debug")] graphical_debug::fini(); BSP_READY.store(true, Ordering::SeqCst); - env + crate::Bootstrap { + base: crate::memory::Frame::containing_address(crate::paging::PhysicalAddress::new(args.bootstrap_base)), + page_count: args.bootstrap_size / crate::memory::PAGE_SIZE, + entry: args.bootstrap_entry, + env, + } }; - crate::kmain(CPU_COUNT.load(Ordering::SeqCst), env); + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), bootstrap); } #[repr(packed)] diff --git a/src/lib.rs b/src/lib.rs index 80ba7463..f4f4c476 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,7 +74,6 @@ extern crate spin; #[cfg(feature = "slab")] extern crate slab_allocator; -use alloc::vec::Vec; use core::sync::atomic::{AtomicUsize, Ordering}; use crate::scheme::{FileHandle, SchemeNamespace}; @@ -169,48 +168,36 @@ pub fn cpu_count() -> usize { CPU_COUNT.load(Ordering::Relaxed) } -static mut INIT_ENV: &[u8] = &[]; - -/// Initialize userspace by running the initfs:bin/init process -/// This function will also set the CWD to initfs:bin and open debug: as stdio -pub extern fn userspace_init() { - let path = "initfs:/bin/bootstrap"; - - if let Err(err) = syscall::chdir("initfs:") { - info!("Failed to enter initfs ({}).", err); - panic!("Unexpected error while trying to enter initfs:."); - } - - let fd = syscall::open(path, syscall::flag::O_RDONLY).expect("failed to open init"); - - let mut total_bytes_read = 0; - let mut data = Vec::new(); - - loop { - data.resize(total_bytes_read + 4096, 0); - let bytes_read = syscall::file_op_mut_slice(syscall::number::SYS_READ, fd, &mut data[total_bytes_read..]).expect("failed to read init"); - if bytes_read == 0 { break } - total_bytes_read += bytes_read; - } - data.truncate(total_bytes_read); +pub fn init_env() -> &'static [u8] { + crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set").env +} - let _ = syscall::close(fd); +pub extern "C" fn userspace_init() { + let bootstrap = crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set"); + unsafe { crate::syscall::process::usermode_bootstrap(bootstrap) } +} - crate::syscall::process::usermode_bootstrap(data.into_boxed_slice()); +pub struct Bootstrap { + pub base: crate::memory::Frame, + pub page_count: usize, + pub entry: usize, + pub env: &'static [u8], } +static BOOTSTRAP: spin::Once = spin::Once::new(); /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this -pub fn kmain(cpus: usize, env: &'static [u8]) -> ! { +pub fn kmain(cpus: usize, bootstrap: Bootstrap) -> ! 
{ CPU_ID.store(0, Ordering::SeqCst); CPU_COUNT.store(cpus, Ordering::SeqCst); - unsafe { INIT_ENV = env }; //Initialize the first context, stored in kernel/src/context/mod.rs context::init(); let pid = syscall::getpid(); info!("BSP: {:?} {}", pid, cpus); - info!("Env: {:?}", ::core::str::from_utf8(unsafe { INIT_ENV })); + info!("Env: {:?}", ::core::str::from_utf8(bootstrap.env)); + + BOOTSTRAP.call_once(|| bootstrap); match context::contexts_mut().spawn(userspace_init) { Ok(context_lock) => { diff --git a/src/scheme/initfs.rs b/src/scheme/initfs.rs deleted file mode 100644 index c881cbdf..00000000 --- a/src/scheme/initfs.rs +++ /dev/null @@ -1,276 +0,0 @@ -use core::convert::TryFrom; -use core::str; -use core::sync::atomic::{AtomicUsize, Ordering}; - -use alloc::collections::BTreeMap; -use alloc::string::String; -use alloc::vec::Vec; - -use spin::{Once, RwLock}; - -use redox_initfs::{InitFs, InodeStruct, Inode, InodeDir, InodeKind, types::Timespec}; - -use crate::syscall::data::Stat; -use crate::syscall::error::*; -use crate::syscall::flag::{MODE_DIR, MODE_FILE}; -use crate::syscall::scheme::{calc_seek_offset_usize, Scheme}; - -struct Handle { - inode: Inode, - seek: usize, - // TODO: Any better way to implement fpath? Or maybe work around it, e.g. by giving paths such - // as `initfs:__inodes__/`? - filename: String, -} - -static NEXT_ID: AtomicUsize = AtomicUsize::new(0); -static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); - -static FS: Once> = Once::new(); - -fn fs() -> Result> { - FS.get().copied().ok_or(Error::new(ENODEV)) -} -fn get_inode(inode: Inode) -> Result> { - fs()?.get_inode(inode).ok_or_else(|| Error::new(EIO)) -} - -pub fn init(bytes: &'static [u8]) { - let mut called = false; - - FS.call_once(|| { - called = true; - - InitFs::new(bytes) - .expect("failed to parse initfs header") - }); - - assert!(called, "called initfs::init more than once"); -} - -fn next_id() -> usize { - let old = NEXT_ID.fetch_add(1, Ordering::Relaxed); - assert_ne!(old, usize::MAX, "usize overflow in initfs scheme"); - old -} - -pub struct InitFsScheme; - -struct Iter { - dir: InodeDir<'static>, - idx: u32, -} -impl Iterator for Iter { - type Item = Result>; - - fn next(&mut self) -> Option { - let entry = self.dir.get_entry(self.idx).map_err(|_| Error::new(EIO)); - self.idx += 1; - entry.transpose() - } - fn size_hint(&self) -> (usize, Option) { - match self.dir.entry_count().ok() { - Some(size) => { - let size = usize::try_from(size).expect("expected u32 to be convertible into usize"); - (size, Some(size)) - } - None => (0, None), - } - } -} - -fn entries_iter(dir: InodeDir<'static>) -> impl IntoIterator>> + 'static { - let mut index = 0_u32; - - core::iter::from_fn(move || { - let idx = index; - index += 1; - - dir.get_entry(idx).map_err(|_| Error::new(EIO)).transpose() - }) -} -fn inode_len(inode: InodeStruct<'static>) -> Result { - Ok(match inode.kind() { - InodeKind::File(file) => file.data().map_err(|_| Error::new(EIO))?.len(), - InodeKind::Dir(dir) => (Iter { dir, idx: 0 }) - .fold(0, |len, entry| len + entry.and_then(|entry| entry.name().map_err(|_| Error::new(EIO))).map_or(0, |name| name.len() + 1)), - InodeKind::Unknown => return Err(Error::new(EIO)), - }) -} - -impl Scheme for InitFsScheme { - fn open(&self, path: &str, _flags: usize, _uid: u32, _gid: u32) -> Result { - let mut components = path - // trim leading and trailing slash - .trim_matches('/') - // divide into components - .split('/') - // filter out double slashes (e.g. /usr//bin/...) 
- .filter(|c| !c.is_empty()); - - let mut current_inode = InitFs::ROOT_INODE; - - while let Some(component) = components.next() { - match component { - "." => continue, - ".." => { - let _ = components.next_back(); - continue - } - - _ => (), - } - - let current_inode_struct = get_inode(current_inode)?; - - let dir = match current_inode_struct.kind() { - InodeKind::Dir(dir) => dir, - - // If we still have more components in the path, and the file tree for that - // particular branch is not all directories except the last, then that file cannot - // exist. - InodeKind::File(_) | InodeKind::Unknown => return Err(Error::new(ENOENT)), - }; - - let mut entries = Iter { - dir, - idx: 0, - }; - - current_inode = loop { - let entry_res = match entries.next() { - Some(e) => e, - None => return Err(Error::new(ENOENT)), - }; - let entry = entry_res?; - let name = entry.name().map_err(|_| Error::new(EIO))?; - if name == component.as_bytes() { - break entry.inode(); - } - }; - } - - let id = next_id(); - let old = HANDLES.write().insert(id, Handle { - inode: current_inode, - seek: 0_usize, - filename: path.into(), - }); - assert!(old.is_none()); - - Ok(id) - } - - fn read(&self, id: usize, buffer: &mut [u8]) -> Result { - let mut handles = HANDLES.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - match get_inode(handle.inode)?.kind() { - InodeKind::Dir(dir) => { - let mut bytes_read = 0; - let mut bytes_skipped = 0; - - for entry_res in (Iter { dir, idx: 0 }) { - let entry = entry_res?; - let name = entry.name().map_err(|_| Error::new(EIO))?; - let entry_len = name.len() + 1; - - let to_skip = core::cmp::min(handle.seek - bytes_skipped, entry_len); - let max_to_read = core::cmp::min(entry_len - to_skip, buffer.len()); - - let to_copy = entry_len.saturating_sub(to_skip).saturating_sub(1); - buffer[bytes_read..bytes_read + to_copy].copy_from_slice(&name[..to_copy]); - - if to_copy.saturating_sub(to_skip) == 1 { - buffer[bytes_read + to_copy] = b'\n'; - bytes_read += 1; - } - - bytes_read += to_copy; - bytes_skipped += to_skip; - } - - handle.seek = handle.seek.checked_add(bytes_read).ok_or(Error::new(EOVERFLOW))?; - - Ok(bytes_read) - } - InodeKind::File(file) => { - let data = file.data().map_err(|_| Error::new(EIO))?; - let src_buf = &data[core::cmp::min(handle.seek, data.len())..]; - - let to_copy = core::cmp::min(src_buf.len(), buffer.len()); - buffer[..to_copy].copy_from_slice(&src_buf[..to_copy]); - - handle.seek = handle.seek.checked_add(to_copy).ok_or(Error::new(EOVERFLOW))?; - - Ok(to_copy) - } - InodeKind::Unknown => return Err(Error::new(EIO)), - } - } - - fn seek(&self, id: usize, pos: isize, whence: usize) -> Result { - let mut handles = HANDLES.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - let new_offset = calc_seek_offset_usize(handle.seek, pos, whence, inode_len(get_inode(handle.inode)?)?)?; - handle.seek = new_offset as usize; - Ok(new_offset) - } - - fn fcntl(&self, id: usize, _cmd: usize, _arg: usize) -> Result { - let handles = HANDLES.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - Ok(0) - } - - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = HANDLES.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - // TODO: Copy scheme part in kernel - let scheme_path = b"initfs:"; - let scheme_bytes = core::cmp::min(scheme_path.len(), buf.len()); - buf[..scheme_bytes].copy_from_slice(&scheme_path[..scheme_bytes]); - - let source = handle.filename.as_bytes(); - let path_bytes = 
core::cmp::min(buf.len() - scheme_bytes, source.len()); - buf[scheme_bytes..scheme_bytes + path_bytes].copy_from_slice(&source[..path_bytes]); - - Ok(scheme_bytes + path_bytes) - } - - fn fstat(&self, id: usize, stat: &mut Stat) -> Result { - let handles = HANDLES.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - let Timespec { sec, nsec } = fs()?.image_creation_time(); - - let inode = get_inode(handle.inode)?; - - stat.st_mode = inode.mode() | match inode.kind() { InodeKind::Dir(_) => MODE_DIR, InodeKind::File(_) => MODE_FILE, _ => 0 }; - stat.st_uid = inode.uid(); - stat.st_gid = inode.gid(); - stat.st_size = u64::try_from(inode_len(inode)?).unwrap_or(u64::MAX); - - stat.st_ctime = sec.get(); - stat.st_ctime_nsec = nsec.get(); - stat.st_mtime = sec.get(); - stat.st_mtime_nsec = nsec.get(); - - Ok(0) - } - - fn fsync(&self, id: usize) -> Result { - let handles = HANDLES.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - Ok(0) - } - - fn close(&self, id: usize) -> Result { - let _ = HANDLES.write().remove(&id).ok_or(Error::new(EBADF))?; - Ok(0) - } -} -impl crate::scheme::KernelScheme for InitFsScheme {} diff --git a/src/scheme/live.rs b/src/scheme/live.rs index b5d1e177..63877c81 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -37,7 +37,7 @@ impl DiskScheme { let mut phys = 0; let mut size = 0; - for line in str::from_utf8(unsafe { crate::INIT_ENV }).unwrap_or("").lines() { + for line in str::from_utf8(crate::init_env()).unwrap_or("").lines() { let mut parts = line.splitn(2, '='); let name = parts.next().unwrap_or(""); let value = parts.next().unwrap_or(""); diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index 6f66a44e..ac81fbbd 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -25,7 +25,6 @@ use self::acpi::AcpiScheme; use self::debug::DebugScheme; use self::event::EventScheme; -use self::initfs::InitFsScheme; use self::irq::IrqScheme; use self::itimer::ITimerScheme; use self::memory::MemoryScheme; @@ -46,9 +45,6 @@ pub mod debug; /// `event:` - allows reading of `Event`s which are registered using `fevent` pub mod event; -/// `initfs:` - a readonly filesystem used for initializing the system -pub mod initfs; - /// `irq:` - allows userspace handling of IRQs pub mod irq; @@ -166,7 +162,6 @@ impl SchemeList { self.insert(ns, "kernel/acpi", |scheme_id| Arc::new(AcpiScheme::new(scheme_id))).unwrap(); } self.insert(ns, "debug", |scheme_id| Arc::new(DebugScheme::new(scheme_id))).unwrap(); - self.insert(ns, "initfs", |_| Arc::new(InitFsScheme)).unwrap(); self.insert(ns, "irq", |scheme_id| Arc::new(IrqScheme::new(scheme_id))).unwrap(); self.insert(ns, "proc", |scheme_id| Arc::new(ProcScheme::new(scheme_id))).unwrap(); self.insert(ns, "thisproc", |_| Arc::new(ProcScheme::restricted())).unwrap(); diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index 82cea67e..d8f1989c 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -52,7 +52,7 @@ impl SysScheme { files.insert("scheme_num", Box::new(scheme_num::resource)); files.insert("syscall", Box::new(syscall::resource)); files.insert("uname", Box::new(uname::resource)); - files.insert("env", Box::new(|| Ok(Vec::from(unsafe { crate::INIT_ENV })))); + files.insert("env", Box::new(|| Ok(Vec::from(crate::init_env())))); #[cfg(target_arch = "x86_64")] files.insert("spurious_irq", Box::new(irq::spurious_irq_resource)); diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 1a1e7259..7ad39bba 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -9,6 
+9,7 @@ use spin::{RwLock, RwLockWriteGuard}; use crate::context::{Context, ContextId, memory, WaitpidKey}; +use crate::Bootstrap; use crate::context; use crate::interrupt; use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; @@ -624,28 +625,17 @@ pub fn waitpid(pid: ContextId, status_ptr: usize, flags: WaitFlags) -> Result) -> ! { - assert!(!data.is_empty()); - - const LOAD_BASE: usize = 0; +pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap) -> ! { + assert_ne!(bootstrap.page_count, 0); { - let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - - let grant = context::memory::Grant::zeroed(Page::containing_address(VirtualAddress::new(LOAD_BASE)), (data.len()+PAGE_SIZE-1)/PAGE_SIZE, PageFlags::new().user(true).write(true).execute(true), &mut active_table, PageFlushAll::new()).expect("failed to allocate memory for bootstrap"); - - - for (index, page) in grant.pages().enumerate() { - let len = if data.len() - index * PAGE_SIZE < PAGE_SIZE { data.len() % PAGE_SIZE } else { PAGE_SIZE }; - - let physaddr = active_table.translate_page(page) - .expect("expected mapped init memory to have a corresponding frame") - .start_address(); + let grant = context::memory::Grant::physmap( + bootstrap.base.start_address(), + VirtualAddress::new(0), + bootstrap.page_count * PAGE_SIZE, + PageFlags::new().user(true).write(true).execute(true), + ); - unsafe { - (RmmA::phys_to_virt(physaddr).data() as *mut u8).copy_from_nonoverlapping(data.as_ptr().add(index * PAGE_SIZE), len); - } - } context::contexts().current() .expect("expected a context to exist when executing init") .read().addr_space() @@ -653,12 +643,7 @@ pub fn usermode_bootstrap(mut data: Box<[u8]>) -> ! { .write().grants.insert(grant); } - drop(data); - #[cfg(target_arch = "x86_64")] - unsafe { - let start = ((LOAD_BASE + 0x18) as *mut usize).read(); - // Start with the (probably) ELF executable loaded, without any stack. - usermode(start, 0, 0, 0); - } + // Start in a minimal environment without any stack. + usermode(bootstrap.entry, 0, 0, 0); } -- GitLab From 8970ce1fe74cd1a865d7368bc0230dacdd77cbdc Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Wed, 13 Jul 2022 15:42:33 +0200 Subject: [PATCH 27/44] Benefit from addrspace abstraction in switch. 
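
cr3 and the fx pointer leave arch::Context entirely: FX state is saved and restored through the context's kfx buffer, and switch_to() now receives whole Contexts so it can compare their AddrSpace Arcs, reloading the page table only when the two contexts actually use different spaces and falling back to empty_cr3() when the next context has none. A small sketch of that decision with std types; pick_table and table_root are invented names for illustration, and the kernel performs the actual reload with RmmA::set_table:

    use std::sync::{Arc, RwLock};

    // Stand-in: the real AddrSpace holds the utable frame, grants, etc.
    struct AddrSpace { table_root: usize }

    // Returns the table root that must be loaded, if any.
    fn pick_table(
        prev: Option<&Arc<RwLock<AddrSpace>>>,
        next: Option<&Arc<RwLock<AddrSpace>>>,
        empty_root: usize,
    ) -> Option<usize> {
        match next {
            // Arc::ptr_eq compares allocations, so two threads sharing one
            // address space switch between each other without a TLB flush.
            Some(space) if prev.map_or(true, |p| !Arc::ptr_eq(p, space)) => {
                Some(space.read().unwrap().table_root)
            }
            Some(_) => None,          // same space: keep the current table
            None => Some(empty_root), // no userspace: fall back to empty_cr3()
        }
    }

    fn main() {
        let a = Arc::new(RwLock::new(AddrSpace { table_root: 0x1000 }));
        let b = Arc::clone(&a);
        assert_eq!(pick_table(Some(&a), Some(&b), 0), None);
        assert_eq!(pick_table(Some(&a), None, 0xdead_b000), Some(0xdead_b000));
    }

Taking the address space's read lock around the reload matters for the sibling-thread case described in the diff's comment: without it, a mapping concurrently removed by a sibling could leave stale TLB entries behind.
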
--- src/context/arch/x86_64.rs | 202 +++++++++++++++---------------------- src/context/context.rs | 19 ++-- src/context/list.rs | 2 +- src/context/memory.rs | 60 +++++------ src/context/mod.rs | 12 +-- src/context/switch.rs | 14 +-- src/scheme/proc.rs | 8 +- src/syscall/process.rs | 6 +- 8 files changed, 128 insertions(+), 195 deletions(-) diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 3066e503..85855dc9 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,9 +1,13 @@ use core::mem; use core::sync::atomic::AtomicBool; +use alloc::sync::Arc; + +use crate::paging::{RmmA, RmmArch}; use crate::syscall::FloatRegisters; use memoffset::offset_of; +use spin::Once; /// This must be used by the kernel to ensure that context switches are done atomically /// Compare and exchange this to true when beginning a context switch on any CPU @@ -19,10 +23,6 @@ pub const KFX_ALIGN: usize = 16; #[derive(Clone, Debug)] #[repr(C)] pub struct Context { - /// FX location - fx: usize, - /// Page table pointer - cr3: usize, /// RFLAGS register rflags: usize, /// RBX register @@ -54,8 +54,6 @@ pub struct Context { impl Context { pub fn new() -> Context { Context { - fx: 0, - cr3: 0, rflags: 0, rbx: 0, r12: 0, @@ -69,12 +67,30 @@ impl Context { } } - pub fn get_page_utable(&self) -> usize { - self.cr3 + pub fn set_stack(&mut self, address: usize) { + self.rsp = address; + } + + pub unsafe fn signal_stack(&mut self, handler: extern fn(usize), sig: u8) { + self.push_stack(sig as usize); + self.push_stack(handler as usize); + self.push_stack(signal_handler_wrapper as usize); } + pub unsafe fn push_stack(&mut self, value: usize) { + self.rsp -= mem::size_of::(); + *(self.rsp as *mut usize) = value; + } + + pub unsafe fn pop_stack(&mut self) -> usize { + let value = *(self.rsp as *const usize); + self.rsp += mem::size_of::(); + value + } +} +impl super::Context { pub fn get_fx_regs(&self) -> FloatRegisters { - let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; + let mut regs = unsafe { self.kfx.as_ptr().cast::().read() }; regs._reserved = 0; let mut new_st = regs.st_space; for st in &mut new_st { @@ -87,7 +103,7 @@ impl Context { pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { { - let old = unsafe { &*(self.fx as *const FloatRegisters) }; + let old = unsafe { &*(self.kfx.as_ptr().cast::()) }; new._reserved = old._reserved; let old_st = new.st_space; let mut new_st = new.st_space; @@ -101,94 +117,74 @@ impl Context { } unsafe { - *(self.fx as *mut FloatRegisters) = new; + self.kfx.as_mut_ptr().cast::().write(new); } } +} - pub fn set_fx(&mut self, address: usize) { - self.fx = address; - } - - pub fn set_page_utable(&mut self, address: usize) { - self.cr3 = address; - } +pub static EMPTY_CR3: Once = Once::new(); - pub fn set_stack(&mut self, address: usize) { - self.rsp = address; - } +// SAFETY: EMPTY_CR3 must be initialized. 
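+// It is initialized once, in context::init(), before the first context switch can happen.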
+pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} - pub unsafe fn signal_stack(&mut self, handler: extern fn(usize), sig: u8) { - self.push_stack(sig as usize); - self.push_stack(handler as usize); - self.push_stack(signal_handler_wrapper as usize); - } +/// Switch to the next context by restoring its stack and registers +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + core::arch::asm!(" + fxsave64 [{prev_fx}] + fxrstor64 [{next_fx}] + ", prev_fx = in(reg) prev.kfx.as_mut_ptr(), + next_fx = in(reg) next.kfx.as_ptr(), + ); - pub unsafe fn push_stack(&mut self, value: usize) { - self.rsp -= mem::size_of::(); - *(self.rsp as *mut usize) = value; + { + use x86::{bits64::segmentation::*, msr}; + + // This is so much shorter in Rust! + + if cfg!(feature = "x86_fsgsbase") { + prev.arch.fsbase = rdfsbase() as usize; + wrfsbase(next.arch.fsbase as u64); + swapgs(); + prev.arch.gsbase = rdgsbase() as usize; + wrgsbase(next.arch.gsbase as u64); + swapgs(); + } else { + prev.arch.fsbase = msr::rdmsr(msr::IA32_FS_BASE) as usize; + msr::wrmsr(msr::IA32_FS_BASE, next.arch.fsbase as u64); + prev.arch.gsbase = msr::rdmsr(msr::IA32_KERNEL_GSBASE) as usize; + msr::wrmsr(msr::IA32_KERNEL_GSBASE, next.arch.gsbase as u64); + } } - pub unsafe fn pop_stack(&mut self) -> usize { - let value = *(self.rsp as *const usize); - self.rsp += mem::size_of::(); - value + match next.addr_space { + // Since Arc is essentially just wraps a pointer, in this case a regular pointer (as + // opposed to dyn or slice fat pointers), and NonNull optimization exists, map_or will + // hopefully be optimized down to checking prev and next pointers, as next cannot be null. + Some(ref next_space) => if prev.addr_space.as_ref().map_or(true, |prev_space| !Arc::ptr_eq(&prev_space, &next_space)) { + // Suppose we have two sibling threads A and B. A runs on CPU 0 and B on CPU 1. A + // recently called yield and is now here about to switch back. Meanwhile, B is + // currently creating a new mapping in their shared address space, for example a + // message on a channel. + // + // Unless we acquire this lock, it may be possible that the TLB will not contain new + // entries. While this can be caught and corrected in a page fault handler, this is not + // true when entries are removed from a page table! + let next_space = next_space.read(); + RmmA::set_table(next_space.frame.utable.start_address()); + } + None => { + RmmA::set_table(empty_cr3()); + } } + switch_to_inner(&mut prev.arch, &mut next.arch) } -macro_rules! load_msr( - ($name:literal, $offset:literal) => { - concat!(" - mov ecx, {", $name, "} - mov rdx, [rsi + {", $offset, "}] - mov eax, edx - shr rdx, 32 - - // MSR <= EDX:EAX - wrmsr - ") - } -); - -// NOTE: RAX is a scratch register and can be set to whatever. There is also no return -// value in switch_to, to it will also never be read. The same goes for RDX, and RCX. -// TODO: Use runtime code patching (perhaps in the bootloader) by pushing alternative code -// sequences into a specialized section, with some macro resembling Linux's `.ALTERNATIVE`. -#[cfg(feature = "x86_fsgsbase")] -macro_rules! 
switch_fsgsbase( - () => { - " - // placeholder: {MSR_FSBASE} {MSR_KERNELGSBASE} - - rdfsbase rax - mov [rdi + {off_fsbase}], rax - mov rax, [rsi + {off_fsbase}] - wrfsbase rax - - swapgs - rdgsbase rax - mov [rdi + {off_gsbase}], rax - mov rax, [rsi + {off_gsbase}] - wrgsbase rax - swapgs - " - } -); - -#[cfg(not(feature = "x86_fsgsbase"))] -macro_rules! switch_fsgsbase( - () => { - concat!( - load_msr!("MSR_FSBASE", "off_fsbase"), - load_msr!("MSR_KERNELGSBASE", "off_gsbase"), - ) - } -); - - -/// Switch to the next context by restoring its stack and registers -/// Check disassembly! +// Check disassembly! #[naked] -pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { +unsafe extern "sysv64" fn switch_to_inner(_prev: &mut Context, _next: &mut Context) { use Context as Cx; core::arch::asm!( @@ -199,28 +195,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we // store them here in the first place. concat!(" - // load `prev.fx` - mov rax, [rdi + {off_fx}] - - // save processor SSE/FPU/AVX state in `prev.fx` pointee - fxsave64 [rax] - - // load `next.fx` - mov rax, [rsi + {off_fx}] - - // load processor SSE/FPU/AVX state from `next.fx` pointee - fxrstor64 [rax] - - // Save the current CR3, and load the next CR3 if not identical - mov rcx, cr3 - mov [rdi + {off_cr3}], rcx - mov rax, [rsi + {off_cr3}] - cmp rax, rcx - - je 4f - mov cr3, rax - -4: // Save old registers, and load new ones mov [rdi + {off_rbx}], rbx mov rbx, [rsi + {off_rbx}] @@ -243,10 +217,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { mov [rdi + {off_rsp}], rsp mov rsp, [rsi + {off_rsp}] - ", - switch_fsgsbase!(), - " - // push RFLAGS (can only be modified via stack) pushfq // pop RFLAGS into `self.rflags` @@ -266,8 +236,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { "), - off_fx = const(offset_of!(Cx, fx)), - off_cr3 = const(offset_of!(Cx, cr3)), off_rflags = const(offset_of!(Cx, rflags)), off_rbx = const(offset_of!(Cx, rbx)), @@ -278,12 +246,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { off_rbp = const(offset_of!(Cx, rbp)), off_rsp = const(offset_of!(Cx, rsp)), - off_fsbase = const(offset_of!(Cx, fsbase)), - off_gsbase = const(offset_of!(Cx, gsbase)), - - MSR_FSBASE = const(x86::msr::IA32_FS_BASE), - MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE), - switch_hook = sym crate::context::switch_finish_hook, options(noreturn), ); diff --git a/src/context/context.rs b/src/context/context.rs index 2c8b2ef9..76248d6b 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -220,15 +220,17 @@ pub struct Context { /// The architecture specific context pub arch: arch::Context, /// Kernel FX - used to store SIMD and FPU registers on context switch - pub kfx: Option>, + pub kfx: AlignedBox<[u8; {arch::KFX_SIZE}], {arch::KFX_ALIGN}>, /// Kernel stack pub kstack: Option>, /// Kernel signal backup: Registers, Kernel FX, Kernel Stack, Signal number - pub ksig: Option<(arch::Context, Option>, Option>, u8)>, + pub ksig: Option<(arch::Context, AlignedBox<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, /// Address space containing a page table lock, and grants. Normally this will have a value, - /// but can be None while the context is being reaped. 
+ /// but can be None while the context is being reaped or when a new context is created but has + /// not yet had its address space changed. Note that these are only for user mappings; kernel + /// mappings are universal and independent on address spaces or contexts. pub addr_space: Option>>, /// The name of the context pub name: Arc>>, @@ -358,7 +360,7 @@ impl Context { pending: VecDeque::new(), wake: None, arch: arch::Context::new(), - kfx: None, + kfx: AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?, kstack: None, ksig: None, ksig_restore: false, @@ -379,7 +381,6 @@ impl Context { sigstack: None, clone_entry: None, }; - let _ = this.set_addr_space(new_addrspace()?); Ok(this) } @@ -562,14 +563,6 @@ impl Context { } } - self.arch.set_page_utable(physaddr.data()); self.addr_space.replace(addr_space) } - - pub fn init_fx(&mut self) -> Result<(), Enomem> { - let mut fx = AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?; - self.arch.set_fx(fx.as_mut_ptr() as usize); - self.kfx = Some(fx); - Ok(()) - } } diff --git a/src/context/list.rs b/src/context/list.rs index 7019a14c..6594e4c7 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -79,7 +79,7 @@ impl ContextList { let context_lock = self.new_context()?; { let mut context = context_lock.write(); - context.init_fx()?; + let _ = context.set_addr_space(super::memory::new_addrspace()?); let mut stack = vec![0; 65_536].into_boxed_slice(); let offset = stack.len() - mem::size_of::(); diff --git a/src/context/memory.rs b/src/context/memory.rs index 2ce74606..eaed5db0 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -258,6 +258,7 @@ impl UserGrants { } } pub fn insert(&mut self, grant: Grant) { + assert!(self.conflicts(*grant).next().is_none()); self.reserve(&grant); self.inner.insert(grant); } @@ -663,9 +664,6 @@ pub const DANGLING: usize = 1 << (usize::BITS - 2); #[derive(Debug)] pub struct Tables { - #[cfg(target_arch = "aarch64")] - pub ktable: Frame, - pub utable: Frame, } @@ -673,9 +671,6 @@ impl Drop for Tables { fn drop(&mut self) { use crate::memory::deallocate_frames; deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1); - - #[cfg(target_arch = "aarch64")] - deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.ktable.start_address().data())), 1); } } @@ -683,46 +678,37 @@ impl Drop for Tables { pub fn setup_new_utable() -> Result { let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; - // TODO: There is only supposed to be one ktable, right? Use a global variable to store the - // ktable (or access it from a control register) on architectures which have ktables, or obtain - // it from *any* utable on architectures which do not. 
- #[cfg(target_arch = "aarch64")] - let new_ktable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?; - - let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - - #[cfg(target_arch = "aarch64")] - let ktable = &new_ktable; + #[cfg(target_arch = "x86_64")] + { + let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; + let mut new_ktable = unsafe { InactivePageTable::from_address(new_utable.start_address().data()) }; - #[cfg(not(target_arch = "aarch64"))] - let ktable = &new_utable; + let mut copy_mapping = |p4_no| { + let frame = active_ktable.p4()[p4_no].pointed_frame() + .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no)); + let flags = active_ktable.p4()[p4_no].flags(); - let mut new_mapper = unsafe { InactivePageTable::from_address(ktable.start_address().data()) }; - - let mut copy_mapping = |p4_no| { - let frame = active_ktable.p4()[p4_no].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[p4_no].flags(); - - new_mapper.mapper().p4_mut()[p4_no].set(frame, flags); - }; - // TODO: Just copy all 256 mappings? + new_ktable.mapper().p4_mut()[p4_no].set(frame, flags); + }; + // TODO: Just copy all 256 mappings? Or copy KERNEL_PML4+KERNEL_PERCPU_PML4 (needed for + // paranoid ISRs which can occur anywhere; we don't want interrupts to triple fault!) and + // map lazily via page faults in the kernel. - // Copy kernel image mapping - copy_mapping(crate::KERNEL_PML4); + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); - // Copy kernel heap mapping - copy_mapping(crate::KERNEL_HEAP_PML4); + // Copy kernel heap mapping + copy_mapping(crate::KERNEL_HEAP_PML4); - // Copy physmap mapping - copy_mapping(crate::PHYS_PML4); + // Copy physmap mapping + copy_mapping(crate::PHYS_PML4); - // Copy kernel percpu (similar to TLS) mapping. - copy_mapping(crate::KERNEL_PERCPU_PML4); + // Copy kernel percpu (similar to TLS) mapping. 
+ copy_mapping(crate::KERNEL_PERCPU_PML4); + } Ok(Tables { utable: new_utable, - #[cfg(target_arch = "aarch64")] - ktable: new_ktable, }) } diff --git a/src/context/mod.rs b/src/context/mod.rs index a45efdc9..c282c8a2 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -6,6 +6,8 @@ use core::alloc::{GlobalAlloc, Layout}; use core::sync::atomic::Ordering; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use crate::paging::{RmmA, RmmArch}; + pub use self::context::{Context, ContextId, ContextSnapshot, Status, WaitpidKey}; pub use self::list::ContextList; pub use self::switch::switch; @@ -53,11 +55,14 @@ static CONTEXTS: RwLock = RwLock::new(ContextList::new()); #[thread_local] static CONTEXT_ID: context::AtomicContextId = context::AtomicContextId::default(); +pub use self::arch::empty_cr3; + pub fn init() { let mut contexts = contexts_mut(); let context_lock = contexts.new_context().expect("could not initialize first context"); let mut context = context_lock.write(); - context.init_fx().expect("failed to allocate FX for first context"); + + self::arch::EMPTY_CR3.call_once(|| unsafe { RmmA::table() }); context.status = Status::Runnable; context.running = true; @@ -65,11 +70,6 @@ pub fn init() { CONTEXT_ID.store(context.id, Ordering::SeqCst); } -/// Initialize contexts, called if needed -fn init_contexts() -> RwLock { - RwLock::new(ContextList::new()) -} - /// Get the global schemes list, const pub fn contexts() -> RwLockReadGuard<'static, ContextList> { CONTEXTS.read() diff --git a/src/context/switch.rs b/src/context/switch.rs index e6411425..8a565e56 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -29,11 +29,7 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { let ksig = context.ksig.take().expect("context::switch: ksig not set with ksig_restore"); context.arch = ksig.0; - if let Some(ref mut kfx) = context.kfx { - kfx.copy_from_slice(&*ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); - } else { - panic!("context::switch: kfx not set with ksig_restore"); - } + context.kfx.copy_from_slice(&*ksig.1); if let Some(ref mut kstack) = context.kstack { kstack.copy_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); @@ -194,11 +190,11 @@ pub unsafe fn switch() -> bool { to_context.arch.signal_stack(signal_handler, sig); } - let from_arch_ptr: *mut arch::Context = &mut from_context_guard.arch; + let from_ptr: *mut Context = &mut *from_context_guard; core::mem::forget(from_context_guard); - let prev_arch: &mut arch::Context = &mut *from_arch_ptr; - let next_arch: &mut arch::Context = &mut to_context.arch; + let prev: &mut Context = &mut *from_ptr; + let next: &mut Context = &mut *to_context; // to_context_guard only exists as a raw pointer, but is still locked @@ -207,7 +203,7 @@ pub unsafe fn switch() -> bool { next_lock: to_context_lock, })); - arch::switch_to(prev_arch, next_arch); + arch::switch_to(prev, next); // NOTE: After switch_to is called, the return address can even be different from the // current return address, meaning that we cannot use local variables here, and that we diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 5318b995..3fa2a935 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -572,11 +572,7 @@ impl Scheme for ProcScheme { RegsKind::Float => with_context(info.pid, |context| { // NOTE: The kernel will never touch floats - // In the rare case of not having floating - // point registers uninitiated, return - // empty everything. 
-            let fx = context.kfx.as_ref().map(|_| context.arch.get_fx_regs()).unwrap_or_default();
-            Ok((Output { float: fx }, mem::size_of::<FloatRegisters>()))
+            Ok((Output { float: context.get_fx_regs() }, mem::size_of::<FloatRegisters>()))
         })?,
         RegsKind::Int => try_stop_context(info.pid, |context| match unsafe { ptrace::regs_for(&context) } {
             None => {
@@ -835,7 +831,7 @@ impl Scheme for ProcScheme {
                 // Ignore the rare case of floating point
                 // registers being uninitiated
-                let _ = context.arch.set_fx_regs(regs);
+                let _ = context.set_fx_regs(regs);
                 Ok(mem::size_of::<FloatRegisters>())
             })
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index 7ad39bba..4dd223ff 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -7,13 +7,13 @@ use core::mem;
 
 use spin::{RwLock, RwLockWriteGuard};
 
-use crate::context::{Context, ContextId, memory, WaitpidKey};
+use crate::context::{Context, ContextId, WaitpidKey};
 use crate::Bootstrap;
 use crate::context;
 use crate::interrupt;
 use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll};
-use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmA, RmmArch, TableKind, VirtualAddress, PAGE_SIZE};
+use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmArch, TableKind, VirtualAddress, PAGE_SIZE};
 use crate::ptrace;
 use crate::start::usermode;
 use crate::syscall::data::SigAction;
@@ -43,7 +43,7 @@ fn empty<'lock>(context_lock: &'lock RwLock<Context>, mut context: RwLockWriteGu
         let unmap_result = if reaping {
             log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant);
 
-            let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) };
+            let mut new_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) };
 
             grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new())
         } else {
-- 
GitLab


From 0aec4d3341b470d9d82b0ba00efb8a68c1b5b04d Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 17 Jul 2022 14:07:56 +0200
Subject: [PATCH 28/44] Partial: migrate ACPI to RMM.
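
The hunks below all make the same move: instead of threading an
&mut ActivePageTable through the ACPI code, callers take the shared
kernel mapper lock and map table memory linearly into the PHYS_OFFSET
window. A minimal sketch of the convention, assuming only the names
visible in the hunks below (KernelMapper::lock, get_mut, map_linearly);
map_acpi_frame itself is illustrative, not part of this patch:

    use crate::memory::Frame;
    use crate::paging::{KernelMapper, PageFlags, PhysicalAddress};

    // Map one physical frame linearly and return its virtual address.
    unsafe fn map_acpi_frame(phys: usize) -> usize {
        let mut mapper = KernelMapper::lock();
        let frame = Frame::containing_address(PhysicalAddress::new(phys));
        let (_, flush) = mapper
            .get_mut()
            .expect("KernelMapper locked re-entrant while mapping ACPI memory")
            .map_linearly(frame.start_address(), PageFlags::new())
            .expect("failed to map ACPI frame");
        flush.flush();
        // Linear mappings live at PHYS_OFFSET + physaddr, which is why the
        // read_u64/write_u64 accessors below add crate::PHYS_OFFSET.
        phys + crate::PHYS_OFFSET
    }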
--- src/acpi/hpet.rs | 23 ++++++++++-------- src/acpi/madt.rs | 28 ++++++++++++++++------ src/acpi/mod.rs | 61 +++++++++++++++++++++++++---------------------- src/acpi/rsdp.rs | 14 ++++++----- src/acpi/rxsdt.rs | 9 ++++--- 5 files changed, 81 insertions(+), 54 deletions(-) diff --git a/src/acpi/hpet.rs b/src/acpi/hpet.rs index 498a6681..18e9ee09 100644 --- a/src/acpi/hpet.rs +++ b/src/acpi/hpet.rs @@ -3,7 +3,7 @@ use core::{mem, ptr}; use core::intrinsics::{volatile_load, volatile_store}; use crate::memory::Frame; -use crate::paging::{ActivePageTable, PhysicalAddress, Page, PageFlags, VirtualAddress}; +use crate::paging::{KernelMapper, PhysicalAddress, PageFlags}; use super::sdt::Sdt; use super::{ACPI_TABLE, find_sdt}; @@ -35,10 +35,10 @@ pub struct Hpet { } impl Hpet { - pub fn init(active_table: &mut ActivePageTable) { + pub fn init() { let hpet_sdt = find_sdt("HPET"); let hpet = if hpet_sdt.len() == 1 { - Hpet::new(hpet_sdt[0], active_table) + Hpet::new(hpet_sdt[0]) } else { println!("Unable to find HPET"); return; @@ -52,10 +52,10 @@ impl Hpet { } } - pub fn new(sdt: &'static Sdt, active_table: &mut ActivePageTable) -> Option { + pub fn new(sdt: &'static Sdt) -> Option { if &sdt.signature == b"HPET" && sdt.length as usize >= mem::size_of::() { let s = unsafe { ptr::read((sdt as *const Sdt) as *const Hpet) }; - unsafe { s.base_address.init(active_table) }; + unsafe { s.base_address.init(&mut KernelMapper::lock()) }; Some(s) } else { None @@ -64,18 +64,21 @@ impl Hpet { } impl GenericAddressStructure { - pub unsafe fn init(&self, active_table: &mut ActivePageTable) { - let page = Page::containing_address(VirtualAddress::new(self.address as usize)); + pub unsafe fn init(&self, mapper: &mut KernelMapper) { let frame = Frame::containing_address(PhysicalAddress::new(self.address as usize)); - let result = active_table.map_to(page, frame, PageFlags::new().write(true)); + let (_, result) = mapper + .get_mut() + .expect("KernelMapper locked re-entrant while mapping memory for GenericAddressStructure") + .map_linearly(frame.start_address(), PageFlags::new().write(true)) + .expect("failed to map memory for GenericAddressStructure"); result.flush(); } pub unsafe fn read_u64(&self, offset: usize) -> u64{ - volatile_load((self.address as usize + offset) as *const u64) + volatile_load((self.address as usize + offset + crate::PHYS_OFFSET) as *const u64) } pub unsafe fn write_u64(&mut self, offset: usize, value: u64) { - volatile_store((self.address as usize + offset) as *mut u64, value); + volatile_store((self.address as usize + offset + crate::PHYS_OFFSET) as *mut u64, value); } } diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs index f6dabb65..af8db07c 100644 --- a/src/acpi/madt.rs +++ b/src/acpi/madt.rs @@ -1,7 +1,7 @@ use core::mem; use crate::memory::{allocate_frames, Frame}; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress}; use super::sdt::Sdt; use super::find_sdt; @@ -28,7 +28,7 @@ pub static mut MADT: Option = None; pub const FLAG_PCAT: u32 = 1; impl Madt { - pub fn init(active_table: &mut ActivePageTable) { + pub fn init() { let madt_sdt = find_sdt("APIC"); let madt = if madt_sdt.len() == 1 { Madt::new(madt_sdt[0]) @@ -53,10 +53,18 @@ impl Madt { } if cfg!(feature = "multi_core") { + let mut mapper = KernelMapper::lock(); // Map trampoline let trampoline_frame = Frame::containing_address(PhysicalAddress::new(TRAMPOLINE)); let trampoline_page = 
Page::containing_address(VirtualAddress::new(TRAMPOLINE)); - let result = active_table.map_to(trampoline_page, trampoline_frame, PageFlags::new().execute(true).write(true)); //TODO: do not have writable and executable! + let result = unsafe { + //TODO: do not have writable and executable! + mapper + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .map_phys(trampoline_page.start_address(), trampoline_frame.start_address(), PageFlags::new().execute(true).write(true)) + .expect("failed to map trampoline") + }; result.flush(); // Write trampoline, make sure TRAMPOLINE page is free for use @@ -90,7 +98,7 @@ impl Madt { // Set the ap_ready to 0, volatile unsafe { atomic_store(ap_ready, 0) }; unsafe { atomic_store(ap_cpu_id, ap_local_apic.id as u64) }; - unsafe { atomic_store(ap_page_table, active_table.address() as u64) }; + unsafe { atomic_store(ap_page_table, mapper.table().phys().data() as u64) }; unsafe { atomic_store(ap_stack_start, stack_start as u64) }; unsafe { atomic_store(ap_stack_end, stack_end as u64) }; unsafe { atomic_store(ap_code, kstart_ap as u64) }; @@ -137,7 +145,7 @@ impl Madt { } println!(" Ready"); - active_table.flush_all(); + unsafe { RmmA::invalidate_all(); } } else { println!(" CPU Disabled"); } @@ -147,8 +155,14 @@ impl Madt { } // Unmap trampoline - let (result, _frame) = active_table.unmap_return(trampoline_page, false); - result.flush(); + let (_frame, _, flush) = unsafe { + mapper + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .unmap_phys(trampoline_page.start_address()) + .expect("failed to unmap trampoline page") + }; + flush.flush(); } } } diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index d65696e8..df8ee781 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -10,7 +10,7 @@ use spin::{Once, RwLock}; use crate::log::info; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress}; use self::madt::Madt; use self::rsdt::Rsdt; @@ -28,31 +28,33 @@ mod xsdt; mod rxsdt; mod rsdp; -pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt { - { - let page = Page::containing_address(VirtualAddress::new(sdt_address + crate::PHYS_OFFSET)); - if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(sdt_address)); - let result = active_table.map_to(page, frame, PageFlags::new()); - result.flush(); - } +unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::paging::PageMapper) { + let base = PhysicalAddress::new(crate::paging::round_down_pages(addr.data())); + let aligned_len = crate::paging::round_up_pages(len + (addr.data() - base.data())); + + for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE { + let (_, flush) = mapper.map_linearly(base.add(page_idx * crate::memory::PAGE_SIZE), PageFlags::new()).expect("failed to linearly map SDT"); + flush.flush(); } +} - let sdt = unsafe { &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt) }; +pub fn get_sdt(sdt_address: usize, mapper: &mut KernelMapper) -> &'static Sdt { + let mapper = mapper + .get_mut() + .expect("KernelMapper mapper locked re-entrant in get_sdt"); - // Map extra SDT frames if required - { - let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096 + crate::PHYS_OFFSET)); - let end_page = 
Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize + crate::PHYS_OFFSET));
-        for page in Page::range_inclusive(start_page, end_page) {
-            if active_table.translate_page(page).is_none() {
-                let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::PHYS_OFFSET));
-                let result = active_table.map_to(page, frame, PageFlags::new());
-                result.flush();
-            }
-        }
-    }
+    let physaddr = PhysicalAddress::new(sdt_address);
+
+    let sdt;
+
+    unsafe {
+        const SDT_SIZE: usize = core::mem::size_of::<Sdt>();
+        map_linearly(physaddr, SDT_SIZE, mapper);
+        sdt = unsafe { &*(RmmA::phys_to_virt(physaddr).data() as *const Sdt) };
+
+        map_linearly(physaddr.add(SDT_SIZE), sdt.length as usize - SDT_SIZE, mapper);
+    }
 
     sdt
 }
@@ -72,16 +74,19 @@ impl Rxsdt for RxsdtEnum {
 
 pub static RXSDT_ENUM: Once<RxsdtEnum> = Once::new();
 
 /// Parse the ACPI tables to gather CPU, interrupt, and timer information
-pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: Option<(u64, u64)>) {
+pub unsafe fn init(already_supplied_rsdps: Option<(u64, u64)>) {
     {
         let mut sdt_ptrs = SDT_POINTERS.write();
         *sdt_ptrs = Some(BTreeMap::new());
     }
 
     // Search for RSDP
-    if let Some(rsdp) = RSDP::get_rsdp(active_table, already_supplied_rsdps) {
+    let rsdp_opt = RSDP::get_rsdp(&mut KernelMapper::lock(), already_supplied_rsdps);
+
+    if let Some(rsdp) = rsdp_opt {
         info!("RSDP: {:?}", rsdp);
-        let rxsdt = get_sdt(rsdp.sdt_address(), active_table);
+        let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock());
+        dbg!();
 
         for &c in rxsdt.signature.iter() {
             print!("{}", c as char);
@@ -122,7 +127,7 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O
 
         // TODO: Don't touch ACPI tables in kernel?
-        rxsdt.map_all(active_table);
+        rxsdt.map_all();
 
         for sdt_address in rxsdt.iter() {
             let sdt = &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt);
@@ -135,10 +140,10 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O
 
     // TODO: Enumerate processors in userspace, and then provide an ACPI-independent interface
     //       to initialize enumerated processors to userspace?
-    Madt::init(active_table);
+    Madt::init();
 
     // TODO: Let userspace setup HPET, and then provide an interface to specify which timer to
     //       use?
-    Hpet::init(active_table);
+    Hpet::init();
     } else {
         println!("NO RSDP FOUND");
     }
diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs
index d96214b9..3d081867 100644
--- a/src/acpi/rsdp.rs
+++ b/src/acpi/rsdp.rs
@@ -2,7 +2,7 @@ use core::convert::TryFrom;
 use core::mem;
 
 use crate::memory::Frame;
-use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress};
+use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, VirtualAddress};
 
 /// RSDP
 #[derive(Copy, Clone, Debug)]
@@ -71,16 +71,16 @@ impl RSDP {
         None
     }
 
-    pub fn get_rsdp(active_table: &mut ActivePageTable, already_supplied_rsdps: Option<(u64, u64)>) -> Option<RSDP> {
+    pub fn get_rsdp(mapper: &mut KernelMapper, already_supplied_rsdps: Option<(u64, u64)>) -> Option<RSDP> {
         if let Some((base, size)) = already_supplied_rsdps {
             let area = unsafe { core::slice::from_raw_parts(base as usize as *const u8, size as usize) };
-            Self::get_already_supplied_rsdps(area).or_else(|| Self::get_rsdp_by_searching(active_table))
+            Self::get_already_supplied_rsdps(area).or_else(|| Self::get_rsdp_by_searching(mapper))
         } else {
-            Self::get_rsdp_by_searching(active_table)
+            Self::get_rsdp_by_searching(mapper)
         }
     }
 
     /// Search for the RSDP
-    pub fn get_rsdp_by_searching(active_table: &mut ActivePageTable) -> Option<RSDP> {
+    pub fn get_rsdp_by_searching(mapper: &mut KernelMapper) -> Option<RSDP> {
         let start_addr = 0xE_0000;
         let end_addr = 0xF_FFFF;
 
@@ -90,7 +90,9 @@ impl RSDP {
             let end_frame = Frame::containing_address(PhysicalAddress::new(end_addr));
             for frame in Frame::range_inclusive(start_frame, end_frame) {
                 let page = Page::containing_address(VirtualAddress::new(frame.start_address().data()));
-                let result = active_table.map_to(page, frame, PageFlags::new());
+                let result = unsafe {
+                    mapper.get_mut().expect("KernelMapper locked re-entrant while locating RSDPs").map_phys(page.start_address(), frame.start_address(), PageFlags::new()).expect("failed to map page while searching for RSDP")
+                };
                 result.flush();
             }
         }
diff --git a/src/acpi/rxsdt.rs b/src/acpi/rxsdt.rs
index db238806..1f17d79e 100644
--- a/src/acpi/rxsdt.rs
+++ b/src/acpi/rxsdt.rs
@@ -1,6 +1,6 @@
 use alloc::boxed::Box;
 
-use crate::paging::ActivePageTable;
+use crate::paging::KernelMapper;
 
 use super::sdt::Sdt;
 use super::get_sdt;
@@ -8,9 +8,12 @@
 pub trait Rxsdt {
     fn iter(&self) -> Box<dyn Iterator<Item = usize>>;
 
-    fn map_all(&self, active_table: &mut ActivePageTable) {
+    fn map_all(&self) {
+        let iter = self.iter();
+
+        let mut mapper = KernelMapper::lock();
         for sdt in self.iter() {
-            get_sdt(sdt, active_table);
+            get_sdt(sdt, &mut mapper);
         }
     }
-- 
GitLab


From 648b0edb41aea69f4089e05e2ec853af0535600b Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 17 Jul 2022 14:08:25 +0200
Subject: [PATCH 29/44] Partial: migrate allocator to RMM.
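
The allocator keeps its shape; only the page-table handle changes. The
slow path now looks roughly like this (map_heap and the constants are
from the hunks below; grow_heap is an illustrative name, not part of
the diff):

    // Called when allocate_first_fit fails and the heap must grow.
    unsafe fn grow_heap(heap: &mut linked_list_allocator::Heap) {
        let size = heap.size();
        // Lock the kernel mapper only for the duration of the mapping:
        // per the NOTE on KernelMapper later in this series, the guard
        // must not be held while memory allocations are done.
        super::map_heap(&mut crate::paging::KernelMapper::lock(),
                        crate::KERNEL_HEAP_OFFSET + size,
                        crate::KERNEL_HEAP_SIZE);
        heap.extend(crate::KERNEL_HEAP_SIZE);
    }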
--- src/allocator/linked_list.rs | 4 ++-- src/allocator/mod.rs | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/allocator/linked_list.rs b/src/allocator/linked_list.rs index e7a371fc..496f8ff6 100644 --- a/src/allocator/linked_list.rs +++ b/src/allocator/linked_list.rs @@ -3,7 +3,7 @@ use core::ptr::{self, NonNull}; use linked_list_allocator::Heap; use spin::Mutex; -use crate::paging::{ActivePageTable, TableKind}; +use crate::paging::KernelMapper; static HEAP: Mutex> = Mutex::new(None); @@ -21,7 +21,7 @@ unsafe impl GlobalAlloc for Allocator { match heap.allocate_first_fit(layout) { Err(()) => { let size = heap.size(); - super::map_heap(&mut ActivePageTable::new(TableKind::Kernel), crate::KERNEL_HEAP_OFFSET + size, crate::KERNEL_HEAP_SIZE); + super::map_heap(&mut KernelMapper::lock(), crate::KERNEL_HEAP_OFFSET + size, crate::KERNEL_HEAP_SIZE); heap.extend(crate::KERNEL_HEAP_SIZE); }, other => return other.ok().map_or(ptr::null_mut(), |allocation| allocation.as_ptr()), diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs index 0617843c..f8be0179 100644 --- a/src/allocator/mod.rs +++ b/src/allocator/mod.rs @@ -1,5 +1,5 @@ use rmm::Flusher; -use crate::paging::{ActivePageTable, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; +use crate::paging::{KernelMapper, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; #[cfg(not(feature="slab"))] pub use self::linked_list::Allocator; @@ -13,13 +13,14 @@ mod linked_list; #[cfg(feature="slab")] mod slab; -unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) { +unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) { + let mapper = mapper.get_mut().expect("failed to obtain exclusive access to KernelMapper while extending heap"); let mut flush_all = PageFlushAll::new(); let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1)); for page in Page::range_inclusive(heap_start_page, heap_end_page) { - let result = active_table.map(page, PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti")))) + let result = mapper.map(page.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti")))) .expect("failed to map kernel heap"); flush_all.consume(result); } @@ -27,12 +28,12 @@ unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usiz flush_all.flush(); } -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init() { let offset = crate::KERNEL_HEAP_OFFSET; let size = crate::KERNEL_HEAP_SIZE; // Map heap pages - map_heap(active_table, offset, size); + map_heap(&mut KernelMapper::lock(), offset, size); // Initialize global heap Allocator::init(offset, size); -- GitLab From 2bb019bc44a27ae5dd98d8955f56531deabe9ad2 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 17 Jul 2022 14:09:11 +0200 Subject: [PATCH 30/44] Partial: migrate debugger, add consistency check. --- src/debugger.rs | 82 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/src/debugger.rs b/src/debugger.rs index 6157f243..23dfc3b8 100644 --- a/src/debugger.rs +++ b/src/debugger.rs @@ -1,16 +1,23 @@ +use crate::paging::{RmmA, RmmArch}; + // Super unsafe due to page table switching and raw pointers! 
-pub unsafe fn debugger() {
+pub unsafe fn debugger(target_id: Option<crate::context::ContextId>) {
     println!("DEBUGGER START");
     println!();
 
-    let mut active_table = crate::paging::ActivePageTable::new(crate::paging::TableKind::User);
+    let old_table = RmmA::table();
+
     for (id, context_lock) in crate::context::contexts().iter() {
+        if target_id.map_or(false, |target_id| *id != target_id) { continue; }
         let context = context_lock.read();
         println!("{}: {}", (*id).into(), context.name.read());
 
         // Switch to context page table to ensure syscall debug and stack dump will work
-        let new_table = crate::paging::InactivePageTable::from_address(context.arch.get_page_utable());
-        let old_table = active_table.switch(new_table);
+        if let Some(ref space) = context.addr_space {
+            RmmA::set_table(space.read().table.utable.table().phys());
+        }
+
+        check_consistency(&mut context.addr_space.as_ref().unwrap().write());
 
         println!("status: {:?}", context.status);
         if ! context.status_reason.is_empty() {
@@ -41,7 +48,7 @@
             println!("stack: {:>016x}", rsp);
             //Maximum 64 qwords
             for i in 0..64 {
-                if active_table.translate(crate::paging::VirtualAddress::new(rsp)).is_some() {
+                if context.addr_space.as_ref().map_or(false, |space| space.read().table.utable.translate(crate::paging::VirtualAddress::new(rsp)).is_some()) {
                     let value = *(rsp as *const usize);
                     println!("    {:>016x}: {:>016x}", rsp, value);
                     if let Some(next_rsp) = rsp.checked_add(core::mem::size_of::<usize>()) {
@@ -58,10 +65,73 @@
         }
 
         // Switch to original page table
-        active_table.switch(old_table);
+        RmmA::set_table(old_table);
 
         println!();
     }
 
     println!("DEBUGGER END");
 }
+
+pub unsafe fn check_consistency(addr_space: &mut crate::context::memory::AddrSpace) {
+    use crate::paging::*;
+
+    let p4 = addr_space.table.utable.table();
+
+    for p4i in 0..256 {
+        let p3 = match p4.next(p4i) {
+            Some(p3) => p3,
+            None => continue,
+        };
+
+        for p3i in 0..512 {
+            let p2 = match p3.next(p3i) {
+                Some(p2) => p2,
+                None => continue,
+            };
+
+            for p2i in 0..512 {
+                let p1 = match p2.next(p2i) {
+                    Some(p1) => p1,
+                    None => continue,
+                };
+
+                for p1i in 0..512 {
+                    let (physaddr, flags) = match p1.entry(p1i) {
+                        Some(e) => if let Ok(address) = e.address() {
+                            (address, e.flags())
+                        } else {
+                            continue;
+                        },
+                        _ => continue,
+                    };
+                    let address = VirtualAddress::new((p1i << 12) | (p2i << 21) | (p3i << 30) | (p4i << 39));
+
+                    let grant = match addr_space.grants.contains(address) {
+                        Some(g) => g,
+                        None => {
+                            log::error!("ADDRESS {:p} LACKING GRANT BUT MAPPED TO {:#0x} FLAGS {:?}!", address.data() as *const u8, physaddr.data(), flags);
+                            continue;
+                        }
+                    };
+                    const STICKY: usize = (1 << 5) | (1 << 6); // accessed+dirty
+                    if grant.flags().data() & !STICKY != flags.data() & !STICKY {
+                        log::error!("FLAG MISMATCH: {:?} != {:?}, address {:p} in grant at {:?}", grant.flags(), flags, address.data() as *const u8, grant.region());
+                    }
+                }
+            }
+        }
+    }
+
+    for grant in addr_space.grants.iter() {
+        for page in grant.pages() {
+            let entry = match addr_space.table.utable.translate(page.start_address()) {
+                Some(e) => e,
+                None => {
+                    log::error!("GRANT AT {:?} LACKING MAPPING AT PAGE {:p}", grant.region(), page.start_address().data() as *const u8);
+                    continue;
+                }
+            };
+        }
+    }
+}
-- 
GitLab


From 302e55098c20013d920c3f57533e86e7eebbc269 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 17 Jul 2022 14:10:51 +0200
Subject: [PATCH 31/44] Migrate misc x86_64 parts to RMM.
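
Most hunks here are one of two mechanical rewrites: open-coded
PHYS_OFFSET arithmetic becomes RmmA::phys_to_virt, and ad-hoc
ActivePageTable mappings go through a locked KernelMapper. In sketch
form (map_phys, phys_to_virt and KernelMapper appear in the hunks
below; map_mmio and its argument are illustrative):

    use crate::paging::{KernelMapper, PageFlags, PhysicalAddress, RmmA, RmmArch};

    unsafe fn map_mmio(physaddr: PhysicalAddress) {
        // The linear window guarantees virt == PHYS_OFFSET + phys.
        let virtaddr = RmmA::phys_to_virt(physaddr);
        KernelMapper::lock()
            .get_mut()
            .expect("KernelMapper locked re-entrant while mapping MMIO")
            .map_phys(virtaddr, physaddr, PageFlags::new().write(true))
            .expect("failed to map MMIO")
            .flush();
    }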
--- src/arch/x86_64/device/ioapic.rs | 18 ++++++++++------- src/arch/x86_64/device/local_apic.rs | 29 +++++++++++++++------------- src/arch/x86_64/device/mod.rs | 12 ++++++------ src/arch/x86_64/idt.rs | 28 +++------------------------ src/arch/x86_64/interrupt/trace.rs | 9 ++++++--- src/arch/x86_64/start.rs | 20 ++++++++++++------- 6 files changed, 55 insertions(+), 61 deletions(-) diff --git a/src/arch/x86_64/device/ioapic.rs b/src/arch/x86_64/device/ioapic.rs index e92d4521..161f807f 100644 --- a/src/arch/x86_64/device/ioapic.rs +++ b/src/arch/x86_64/device/ioapic.rs @@ -8,7 +8,7 @@ use crate::acpi::madt::{self, Madt, MadtEntry, MadtIoApic, MadtIntSrcOverride}; use crate::arch::interrupt::irq; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch}; use crate::paging::entry::EntryFlags; use super::pic; @@ -229,16 +229,20 @@ pub fn src_overrides() -> &'static [Override] { } #[cfg(feature = "acpi")] -pub unsafe fn handle_ioapic(active_table: &mut ActivePageTable, madt_ioapic: &'static MadtIoApic) { +pub unsafe fn handle_ioapic(mapper: &mut KernelMapper, madt_ioapic: &'static MadtIoApic) { // map the I/O APIC registers let frame = Frame::containing_address(PhysicalAddress::new(madt_ioapic.address as usize)); - let page = Page::containing_address(VirtualAddress::new(madt_ioapic.address as usize + crate::PHYS_OFFSET)); + let page = Page::containing_address(RmmA::phys_to_virt(frame.start_address())); - assert_eq!(active_table.translate_page(page), None); + assert!(mapper.translate(page.start_address()).is_none()); - let result = active_table.map_to(page, frame, PageFlags::new().write(true).custom_flag(EntryFlags::NO_CACHE.bits(), true)); - result.flush(); + mapper + .get_mut() + .expect("expected KernelMapper not to be locked re-entrant while mapping I/O APIC memory") + .map_phys(page.start_address(), frame.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::NO_CACHE.bits(), true)) + .expect("failed to map I/O APIC") + .flush(); let ioapic_registers = page.start_address().data() as *const u32; let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base); @@ -280,7 +284,7 @@ pub unsafe fn handle_src_override(src_override: &'static MadtIntSrcOverride) { SRC_OVERRIDES.get_or_insert_with(Vec::new).push(over); } -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init(active_table: &mut KernelMapper) { let bsp_apic_id = x86::cpuid::CpuId::new().get_feature_info().unwrap().initial_local_apic_id(); // TODO // search the madt for all IOAPICs. 
diff --git a/src/arch/x86_64/device/local_apic.rs b/src/arch/x86_64/device/local_apic.rs index 913781e5..100341e6 100644 --- a/src/arch/x86_64/device/local_apic.rs +++ b/src/arch/x86_64/device/local_apic.rs @@ -3,15 +3,14 @@ use core::intrinsics::{volatile_load, volatile_store}; use x86::cpuid::CpuId; use x86::msr::*; -use crate::memory::Frame; -use crate::paging::{ActivePageTable, PhysicalAddress, Page, PageFlags, VirtualAddress}; +use crate::paging::{KernelMapper, PhysicalAddress, PageFlags, RmmA, RmmArch}; pub static mut LOCAL_APIC: LocalApic = LocalApic { address: 0, x2: false }; -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init(active_table: &mut KernelMapper) { LOCAL_APIC.init(active_table); } @@ -41,21 +40,25 @@ pub fn bsp_apic_id() -> Option { } impl LocalApic { - unsafe fn init(&mut self, active_table: &mut ActivePageTable) { - self.address = (rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000) + crate::PHYS_OFFSET; + unsafe fn init(&mut self, mapper: &mut KernelMapper) { + let mapper = mapper.get_mut().expect("expected KernelMapper not to be locked re-entrant while initializing LAPIC"); + + let physaddr = PhysicalAddress::new(rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000); + let virtaddr = RmmA::phys_to_virt(physaddr); + + self.address = virtaddr.data(); self.x2 = CpuId::new().get_feature_info().unwrap().has_x2apic(); if ! self.x2 { - let page = Page::containing_address(VirtualAddress::new(self.address)); - let frame = Frame::containing_address(PhysicalAddress::new(self.address - crate::PHYS_OFFSET)); - log::info!("Detected xAPIC at {:#x}", frame.start_address().data()); - if active_table.translate_page(page).is_some() { + log::info!("Detected xAPIC at {:#x}", physaddr.data()); + if let Some((_entry, _, flush)) = mapper.unmap_phys(virtaddr) { // Unmap xAPIC page if already mapped - let (result, _frame) = active_table.unmap_return(page, true); - result.flush(); + flush.flush(); } - let result = active_table.map_to(page, frame, PageFlags::new().write(true)); - result.flush(); + mapper + .map_phys(virtaddr, physaddr, PageFlags::new().write(true)) + .expect("failed to map local APIC memory") + .flush(); } else { log::info!("Detected x2APIC"); } diff --git a/src/arch/x86_64/device/mod.rs b/src/arch/x86_64/device/mod.rs index 942d3201..eafa245c 100644 --- a/src/arch/x86_64/device/mod.rs +++ b/src/arch/x86_64/device/mod.rs @@ -1,5 +1,3 @@ -use crate::paging::ActivePageTable; - pub mod cpu; pub mod ioapic; pub mod local_apic; @@ -12,13 +10,15 @@ pub mod hpet; #[cfg(feature = "system76_ec_debug")] pub mod system76_ec; -pub unsafe fn init(active_table: &mut ActivePageTable) { +use crate::paging::KernelMapper; + +pub unsafe fn init() { pic::init(); - local_apic::init(active_table); + local_apic::init(&mut KernelMapper::lock()); } -pub unsafe fn init_after_acpi(_active_table: &mut ActivePageTable) { +pub unsafe fn init_after_acpi() { // this will disable the IOAPIC if needed. 
- //ioapic::init(active_table); + //ioapic::init(mapper); } #[cfg(feature = "acpi")] diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 595b6a0b..4a712693 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -10,7 +10,6 @@ use x86::dtables::{self, DescriptorTablePointer}; use crate::interrupt::*; use crate::ipi::IpiKind; -use crate::paging::PageFlags; use spin::RwLock; @@ -172,32 +171,11 @@ pub unsafe fn init_generic(is_bsp: bool, idt: &mut Idt) { let frames = crate::memory::allocate_frames(page_count) .expect("failed to allocate pages for backup interrupt stack"); - // Map them linearly, i.e. PHYS_OFFSET + physaddr. - let base_address = { - use crate::memory::{Frame, PhysicalAddress}; - use crate::paging::{ActivePageTable, Page, VirtualAddress}; + use crate::paging::{RmmA, RmmArch}; - let base_virtual_address = VirtualAddress::new(frames.start_address().data() + crate::PHYS_OFFSET); - let mut active_table = ActivePageTable::new(base_virtual_address.kind()); + // Physical pages are mapped linearly. So is the linearly mapped virtual memory. + let base_address = RmmA::phys_to_virt(frames.start_address()); - for i in 0..page_count { - let virtual_address = VirtualAddress::new(base_virtual_address.data() + i * crate::memory::PAGE_SIZE); - let physical_address = PhysicalAddress::new(frames.start_address().data() + i * crate::memory::PAGE_SIZE); - let page = Page::containing_address(virtual_address); - - let flags = PageFlags::new().write(true); - - let flusher = if let Some(already_mapped) = active_table.translate_page(page) { - assert_eq!(already_mapped.start_address(), physical_address, "address already mapped, but non-linearly"); - active_table.remap(page, flags) - } else { - active_table.map_to(page, Frame::containing_address(physical_address), flags) - }; - flusher.flush(); - } - - base_virtual_address - }; // Stack always grows downwards. 
let address = base_address.data() + BACKUP_STACK_SIZE; diff --git a/src/arch/x86_64/interrupt/trace.rs b/src/arch/x86_64/interrupt/trace.rs index 1b72260d..ecff30e9 100644 --- a/src/arch/x86_64/interrupt/trace.rs +++ b/src/arch/x86_64/interrupt/trace.rs @@ -1,8 +1,9 @@ use core::{mem, str}; + use goblin::elf::sym; use rustc_demangle::demangle; -use crate::paging::{ActivePageTable, TableKind, VirtualAddress}; +use crate::{context, paging::{KernelMapper, VirtualAddress}}; /// Get a stack trace //TODO: Check for stack being mapped before dereferencing @@ -13,12 +14,14 @@ pub unsafe fn stack_trace() { println!("TRACE: {:>016X}", rbp); //Maximum 64 frames - let active_table = ActivePageTable::new(TableKind::User); + + let mapper = KernelMapper::lock(); + for _frame in 0..64 { if let Some(rip_rbp) = rbp.checked_add(mem::size_of::()) { let rbp_virt = VirtualAddress::new(rbp); let rip_rbp_virt = VirtualAddress::new(rip_rbp); - if rbp_virt.is_canonical() && rip_rbp_virt.is_canonical() && active_table.translate(rbp_virt).is_some() && active_table.translate(rip_rbp_virt).is_some() { + if rbp_virt.is_canonical() && rip_rbp_virt.is_canonical() && mapper.translate(rbp_virt).is_some() && mapper.translate(rip_rbp_virt).is_some() { let rip = *(rip_rbp as *const usize); if rip == 0 { println!(" {:>016X}: EMPTY RETURN", rbp); diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs index fb4c8a34..b8acccd2 100644 --- a/src/arch/x86_64/start.rs +++ b/src/arch/x86_64/start.rs @@ -18,7 +18,7 @@ use crate::gdt; use crate::idt; use crate::interrupt; use crate::log::{self, info}; -use crate::paging; +use crate::paging::{self, KernelMapper}; /// Test of zero values in BSS. static BSS_TEST_ZERO: usize = 0; @@ -131,7 +131,7 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { ); // Initialize paging - let (mut active_table, tcb_offset) = paging::init(0); + let tcb_offset = paging::init(0); // Set up GDT after paging with TLS gdt::init_paging(0, tcb_offset, args.stack_base + args.stack_size); @@ -158,7 +158,7 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { BSP_READY.store(false, Ordering::SeqCst); // Setup kernel heap - allocator::init(&mut active_table); + allocator::init(); // Set up double buffer for grpahical debug now that heap is available #[cfg(feature = "graphical_debug")] @@ -170,17 +170,17 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { log::init(); // Initialize devices - device::init(&mut active_table); + device::init(); // Read ACPI tables, starts APs #[cfg(feature = "acpi")] { - acpi::init(&mut active_table, if args.acpi_rsdps_base != 0 && args.acpi_rsdps_size > 0 { + acpi::init(if args.acpi_rsdps_base != 0 && args.acpi_rsdps_size > 0 { Some(((args.acpi_rsdps_base + crate::PHYS_OFFSET) as u64, args.acpi_rsdps_size as u64)) } else { None }); - device::init_after_acpi(&mut active_table); + device::init_after_acpi(); } // Initialize all of the non-core devices not otherwise needed to complete initialization @@ -230,7 +230,13 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! 
{ idt::init(); // Initialize paging - let tcb_offset = paging::init_ap(cpu_id, bsp_table); + let tcb_offset = { + use crate::paging::{PageMapper, PhysicalAddress}; + use crate::rmm::FRAME_ALLOCATOR; + + let mut mapper = KernelMapper::lock_for_manual_mapper(cpu_id, PageMapper::new(PhysicalAddress::new(bsp_table), FRAME_ALLOCATOR)); + paging::init_ap(cpu_id, &mut mapper) + }; // Set up GDT with TLS gdt::init_paging(cpu_id as u32, tcb_offset, stack_end); -- GitLab From 486d296d6d1bb805e041ca2c691ee5419183d7c4 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 17 Jul 2022 14:11:11 +0200 Subject: [PATCH 32/44] Remove old x86_64 paging code, migrate to RMM. --- src/arch/x86_64/paging/mapper.rs | 191 -------------------- src/arch/x86_64/paging/mod.rs | 216 +++-------------------- src/arch/x86_64/paging/table.rs | 131 -------------- src/arch/x86_64/paging/temporary_page.rs | 42 ----- src/arch/x86_64/rmm.rs | 110 +++++++++--- 5 files changed, 111 insertions(+), 579 deletions(-) delete mode 100644 src/arch/x86_64/paging/table.rs delete mode 100644 src/arch/x86_64/paging/temporary_page.rs diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index 95f404b8..9f7659b4 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -1,200 +1,9 @@ -use super::{linear_phys_to_virt, Page, PAGE_SIZE, PageFlags, PhysicalAddress, VirtualAddress}; - use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::{allocate_frames, deallocate_frames, Enomem, Frame}; use super::RmmA; -use super::table::{Table, Level4}; pub use rmm::{Flusher, PageFlush, PageFlushAll}; -pub struct Mapper<'table> { - pub(in super) p4: &'table mut Table, -} - -impl core::fmt::Debug for Mapper<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Mapper referencing P4 at {:p}", self.p4) - } -} - -impl<'table> Mapper<'table> { - /// Wrap the current address space in a mapper. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the pointer in the CR3 - /// register. - // TODO: Find some lifetime hack we can use for ensuring exclusive access at compile time? - pub unsafe fn current() -> Mapper<'table> { - // SAFETY: We know that CR3 must be a valid frame, since the processor would triple fault - // otherwise, and the caller has ensured exclusive ownership of the KERNEL_OFFSET+CR3. - Self::from_p4_unchecked(&mut Frame::containing_address(PhysicalAddress::new(x86::controlregs::cr3() as usize))) - } - /// Wrap a top-level page table (an entire address space) in a mapper. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the frame argument. The frame - /// must also be valid, and the frame must not outlive the lifetime. 
- pub unsafe fn from_p4_unchecked(frame: &mut Frame) -> Self { - let virt = linear_phys_to_virt(frame.start_address()) - .expect("expected page table frame to fit within linear mapping"); - - Self { - p4: &mut *(virt.data() as *mut Table), - } - } - - pub fn p4(&self) -> &Table { - &*self.p4 - } - - pub fn p4_mut(&mut self) -> &mut Table { - &mut *self.p4 - } - - /// Map a page to a frame - pub fn map_to(&mut self, page: Page, frame: Frame, flags: PageFlags) -> PageFlush { - let p3 = self.p4_mut().next_table_create(page.p4_index()); - let p2 = p3.next_table_create(page.p3_index()); - let p1 = p2.next_table_create(page.p2_index()); - - assert!(p1[page.p1_index()].is_unused(), - "{:X}: Set to {:X}: {:?}, requesting {:X}: {:?}", - page.start_address().data(), - p1[page.p1_index()].address().data(), p1[page.p1_index()].flags(), - frame.start_address().data(), flags); - p1.increment_entry_count(); - p1[page.p1_index()].set(frame, flags); - PageFlush::new(page.start_address()) - } - - /// Map a page to the next free frame - pub fn map(&mut self, page: Page, flags: PageFlags) -> Result, Enomem> { - let frame = allocate_frames(1).ok_or(Enomem)?; - Ok(self.map_to(page, frame, flags)) - } - - /// Update flags for a page - pub fn remap(&mut self, page: Page, flags: PageFlags) -> PageFlush { - let p3 = self.p4_mut().next_table_mut(page.p4_index()).expect("failed to remap: no p3"); - let p2 = p3.next_table_mut(page.p3_index()).expect("failed to remap: no p2"); - let p1 = p2.next_table_mut(page.p2_index()).expect("failed to remap: no p1"); - let frame = p1[page.p1_index()].pointed_frame().expect("failed to remap: not mapped"); - p1[page.p1_index()].set(frame, flags); - PageFlush::new(page.start_address()) - } - - /// Identity map a frame - pub fn identity_map(&mut self, frame: Frame, flags: PageFlags) -> PageFlush { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().data())); - self.map_to(page, frame, flags) - } - - fn unmap_inner(&mut self, page: Page, keep_parents: bool) -> Frame { - let frame; - - let p4 = self.p4_mut(); - if let Some(p3) = p4.next_table_mut(page.p4_index()) { - if let Some(p2) = p3.next_table_mut(page.p3_index()) { - if let Some(p1) = p2.next_table_mut(page.p2_index()) { - frame = if let Some(frame) = p1[page.p1_index()].pointed_frame() { - frame - } else { - panic!("unmap_inner({:X}): frame not found", page.start_address().data()) - }; - - p1.decrement_entry_count(); - p1[page.p1_index()].set_unused(); - - if keep_parents || ! p1.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p1 not found", page.start_address().data()); - } - - if let Some(p1_frame) = p2[page.p2_index()].pointed_frame() { - //println!("unmap_inner: Free p1 {:?}", p1_frame); - p2.decrement_entry_count(); - p2[page.p2_index()].set_unused(); - deallocate_frames(p1_frame, 1); - } else { - panic!("unmap_inner({:X}): p1_frame not found", page.start_address().data()); - } - - if ! p2.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p2 not found", page.start_address().data()); - } - - if let Some(p2_frame) = p3[page.p3_index()].pointed_frame() { - //println!("unmap_inner: Free p2 {:?}", p2_frame); - p3.decrement_entry_count(); - p3[page.p3_index()].set_unused(); - deallocate_frames(p2_frame, 1); - } else { - panic!("unmap_inner({:X}): p2_frame not found", page.start_address().data()); - } - - if ! 
p3.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p3 not found", page.start_address().data()); - } - - if let Some(p3_frame) = p4[page.p4_index()].pointed_frame() { - //println!("unmap_inner: Free p3 {:?}", p3_frame); - p4.decrement_entry_count(); - p4[page.p4_index()].set_unused(); - deallocate_frames(p3_frame, 1); - } else { - panic!("unmap_inner({:X}): p3_frame not found", page.start_address().data()); - } - - frame - } - - /// Unmap a page - pub fn unmap(&mut self, page: Page) -> PageFlush { - let frame = self.unmap_inner(page, false); - deallocate_frames(frame, 1); - PageFlush::new(page.start_address()) - } - - /// Unmap a page, return frame without free - pub fn unmap_return(&mut self, page: Page, keep_parents: bool) -> (PageFlush, Frame) { - let frame = self.unmap_inner(page, keep_parents); - (PageFlush::new(page.start_address()), frame) - } - - pub fn translate_page(&self, page: Page) -> Option { - self.translate_page_and_flags(page).map(|(frame, _)| frame) - } - - pub fn translate_page_flags(&self, page: Page) -> Option> { - self.translate_page_and_flags(page).map(|(_, flags)| flags) - } - pub fn translate_page_and_flags(&self, page: Page) -> Option<(Frame, PageFlags)> { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .map(|p1| &p1[page.p1_index()]) - .and_then(|entry| Some((entry.pointed_frame()?, entry.flags()))) - } - - /// Translate a virtual address to a physical one - pub fn translate(&self, virtual_address: VirtualAddress) -> Option { - let offset = virtual_address.data() % PAGE_SIZE; - self.translate_page(Page::containing_address(virtual_address)) - .map(|frame| PhysicalAddress::new(frame.start_address().data() + offset)) - } -} - pub struct InactiveFlusher { _inner: () } impl InactiveFlusher { // TODO: cpu id diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index aca613ea..46f1777b 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -1,16 +1,11 @@ //! # Paging //! Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) -use core::ops::{Deref, DerefMut}; use core::{mem, ptr}; -use spin::Mutex; use x86::msr; -use crate::memory::Frame; - use self::entry::EntryFlags; -use self::mapper::{Mapper, PageFlushAll}; -use self::table::{Level4, Table}; +use self::mapper::PageFlushAll; pub use rmm::{ Arch as RmmArch, @@ -22,47 +17,17 @@ pub use rmm::{ X8664Arch as RmmA, }; +pub type PageMapper = rmm::PageMapper; +pub use crate::rmm::KernelMapper; + pub mod entry; pub mod mapper; -pub mod table; -pub mod temporary_page; /// Number of entries per page table -pub const ENTRY_COUNT: usize = 512; +pub const ENTRY_COUNT: usize = RmmA::PAGE_ENTRIES; /// Size of pages -pub const PAGE_SIZE: usize = 4096; - -//TODO: This is a rudimentary recursive mutex used to naively fix multi_core issues, replace it! 
-pub struct PageTableLock { - cpu_id: usize, - count: usize, -} - -pub static PAGE_TABLE_LOCK: Mutex = Mutex::new(PageTableLock { - cpu_id: 0, - count: 0, -}); - -fn page_table_lock() { - let cpu_id = crate::cpu_id(); - loop { - { - let mut lock = PAGE_TABLE_LOCK.lock(); - if lock.count == 0 || lock.cpu_id == cpu_id { - lock.cpu_id = cpu_id; - lock.count += 1; - return; - } - } - crate::arch::interrupt::pause(); - } -} - -fn page_table_unlock() { - let mut lock = PAGE_TABLE_LOCK.lock(); - lock.count -= 1; -} +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; /// Setup page attribute table unsafe fn init_pat() { @@ -97,7 +62,7 @@ unsafe fn init_pat() { } /// Map percpu -unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { +unsafe fn map_percpu(cpu_id: usize, mapper: &mut PageMapper) -> PageFlushAll { extern "C" { /// The starting byte of the thread data segment static mut __tdata_start: u8; @@ -118,7 +83,7 @@ unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { let end_page = Page::containing_address(VirtualAddress::new(end - 1)); for page in Page::range_inclusive(start_page, end_page) { let result = mapper.map( - page, + page.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti"))), ) .expect("failed to allocate page table frames while mapping percpu"); @@ -162,7 +127,7 @@ unsafe fn init_tcb(cpu_id: usize) -> usize { /// Returns page table and thread control block offset pub unsafe fn init( cpu_id: usize, -) -> (ActivePageTable, usize) { +) -> usize { extern "C" { /// The starting byte of the text (code) data segment. static mut __text_start: u8; @@ -192,170 +157,30 @@ pub unsafe fn init( init_pat(); - let mut active_table = ActivePageTable::new_unlocked(TableKind::User); - - let flush_all = map_percpu(cpu_id, &mut active_table); + let flush_all = map_percpu(cpu_id, KernelMapper::lock_manually(cpu_id).get_mut().expect("expected KernelMapper not to be locked re-entrant in paging::init")); flush_all.flush(); - return (active_table, init_tcb(cpu_id)); + return init_tcb(cpu_id); } pub unsafe fn init_ap( cpu_id: usize, - bsp_table: usize, + bsp_table: &mut KernelMapper, ) -> usize { init_pat(); - let mut active_table = ActivePageTable::new_unlocked(TableKind::User); - - let mut new_table = InactivePageTable::from_address(bsp_table); - { - let flush_all = map_percpu(cpu_id, &mut new_table.mapper()); - // The flush can be ignored as this is not the active table. See later active_table.switch + let flush_all = map_percpu(cpu_id, bsp_table.get_mut().expect("KernelMapper locked re-entrant for AP")); + + // The flush can be ignored as this is not the active table. See later make_current(). flush_all.ignore(); }; - // This switches the active table, which is setup by the bootloader, to a correct table - // setup by the lambda above. 
This will also flush the TLB - active_table.switch(new_table); + bsp_table.make_current(); init_tcb(cpu_id) } -#[derive(Debug)] -pub struct ActivePageTable { - mapper: Mapper<'static>, - locked: bool, -} - -impl Deref for ActivePageTable { - type Target = Mapper<'static>; - - fn deref(&self) -> &Mapper<'static> { - &self.mapper - } -} - -impl DerefMut for ActivePageTable { - fn deref_mut(&mut self) -> &mut Mapper<'static> { - &mut self.mapper - } -} - -impl ActivePageTable { - pub unsafe fn new(_table_kind: TableKind) -> ActivePageTable { - page_table_lock(); - ActivePageTable { - mapper: Mapper::current(), - locked: true, - } - } - - pub unsafe fn new_unlocked(_table_kind: TableKind) -> ActivePageTable { - ActivePageTable { - mapper: Mapper::current(), - locked: false, - } - } - - pub fn switch(&mut self, new_table: InactivePageTable) -> InactivePageTable { - let old_table = InactivePageTable { - frame: Frame::containing_address(unsafe { - RmmA::table() - }) - }; - unsafe { - // Activate new page table - RmmA::set_table(new_table.frame.start_address()); - // Update mapper to new page table - self.mapper = Mapper::current(); - } - old_table - } - - pub fn flush(&mut self, page: Page) { - unsafe { - RmmA::invalidate(page.start_address()); - } - } - - pub fn flush_all(&mut self) { - unsafe { - RmmA::invalidate_all(); - } - } - - pub unsafe fn address(&self) -> usize { - RmmA::table().data() - } - pub fn mapper<'a>(&'a mut self) -> Mapper<'a> { - Mapper { - p4: self.p4, - } - } -} - -impl Drop for ActivePageTable { - fn drop(&mut self) { - if self.locked { - page_table_unlock(); - self.locked = false; - } - } -} - -pub struct InactivePageTable { - frame: Frame, -} - -impl InactivePageTable { - /// Create a new inactive page table, located at a given frame. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the corresponding virtual - /// address of the frame. - pub unsafe fn new( - frame: Frame, - ) -> InactivePageTable { - // FIXME: Use active_table to ensure that the newly-allocated frame be linearly mapped, in - // case it is outside the pre-mapped physical address range, or if such a range is too - // large to fit the whole physical address space in the virtual address space. - { - let table = linear_phys_to_virt(frame.start_address()) - .expect("cannot initialize InactivePageTable (currently) without the frame being linearly mapped"); - // now we are able to zero the table - - // SAFETY: The caller must ensure exclusive access to the pointed-to virtual address of - // the frame. 
- (&mut *(table.data() as *mut Table::)).zero(); - } - - InactivePageTable { frame } - } - - pub unsafe fn from_address(address: usize) -> InactivePageTable { - InactivePageTable { - frame: Frame::containing_address(PhysicalAddress::new(address)), - } - } - - pub fn mapper<'inactive_table>(&'inactive_table mut self) -> Mapper<'inactive_table> { - unsafe { Mapper::from_p4_unchecked(&mut self.frame) } - } - pub unsafe fn address(&self) -> usize { - self.frame.start_address().data() - } -} - -pub fn linear_phys_to_virt(physical: PhysicalAddress) -> Option { - physical.data().checked_add(crate::PHYS_OFFSET).map(VirtualAddress::new) -} -pub fn linear_virt_to_phys(virt: VirtualAddress) -> Option { - virt.data().checked_sub(crate::PHYS_OFFSET).map(PhysicalAddress::new) -} - /// Page #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Page { @@ -426,3 +251,12 @@ impl Iterator for PageIter { } } } + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number - number % PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + round_down_pages(number + PAGE_SIZE - 1) +} diff --git a/src/arch/x86_64/paging/table.rs b/src/arch/x86_64/paging/table.rs deleted file mode 100644 index 605e078c..00000000 --- a/src/arch/x86_64/paging/table.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! # Page table -//! Code borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) - -use core::marker::PhantomData; -use core::ops::{Index, IndexMut}; - -use crate::memory::allocate_frames; -use crate::paging::{linear_phys_to_virt, VirtualAddress}; - -use super::{ENTRY_COUNT, PageFlags}; -use super::entry::{Entry, EntryFlags}; - -pub trait TableLevel {} - -pub enum Level4 {} -pub enum Level3 {} -pub enum Level2 {} -pub enum Level1 {} - -impl TableLevel for Level4 {} -impl TableLevel for Level3 {} -impl TableLevel for Level2 {} -impl TableLevel for Level1 {} - -pub trait HierarchicalLevel: TableLevel { - type NextLevel: TableLevel; -} - -impl HierarchicalLevel for Level4 { - type NextLevel = Level3; -} - -impl HierarchicalLevel for Level3 { - type NextLevel = Level2; -} - -impl HierarchicalLevel for Level2 { - type NextLevel = Level1; -} - -#[repr(C, align(4096))] -pub struct Table { - entries: [Entry; ENTRY_COUNT], - level: PhantomData, -} - -impl Table where L: TableLevel { - pub fn is_unused(&self) -> bool { - self.entry_count() == 0 - } - - pub fn zero(&mut self) { - for entry in self.entries.iter_mut() { - entry.set_zero(); - } - } - - /// Set number of entries in first table entry - fn set_entry_count(&mut self, count: u64) { - debug_assert!(count <= ENTRY_COUNT as u64, "count can't be greater than ENTRY_COUNT"); - self.entries[0].set_counter_bits(count) - } - - /// Get number of entries in first table entry - fn entry_count(&self) -> u64 { - self.entries[0].counter_bits() - } - - pub fn increment_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count + 1); - } - - pub fn decrement_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count - 1); - } -} - -impl Table where L: HierarchicalLevel { - pub fn next_table(&self, index: usize) -> Option<&Table> { - self.next_table_address(index).map(|address| unsafe { &*(address.data() as *const _) }) - } - - pub fn next_table_mut(&mut self, index: usize) -> Option<&mut Table> { - self.next_table_address(index).map(|address| unsafe { &mut 
*(address.data() as *mut _) }) - } - - pub fn next_table_create(&mut self, index: usize) -> &mut Table { - if self.next_table(index).is_none() { - assert!(!self[index].flags().has_flag(EntryFlags::HUGE_PAGE.bits()), - "next_table_create does not support huge pages"); - let frame = allocate_frames(1).expect("no frames available"); - self.increment_entry_count(); - //TODO: RISC-V will not like this - self[index].set(frame, PageFlags::new_table().execute(true).write(true).user(true) /* Allow users to go down the page table, implement permissions at the page level */); - self.next_table_mut(index).unwrap().zero(); - } - self.next_table_mut(index).unwrap() - } - - fn next_table_address(&self, index: usize) -> Option { - let entry = &self[index]; - let entry_flags = entry.flags(); - - entry.pointed_frame().and_then(|next_table_frame| { - if entry_flags.has_flag(EntryFlags::HUGE_PAGE.bits()) { - return None; - } - let next_table_physaddr = next_table_frame.start_address(); - let next_table_virtaddr = linear_phys_to_virt(next_table_physaddr) - .expect("expected page table frame to fit within linear mapping"); - - Some(next_table_virtaddr) - }) - } -} - -impl Index for Table where L: TableLevel { - type Output = Entry; - - fn index(&self, index: usize) -> &Entry { - &self.entries[index] - } -} - -impl IndexMut for Table where L: TableLevel { - fn index_mut(&mut self, index: usize) -> &mut Entry { - &mut self.entries[index] - } -} diff --git a/src/arch/x86_64/paging/temporary_page.rs b/src/arch/x86_64/paging/temporary_page.rs deleted file mode 100644 index c8427cc1..00000000 --- a/src/arch/x86_64/paging/temporary_page.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Temporarily map a page -//! From [Phil Opp's Blog](http://os.phil-opp.com/remap-the-kernel.html) - -use crate::memory::Frame; - -use super::{ActivePageTable, Page, PageFlags, RmmA, VirtualAddress}; -use super::table::{Table, Level1}; - -pub struct TemporaryPage { - page: Page, -} - -impl TemporaryPage { - pub fn new(page: Page) -> TemporaryPage { - TemporaryPage { page } - } - - pub fn start_address (&self) -> VirtualAddress { - self.page.start_address() - } - - /// Maps the temporary page to the given frame in the active table. - /// Returns the start address of the temporary page. - pub fn map(&mut self, frame: Frame, flags: PageFlags, active_table: &mut ActivePageTable) -> VirtualAddress { - assert!(active_table.translate_page(self.page).is_none(), "temporary page is already mapped"); - let result = active_table.map_to(self.page, frame, flags); - result.flush(); - self.page.start_address() - } - - /// Maps the temporary page to the given page table frame in the active - /// table. Returns a reference to the now mapped table. - pub fn map_table_frame(&mut self, frame: Frame, flags: PageFlags, active_table: &mut ActivePageTable) -> &mut Table { - unsafe { &mut *(self.map(frame, flags, active_table).data() as *mut Table) } - } - - /// Unmaps the temporary page in the active table. 
-    pub fn unmap(&mut self, active_table: &mut ActivePageTable) {
-        let (result, _frame) = active_table.unmap_return(self.page, true);
-        result.flush();
-    }
-}
diff --git a/src/arch/x86_64/rmm.rs b/src/arch/x86_64/rmm.rs
index 11eb527d..f3ef63a2 100644
--- a/src/arch/x86_64/rmm.rs
+++ b/src/arch/x86_64/rmm.rs
@@ -2,6 +2,7 @@
 use core::{
     cmp, mem, slice,
+    sync::atomic::{self, AtomicUsize, Ordering},
 };
 use rmm::{
     KILOBYTE,
     MEGABYTE,
     Arch,
     BumpAllocator,
     BuddyAllocator,
     FrameAllocator,
     FrameCount,
     FrameUsage,
     MemoryArea,
     PageFlags,
     PageMapper,
     PhysicalAddress,
     VirtualAddress,
     X8664Arch as RmmA,
 };
-use spin::Mutex;
+use spin::{Mutex, MutexGuard};
 
 extern "C" {
     /// The starting byte of the text (code) data segment.
@@ -210,21 +211,15 @@ unsafe fn inner(
     BuddyAllocator::<A>::new(bump_allocator).expect("failed to create BuddyAllocator")
 }
 
-pub struct LockedAllocator {
-    inner: Mutex<Option<BuddyAllocator<RmmA>>>,
-}
+// There can only be one allocator (at the moment), so making this a ZST is great!
+#[derive(Clone, Copy)]
+pub struct LockedAllocator;
 
-impl LockedAllocator {
-    const fn new() -> Self {
-        Self {
-            inner: Mutex::new(None)
-        }
-    }
-}
+static INNER_ALLOCATOR: Mutex<Option<BuddyAllocator<RmmA>>> = Mutex::new(None);
 
 impl FrameAllocator for LockedAllocator {
     unsafe fn allocate(&mut self, count: FrameCount) -> Option<PhysicalAddress> {
-        if let Some(ref mut allocator) = *self.inner.lock() {
+        if let Some(ref mut allocator) = *INNER_ALLOCATOR.lock() {
             allocator.allocate(count)
         } else {
             None
@@ -232,38 +227,105 @@ impl FrameAllocator for LockedAllocator {
     }
 
     unsafe fn free(&mut self, address: PhysicalAddress, count: FrameCount) {
-        if let Some(ref mut allocator) = *self.inner.lock() {
+        if let Some(ref mut allocator) = *INNER_ALLOCATOR.lock() {
             allocator.free(address, count)
         }
     }
 
     unsafe fn usage(&self) -> FrameUsage {
-        if let Some(ref allocator) = *self.inner.lock() {
+        if let Some(ref allocator) = *INNER_ALLOCATOR.lock() {
             allocator.usage()
         } else {
             FrameUsage::new(FrameCount::new(0), FrameCount::new(0))
         }
     }
 }
+impl core::fmt::Debug for LockedAllocator {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match INNER_ALLOCATOR.try_lock().as_deref() {
+            Some(Some(alloc)) => write!(f, "[locked allocator: {:?}]", unsafe { alloc.usage() }),
+            Some(None) => write!(f, "[uninitialized lock allocator]"),
+            None => write!(f, "[failed to lock]"),
+        }
+    }
+}
 
 static mut AREAS: [MemoryArea; 512] = [MemoryArea {
     base: PhysicalAddress::new(0),
     size: 0,
 }; 512];
 
-pub static mut FRAME_ALLOCATOR: LockedAllocator = LockedAllocator::new();
-
-pub unsafe fn mapper_new(table_addr: PhysicalAddress) -> PageMapper<'static, RmmA, LockedAllocator> {
-    PageMapper::new(table_addr, &mut FRAME_ALLOCATOR)
+pub static FRAME_ALLOCATOR: LockedAllocator = LockedAllocator;
+
+const NO_PROCESSOR: usize = !0;
+static LOCK_OWNER: AtomicUsize = AtomicUsize::new(NO_PROCESSOR);
+static LOCK_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+// TODO: Support, perhaps via const generics, embedding address checking in PageMapper, thereby
+// statically enforcing that the kernel mapper can only map things in the kernel half, and vice
+// versa.
+/// A guard to the global lock protecting the upper 128 TiB of kernel address space.
+///
+/// NOTE: Use this with great care! Since heap allocations may also require this lock when the heap
+/// needs to be expanded, it must not be held while memory allocations are done!
+// TODO: Make the lock finer-grained so that e.g. the heap part can be independent from e.g.
+// PHYS_PML4?
+pub struct KernelMapper {
+    mapper: crate::paging::PageMapper,
+    ro: bool,
 }
+impl KernelMapper {
+    fn lock_inner(current_processor: usize) -> bool {
+        loop {
+            match LOCK_OWNER.compare_exchange_weak(NO_PROCESSOR, current_processor, Ordering::Acquire, Ordering::Relaxed) {
+                Ok(_) => break,
+                // already owned by this hardware thread
+                Err(id) if id == current_processor => break,
+                // either CAS failed, or some other hardware thread holds the lock
+                Err(_) => core::hint::spin_loop(),
+            }
+        }
+
+        let prev_count = LOCK_COUNT.fetch_add(1, Ordering::Relaxed);
+        atomic::compiler_fence(Ordering::Acquire);
 
-//TODO: global paging lock?
-pub unsafe fn mapper_create() -> Option<PageMapper<'static, RmmA, LockedAllocator>> {
-    PageMapper::create(&mut FRAME_ALLOCATOR)
+        prev_count > 0
+    }
+    pub unsafe fn lock_for_manual_mapper(current_processor: usize, mapper: crate::paging::PageMapper) -> Self {
+        let ro = Self::lock_inner(current_processor);
+        Self {
+            mapper,
+            ro,
+        }
+    }
+    pub fn lock_manually(current_processor: usize) -> Self {
+        unsafe { Self::lock_for_manual_mapper(current_processor, PageMapper::new(RmmA::table(), FRAME_ALLOCATOR)) }
+    }
+    pub fn lock() -> Self {
+        Self::lock_manually(crate::cpu_id())
+    }
+    pub fn get_mut(&mut self) -> Option<&mut crate::paging::PageMapper> {
+        if self.ro {
+            None
+        } else {
+            Some(&mut self.mapper)
+        }
+    }
 }
+impl core::ops::Deref for KernelMapper {
+    type Target = crate::paging::PageMapper;
 
-pub unsafe fn mapper_current() -> PageMapper<'static, RmmA, LockedAllocator> {
-    PageMapper::current(&mut FRAME_ALLOCATOR)
+    fn deref(&self) -> &Self::Target {
+        &self.mapper
+    }
+}
+impl Drop for KernelMapper {
+    fn drop(&mut self) {
+        if LOCK_COUNT.fetch_sub(1, Ordering::Relaxed) == 1 {
+            LOCK_OWNER.store(NO_PROCESSOR, Ordering::Release);
+        }
+        atomic::compiler_fence(Ordering::Release);
+    }
 }
 
 pub unsafe fn init(
@@ -388,5 +450,5 @@ pub unsafe fn init(
         acpi_base, acpi_size_aligned,
         initfs_base, initfs_size_aligned,
     );
-    *FRAME_ALLOCATOR.inner.lock() = Some(allocator);
+    *INNER_ALLOCATOR.lock() = Some(allocator);
 }
-- 
GitLab


From 65890832382634ce76fb3fa445255605d67706e4 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 17 Jul 2022 14:11:57 +0200
Subject: [PATCH 33/44] Partial: Migrate schemes to RMM.
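
These scheme conversions rely on the KernelMapper contract introduced
in the rmm.rs hunks above: the lock is recursive per CPU, and only the
outermost guard may mutate. A sketch of that contract (the function is
illustrative; the behavior follows from lock_inner and get_mut above):

    use crate::paging::KernelMapper;

    fn kernel_mapper_contract() {
        let mut outer = KernelMapper::lock();
        assert!(outer.get_mut().is_some());   // sole owner: read-write

        let mut inner = KernelMapper::lock(); // re-entrant on the same CPU
        assert!(inner.get_mut().is_none());   // nested guards are read-only

        // Deref still reaches the underlying PageMapper for lookups such
        // as inner.translate(addr). The schemes below use exactly this
        // split: translate through any guard, map_phys through get_mut().
        drop(inner);
        drop(outer);
    }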
--- src/scheme/live.rs | 9 +++++---- src/scheme/memory.rs | 8 ++++---- src/scheme/mod.rs | 2 +- src/scheme/pipe.rs | 2 +- src/scheme/proc.rs | 41 +++++++++++++++++++++-------------------- src/scheme/user.rs | 23 +++++++++++++---------- 6 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/scheme/live.rs b/src/scheme/live.rs index 63877c81..acf14a03 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -13,7 +13,7 @@ use syscall::flag::{MODE_DIR, MODE_FILE}; use syscall::scheme::{calc_seek_offset_usize, Scheme}; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, VirtualAddress}; use crate::paging::mapper::PageFlushAll; static mut LIST: [u8; 2] = [b'0', b'\n']; @@ -55,15 +55,16 @@ impl DiskScheme { // Ensure live disk pages are mapped let virt = phys + crate::PHYS_OFFSET; unsafe { - let mut active_table = ActivePageTable::new(TableKind::Kernel); + let mut mapper = KernelMapper::lock(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(virt)); let end_page = Page::containing_address(VirtualAddress::new(virt + size - 1)); for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { + if mapper.translate(page.start_address()).is_none() { let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::PHYS_OFFSET)); let flags = PageFlags::new().write(true); - let result = active_table.map_to(page, frame, flags); + let result = mapper.get_mut().expect("expected KernelMapper not to be in use while initializing live scheme").map_phys(page.start_address(), frame.start_address(), flags).expect("failed to map live page"); flush_all.consume(result); } } diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 44c7d165..8bcc8cf8 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,10 +1,9 @@ use crate::context; use crate::context::memory::{page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; -use crate::paging::{ActivePageTable, mapper::PageFlushAll, Page, VirtualAddress}; -use crate::syscall::data::{Map, OldMap, StatVfs}; +use crate::paging::{mapper::PageFlushAll, Page, VirtualAddress}; +use crate::syscall::data::{Map, StatVfs}; use crate::syscall::error::*; -use crate::syscall::flag::MapFlags; use crate::syscall::scheme::Scheme; pub struct MemoryScheme; @@ -24,10 +23,11 @@ impl MemoryScheme { let context = context_lock.read(); let mut addr_space = context.addr_space()?.write(); + let addr_space = &mut *addr_space; let region = addr_space.grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())?); + addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut addr_space.table.utable, PageFlushAll::new())?); Ok(region.start_address().data()) } diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index ac81fbbd..fb3b7dc7 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -16,7 +16,7 @@ use alloc::{ use core::sync::atomic::AtomicUsize; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use crate::context::{Context, memory::AddrSpace, 
file::FileDescriptor};
+use crate::context::{memory::AddrSpace, file::FileDescriptor};
 use crate::syscall::error::*;
 use crate::syscall::scheme::Scheme;
diff --git a/src/scheme/pipe.rs b/src/scheme/pipe.rs
index ab7cb163..4090e7e9 100644
--- a/src/scheme/pipe.rs
+++ b/src/scheme/pipe.rs
@@ -1,7 +1,7 @@
 use alloc::sync::{Arc, Weak};
 use alloc::collections::{BTreeMap, VecDeque};
 use core::sync::atomic::{AtomicUsize, Ordering};
-use spin::{Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard};
+use spin::{Mutex, Once, RwLock};
 use crate::event;
 use crate::scheme::SchemeId;
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 3fa2a935..628dd44c 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -1,9 +1,9 @@
 use crate::{
-    arch::paging::{ActivePageTable, Flusher, InactivePageTable, mapper::{InactiveFlusher, Mapper, PageFlushAll}, Page, RmmA, VirtualAddress},
+    arch::paging::{Flusher, mapper::{InactiveFlusher, PageFlushAll}, Page, RmmA, VirtualAddress},
     context::{self, Context, ContextId, Status, file::{FileDescription, FileDescriptor}, memory::{AddrSpace, Grant, new_addrspace, map_flags, page_flags, Region}},
     memory::PAGE_SIZE,
     ptrace,
-    scheme::{self, AtomicSchemeId, FileHandle, KernelScheme, SchemeId},
+    scheme::{self, FileHandle, KernelScheme, SchemeId},
     syscall::{
         FloatRegisters,
         IntRegisters,
@@ -13,7 +13,6 @@ use crate::{
         flag::*,
         scheme::{calc_seek_offset_usize, Scheme},
         self,
-        validate,
     },
 };
@@ -279,7 +278,7 @@ impl ProcScheme {
 }
 fn current_addrspace() -> Result<Arc<RwLock<AddrSpace>>> {
-    Ok(Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?.read().addr_space()?))
+    Ok(Arc::clone(context::current()?.read().addr_space()?))
 }
 impl ProcScheme {
@@ -451,7 +450,7 @@ impl Scheme for ProcScheme {
             // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But
             // in that case, what scheme?
             b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()? }, false),
-            b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.read().try_clone()? }, false),
+            b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.write().try_clone()? }, false),
             b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true),
             grant_handle if grant_handle.starts_with(b"grant-") => {
@@ -747,16 +746,17 @@ impl Scheme for ProcScheme {
                 }
                 let mut addrspace = addrspace.write();
+                let addrspace = &mut *addrspace;
                 let is_active = addrspace.is_current();
                 let (mut inactive, mut active);
-                let (mut mapper, mut flusher) = if is_active {
-                    active = (unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new());
-                    (active.0.mapper(), &mut active.1 as &mut dyn Flusher<RmmA>)
+                let mut flusher = if is_active {
+                    active = PageFlushAll::new();
+                    &mut active as &mut dyn Flusher<RmmA>
                 } else {
-                    inactive = (unsafe { InactivePageTable::from_address(addrspace.frame.utable.start_address().data()) }, InactiveFlusher::new());
-                    (inactive.0.mapper(), &mut inactive.1 as &mut dyn Flusher<RmmA>)
+                    inactive = InactiveFlusher::new();
+                    &mut inactive as &mut dyn Flusher<RmmA>
                 };
                 let region = Region::new(VirtualAddress::new(base), size);
@@ -772,7 +772,7 @@ impl Scheme for ProcScheme {
                     addrspace.grants.insert(after);
                 }
-                let res = current.unmap(&mut mapper, &mut flusher);
+                let res = current.unmap(&mut addrspace.table.utable, &mut flusher);
                 if res.file_desc.is_some() {
                     // We prefer avoiding file operations from within the kernel. If userspace
@@ -788,7 +788,10 @@ impl Scheme for ProcScheme {
                 // Forbid transferring grants to the same address space!
if is_active { return Err(Error::new(EBUSY)); } - let src_grant = current_addrspace()?.write().grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; + let current_addrspace = current_addrspace()?; + let mut current_addrspace = current_addrspace.write(); + let current_addrspace = &mut *current_addrspace; + let src_grant = current_addrspace.grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); @@ -799,13 +802,13 @@ impl Scheme for ProcScheme { addrspace.grants.insert(Grant::transfer( src_grant, base_page, - &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, - &mut mapper, + &mut current_addrspace.table.utable, + &mut addrspace.table.utable, PageFlushAll::new(), flusher, - )); + )?); } else if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - addrspace.grants.insert(Grant::zeroed(base_page, size / PAGE_SIZE, page_flags(flags), &mut mapper, flusher)?); + addrspace.grants.insert(Grant::zeroed(base_page, size / PAGE_SIZE, page_flags(flags), &mut addrspace.table.utable, flusher)?); } // TODO: Set some "in use" flag every time an address space is switched to? This @@ -1104,16 +1107,14 @@ impl Scheme for ProcScheme { let prev_addr_space = context.set_addr_space(new); - if let Some(prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { + if let Some(mut prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { // We are the last reference to the address space; therefore it must be // unmapped. - let mut table = unsafe { InactivePageTable::from_address(prev.frame.utable.start_address().data()) }; - // TODO: Optimize away clearing of page tables? In that case, what about memory // deallocation? 
for grant in prev.grants.into_iter() { - grant.unmap(&mut table.mapper(), ()); + grant.unmap(&mut prev.table.utable, ()); } } diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 2a6fd212..e55c9037 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -8,9 +8,9 @@ use spin::{Mutex, RwLock}; use crate::context::{self, Context}; use crate::context::file::FileDescriptor; -use crate::context::memory::{DANGLING, page_flags, round_down_pages, round_up_pages, Grant, Region, GrantFileRef}; +use crate::context::memory::{DANGLING, page_flags, Grant, Region, GrantFileRef}; use crate::event; -use crate::paging::{ActivePageTable, PAGE_SIZE, InactivePageTable, mapper::InactiveFlusher, Page, VirtualAddress}; +use crate::paging::{PAGE_SIZE, mapper::InactiveFlusher, Page, round_down_pages, round_up_pages, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; use crate::sync::{WaitQueue, WaitMap}; use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; @@ -147,8 +147,7 @@ impl UserInner { let mut context = context_lock.write(); let mut addr_space = context.addr_space()?.write(); - - let mut new_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }; + let addr_space = &mut *addr_space; let src_address = round_down_pages(address); let dst_address = round_down_pages(dst_address); @@ -156,6 +155,11 @@ impl UserInner { let aligned_size = round_up_pages(offset + size); let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), aligned_size, flags)?; + let current_addrspace = Arc::clone( + context::contexts().current().ok_or(Error::new(ESRCH))? + .read().addr_space()? + ); + //TODO: Use syscall_head and syscall_tail to avoid leaking data addr_space.grants.insert(Grant::borrow( Page::containing_address(VirtualAddress::new(src_address)), @@ -163,10 +167,10 @@ impl UserInner { aligned_size / PAGE_SIZE, page_flags(flags), desc_opt, - &mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, - &mut new_table.mapper(), + &mut current_addrspace.write().table.utable, + &mut addr_space.table.utable, InactiveFlusher::new(), - )); + )?); Ok(VirtualAddress::new(dst_region.start_address().data() + offset)) } @@ -176,16 +180,15 @@ impl UserInner { return Ok(()); } let context_lock = self.context.upgrade().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); + let context = context_lock.write(); let mut addr_space = context.addr_space()?.write(); - let mut other_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) }; let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, None => return Err(Error::new(EFAULT)), }; - addr_space.grants.take(®ion).unwrap().unmap(&mut other_table.mapper(), InactiveFlusher::new()); + addr_space.grants.take(®ion).unwrap().unmap(&mut addr_space.table.utable, InactiveFlusher::new()); Ok(()) } -- GitLab From c912d9e0dba91653ad47b487ce9757f3606caa4a Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 17 Jul 2022 14:13:34 +0200 Subject: [PATCH 34/44] Partial: migrate syscall handling code to RMM. 
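The recurring transformation here: instead of conjuring an ActivePageTable for
whatever table the CPU happens to have loaded, each syscall now takes an explicit
handle to the calling context's address space. A sketch of the idiom (the patch
inlines this at each call site):

    // Clone the Arc first, so the context-list lock is dropped before the
    // address-space lock is taken.
    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
    let mut guard = addr_space.write();
    let addr_space = &mut *guard;
    // `addr_space.grants` and `addr_space.table.utable` can now be borrowed
    // disjointly within the same critical section.

The `&mut *guard` reborrow is what lets the borrow checker split the guard into
independent borrows of `grants` and `table.utable`, which several hunks below
rely on.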
---
 src/syscall/driver.rs   | 33 ++++++++++++++------------
 src/syscall/fs.rs       | 14 +++++------
 src/syscall/futex.rs    | 14 +++++------
 src/syscall/process.rs  | 51 ++++++++++++++++++++++-------------------
 src/syscall/validate.rs | 22 ++++++++++++++----
 5 files changed, 76 insertions(+), 58 deletions(-)

diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs
index 103602a8..325e413a 100644
--- a/src/syscall/driver.rs
+++ b/src/syscall/driver.rs
@@ -1,12 +1,14 @@
 use crate::interrupt::InterruptStack;
 use crate::memory::{allocate_frames_complex, deallocate_frames, Frame, PAGE_SIZE};
-use crate::paging::{ActivePageTable, PageFlags, PhysicalAddress, VirtualAddress};
+use crate::paging::{Page, PageFlags, PhysicalAddress, VirtualAddress, mapper::PageFlushAll};
 use crate::paging::entry::EntryFlags;
 use crate::context;
 use crate::context::memory::{DANGLING, Grant, Region};
 use crate::syscall::error::{Error, EFAULT, EINVAL, ENOMEM, EPERM, ESRCH, Result};
 use crate::syscall::flag::{PhysallocFlags, PartialAllocStrategy, PhysmapFlags, PHYSMAP_WRITE, PHYSMAP_WRITE_COMBINE, PHYSMAP_NO_CACHE};
+
+use alloc::sync::Arc;
+
 fn enforce_root() -> Result<()> {
     let contexts = context::contexts();
     let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
@@ -84,11 +86,13 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags)
     }
     // TODO: Enforce size being a multiple of the page size, fail otherwise.
+    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
     let contexts = context::contexts();
     let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
     let context = context_lock.read();
     let mut addr_space = context.addr_space()?.write();
+    let addr_space = &mut *addr_space;
     let dst_address = addr_space.grants.find_free(size).ok_or(Error::new(ENOMEM))?;
@@ -105,11 +109,13 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags)
     }
     addr_space.grants.insert(Grant::physmap(
-        PhysicalAddress::new(physical_address),
-        dst_address.start_address(),
-        size,
+        Frame::containing_address(PhysicalAddress::new(physical_address)),
+        Page::containing_address(dst_address.start_address()),
+        size / PAGE_SIZE,
         page_flags,
-    ));
+        &mut addr_space.table.utable,
+        PageFlushAll::new(),
+    )?);
     Ok(dst_address.start_address().data())
 }
@@ -123,16 +129,12 @@ pub fn inner_physunmap(virtual_address: usize) -> Result<usize> {
     if virtual_address == 0 {
         Ok(0)
     } else {
-        let contexts = context::contexts();
-        let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
-        let context = context_lock.read();
-
-        let mut addr_space = context.addr_space()?.write();
+        let addr_space = Arc::clone(context::current()?.read().addr_space()?);
+        let mut addr_space = addr_space.write();
         if let Some(region) = addr_space.grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) {
-            use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind};
-            addr_space.grants.take(&region).unwrap().unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, PageFlushAll::new());
+            addr_space.grants.take(&region).unwrap().unmap(&mut addr_space.table.utable, PageFlushAll::new());
             return Ok(0);
         }
@@ -147,10 +149,11 @@ pub fn physunmap(virtual_address: usize) -> Result<usize> {
 pub fn virttophys(virtual_address: usize) -> Result<usize> {
     enforce_root()?;
-    let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(virtual_address).kind()) };
+    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
+    let addr_space = addr_space.read();
-    match active_table.translate(VirtualAddress::new(virtual_address)) {
-        Some(physical_address) => Ok(physical_address.data()),
+    match addr_space.table.utable.translate(VirtualAddress::new(virtual_address)) {
+        Some((physical_address, _)) => Ok(physical_address.data()),
         None => Err(Error::new(EFAULT))
     }
 }
diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index 38e2cd31..7b8b052b 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -8,7 +8,7 @@ use crate::context::file::{FileDescriptor, FileDescription};
 use crate::context::memory::Region;
 use crate::context;
 use crate::memory::PAGE_SIZE;
-use crate::paging::{ActivePageTable, mapper::PageFlushAll, TableKind, VirtualAddress};
+use crate::paging::{mapper::PageFlushAll, VirtualAddress};
 use crate::scheme::{self, FileHandle};
 use crate::syscall::data::{Packet, Stat};
 use crate::syscall::error::*;
@@ -486,12 +486,12 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
     let context = context_lock.read();
     let mut addr_space = context.addr_space()?.write();
-    let grants = &mut addr_space.grants;
+    let addr_space = &mut *addr_space;
-    let conflicting: Vec<Region> = grants.conflicts(requested).map(Region::from).collect();
+    let conflicting: Vec<Region> = addr_space.grants.conflicts(requested).map(Region::from).collect();
     for conflict in conflicting {
-        let grant = grants.take(&conflict).expect("conflicting region didn't exist");
+        let grant = addr_space.grants.take(&conflict).expect("conflicting region didn't exist");
         let intersection = grant.intersect(requested);
         let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts");
@@ -502,14 +502,14 @@ pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
         // Keep untouched regions
         if let Some(before) = before {
-            grants.insert(before);
+            addr_space.grants.insert(before);
         }
         if let Some(after) = after {
-            grants.insert(after);
+            addr_space.grants.insert(after);
         }
         // Remove irrelevant region
-        grant.unmap(&mut *unsafe { ActivePageTable::new(TableKind::User) }, &mut flusher);
+        grant.unmap(&mut addr_space.table.utable, &mut flusher);
     }
 }
diff --git a/src/syscall/futex.rs b/src/syscall/futex.rs
index 53fd2837..b3fde4bb 100644
--- a/src/syscall/futex.rs
+++ b/src/syscall/futex.rs
@@ -12,7 +12,7 @@ use rmm::Arch;
 use crate::context::{self, Context};
 use crate::time;
 use crate::memory::PhysicalAddress;
-use crate::paging::{ActivePageTable, TableKind, VirtualAddress};
+use crate::paging::VirtualAddress;
 use crate::syscall::data::TimeSpec;
 use crate::syscall::error::{Error, Result, ESRCH, EAGAIN, EFAULT, EINVAL};
 use crate::syscall::flag::{FUTEX_WAIT, FUTEX_WAIT64, FUTEX_WAKE, FUTEX_REQUEUE};
@@ -44,8 +44,9 @@ pub fn futexes_mut() -> RwLockWriteGuard<'static, FutexList> {
 }
 pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> Result<usize> {
-    let target_physaddr = unsafe {
-        let active_table = ActivePageTable::new(TableKind::User);
+    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
+
+    let (target_physaddr, _) = unsafe {
         let virtual_address = VirtualAddress::new(addr);
         if !crate::CurrentRmmArch::virt_is_valid(virtual_address) {
@@ -58,7 +59,7 @@
             return Err(Error::new(EFAULT));
         }
-        active_table.translate(virtual_address).ok_or(Error::new(EFAULT))?
+        addr_space.read().table.utable.translate(virtual_address).ok_or(Error::new(EFAULT))?
     };
     match op {
@@ -162,7 +163,7 @@ pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> R
             Ok(woken)
         },
         FUTEX_REQUEUE => {
-            let addr2_physaddr = unsafe {
+            let (addr2_physaddr, _) = unsafe {
                 let addr2_virt = VirtualAddress::new(addr2);
                 if !crate::CurrentRmmArch::virt_is_valid(addr2_virt) {
@@ -175,8 +176,7 @@
                     return Err(Error::new(EFAULT));
                 }
-                let active_table = ActivePageTable::new(TableKind::User);
-                active_table.translate(addr2_virt).ok_or(Error::new(EFAULT))?
+                addr_space.read().table.utable.translate(addr2_virt).ok_or(Error::new(EFAULT))?
             };
             let mut woken = 0;
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index 4dd223ff..fdfe7fda 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -1,5 +1,4 @@
 use alloc::{
-    boxed::Box,
     sync::Arc,
     vec::Vec,
 };
@@ -13,7 +12,7 @@
 use crate::Bootstrap;
 use crate::context;
 use crate::interrupt;
 use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll};
-use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, RmmArch, TableKind, VirtualAddress, PAGE_SIZE};
+use crate::paging::{Page, PageFlags, VirtualAddress, PAGE_SIZE};
 use crate::ptrace;
 use crate::start::usermode;
 use crate::syscall::data::SigAction;
@@ -38,16 +37,16 @@ fn empty<'lock>(context_lock: &'lock RwLock<Context>, mut context: RwLockWriteGu
         None => return context,
     };
-    if let Ok(addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) {
+    if let Ok(mut addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) {
+        let mapper = &mut addr_space.table.utable;
+
         for grant in addr_space.grants.into_iter() {
             let unmap_result = if reaping {
                 log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant);
-                let mut new_table = unsafe { InactivePageTable::from_address(addr_space.frame.utable.start_address().data()) };
-
-                grant.unmap(&mut new_table.mapper(), &mut InactiveFlusher::new())
+                grant.unmap(mapper, &mut InactiveFlusher::new())
             } else {
-                grant.unmap(&mut *unsafe { ActivePageTable::new(rmm::TableKind::User) }, PageFlushAll::new())
+                grant.unmap(mapper, PageFlushAll::new())
             };
             if unmap_result.file_desc.is_some() {
@@ -294,20 +293,20 @@ pub fn kill(pid: ContextId, sig: usize) -> Result<usize> {
 pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result<usize> {
     // println!("mprotect {:#X}, {}, {:#X}", address, size, flags);
-    let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?;
-    let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?;
+    let end_address = address.checked_add(size).ok_or(Error::new(EFAULT))?;
-    let mut active_table = unsafe { ActivePageTable::new(TableKind::User) };
+    let address_space = Arc::clone(context::current()?.read().addr_space()?);
+    let mut address_space = address_space.write();
     let mut flush_all = PageFlushAll::new();
     let start_page = Page::containing_address(VirtualAddress::new(address));
     let end_page = Page::containing_address(VirtualAddress::new(end_address));
-    for page in Page::range_inclusive(start_page, end_page) {
+    for page in Page::range_exclusive(start_page, end_page) {
         // Check if the page is actually mapped before trying to change the flags.
         // FIXME can other processes change if a page is mapped beneath our feet?
- let mut page_flags = if let Some(page_flags) = active_table.translate_page_flags(page) { - page_flags + let mut page_flags = if let Some((_, flags)) = address_space.table.utable.translate(page.start_address()) { + flags } else { flush_all.flush(); return Err(Error::new(EFAULT)); @@ -335,7 +334,7 @@ pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result { //TODO: No flags for readable pages } - let flush = active_table.remap(page, page_flags); + let flush = unsafe { address_space.table.utable.remap(page.start_address(), page_flags).expect("failed to remap page in mprotect") }; flush_all.consume(flush); } @@ -629,18 +628,22 @@ pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap) -> ! { assert_ne!(bootstrap.page_count, 0); { - let grant = context::memory::Grant::physmap( - bootstrap.base.start_address(), - VirtualAddress::new(0), - bootstrap.page_count * PAGE_SIZE, - PageFlags::new().user(true).write(true).execute(true), - ); - - context::contexts().current() + let addr_space = Arc::clone(context::contexts().current() .expect("expected a context to exist when executing init") .read().addr_space() - .expect("expected bootstrap context to have an address space") - .write().grants.insert(grant); + .expect("expected bootstrap context to have an address space")); + + let mut addr_space = addr_space.write(); + let addr_space = &mut *addr_space; + + addr_space.grants.insert(context::memory::Grant::physmap( + bootstrap.base.clone(), + Page::containing_address(VirtualAddress::new(0)), + bootstrap.page_count, + PageFlags::new().user(true).write(true).execute(true), + &mut addr_space.table.utable, + PageFlushAll::new(), + ).expect("failed to physmap bootstrap memory")); } #[cfg(target_arch = "x86_64")] diff --git a/src/syscall/validate.rs b/src/syscall/validate.rs index 2aac27a4..f4a05543 100644 --- a/src/syscall/validate.rs +++ b/src/syscall/validate.rs @@ -1,24 +1,36 @@ +// TODO: Maybe stop handing out slices and instead use a wrapper type that supports copying etc. +// Invalid pages will cause page faults, which can be handled so that they are caught and EFAULT is +// returned. This will also make SMAP much, much, easier. c.f. Linux's copy_from_user, copy_to_user +// which are written in assembly and handle page faults. use core::{mem, slice, str}; -use crate::paging::{ActivePageTable, Page, VirtualAddress}; +use crate::context; +use crate::paging::{Page, TableKind, VirtualAddress}; use crate::syscall::error::*; +use alloc::sync::Arc; + fn validate(address: usize, size: usize, writable: bool) -> Result<()> { + if VirtualAddress::new(address.saturating_add(size)).kind() != TableKind::User { + return Err(Error::new(EFAULT)); + } + let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?; let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?; - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(address).kind()) }; + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + let addr_space = addr_space.read(); let start_page = Page::containing_address(VirtualAddress::new(address)); let end_page = Page::containing_address(VirtualAddress::new(end_address)); for page in Page::range_inclusive(start_page, end_page) { - if let Some(page_flags) = active_table.translate_page_flags(page) { - if ! 
page_flags.has_user() { + if let Some((_, flags)) = addr_space.table.utable.translate(page.start_address()) { + if !flags.has_user() { // println!("{:X}: Not usermode", page.start_address().data()); return Err(Error::new(EFAULT)); } - if writable && ! page_flags.has_write() { + if writable && !flags.has_write() { // println!("{:X}: Not writable {}", page.start_address().data(), writable); return Err(Error::new(EFAULT)); } -- GitLab From dc8ce1c22b6161bb5fb2e502efd85ab2e1312f4d Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sun, 17 Jul 2022 14:13:46 +0200 Subject: [PATCH 35/44] Partial: migrate context handling code to RMM. --- src/context/arch/x86_64.rs | 3 +- src/context/context.rs | 11 +- src/context/list.rs | 6 +- src/context/memory.rs | 203 +++++++++++++++++++++---------------- src/context/mod.rs | 12 ++- 5 files changed, 131 insertions(+), 104 deletions(-) diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 85855dc9..97d14a94 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -172,8 +172,7 @@ pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { // Unless we acquire this lock, it may be possible that the TLB will not contain new // entries. While this can be caught and corrected in a page fault handler, this is not // true when entries are removed from a page table! - let next_space = next_space.read(); - RmmA::set_table(next_space.frame.utable.start_address()); + next_space.read().table.utable.make_current(); } None => { RmmA::set_table(empty_cr3()); diff --git a/src/context/context.rs b/src/context/context.rs index 76248d6b..090b477b 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -12,18 +12,18 @@ use core::{ }; use spin::RwLock; -use crate::arch::{interrupt::InterruptStack, paging::{PAGE_SIZE, RmmA, RmmArch}}; +use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::{AddrSpace, new_addrspace, UserGrants}; +use crate::context::memory::AddrSpace; use crate::ipi::{ipi, IpiKind, IpiTarget}; use crate::memory::Enomem; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; use crate::syscall::data::SigAction; -use crate::syscall::error::{Result, Error, ENOMEM, ESRCH}; +use crate::syscall::error::{Result, Error, ESRCH}; use crate::syscall::flag::{SIG_DFL, SigActionFlags}; /// Unique identifier for a context (i.e. `pid`). 
@@ -556,11 +556,8 @@ impl Context {
     }
     #[must_use = "grants must be manually unmapped, otherwise it WILL panic!"]
     pub fn set_addr_space(&mut self, addr_space: Arc<RwLock<AddrSpace>>) -> Option<Arc<RwLock<AddrSpace>>> {
-        let physaddr = addr_space.read().frame.utable.start_address();
         if self.id == super::context_id() {
-            unsafe {
-                RmmA::set_table(physaddr);
-            }
+            unsafe { addr_space.read().table.utable.make_current(); }
         }
         self.addr_space.replace(addr_space)
diff --git a/src/context/list.rs b/src/context/list.rs
index 6594e4c7..e900ebcf 100644
--- a/src/context/list.rs
+++ b/src/context/list.rs
@@ -1,13 +1,11 @@
 use alloc::sync::Arc;
-use alloc::boxed::Box;
 use alloc::collections::BTreeMap;
-use core::alloc::{GlobalAlloc, Layout};
 use core::{iter, mem};
 use core::sync::atomic::Ordering;
-use crate::paging::{ActivePageTable, TableKind};
+
 use spin::RwLock;
-use crate::syscall::error::{Result, Error, EAGAIN, ENOMEM};
+use crate::syscall::error::{Result, Error, EAGAIN};
 use super::context::{Context, ContextId};
 /// Context list type
diff --git a/src/context/memory.rs b/src/context/memory.rs
index eaed5db0..c9580027 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -1,12 +1,10 @@
 use alloc::collections::{BTreeMap, BTreeSet};
-use alloc::sync::{Arc, Weak};
+use alloc::sync::Arc;
 use core::borrow::Borrow;
 use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd};
 use core::fmt::{self, Debug};
-use core::intrinsics;
 use core::ops::Deref;
-use core::sync::atomic;
-use spin::{Mutex, RwLock};
+use spin::RwLock;
 use syscall::{
     flag::MapFlags,
     error::*,
@@ -15,18 +13,9 @@
 use rmm::Arch as _;
 use crate::arch::paging::PAGE_SIZE;
 use crate::context::file::FileDescriptor;
-use crate::memory::Frame;
-use crate::paging::mapper::{Flusher, InactiveFlusher, Mapper, PageFlushAll};
-use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, TableKind, VirtualAddress};
-
-/// Round down to the nearest multiple of page size
-pub fn round_down_pages(number: usize) -> usize {
-    number - number % PAGE_SIZE
-}
-/// Round up to the nearest multiple of page size
-pub fn round_up_pages(number: usize) -> usize {
-    round_down_pages(number + PAGE_SIZE - 1)
-}
+use crate::memory::{Enomem, Frame};
+use crate::paging::mapper::{Flusher, PageFlushAll};
+use crate::paging::{KernelMapper, Page, PageFlags, PageIter, PageMapper, PhysicalAddress, RmmA, round_up_pages, VirtualAddress};
 pub fn page_flags(flags: MapFlags) -> PageFlags<RmmA> {
     PageFlags::new()
@@ -61,25 +50,20 @@ pub fn new_addrspace() -> Result<Arc<RwLock<AddrSpace>>> {
 #[derive(Debug)]
 pub struct AddrSpace {
-    pub frame: Tables,
+    pub table: Table,
     pub grants: UserGrants,
 }
 impl AddrSpace {
     /// Attempt to clone an existing address space so that all mappings are copied (CoW).
-    pub fn try_clone(&self) -> Result<Arc<RwLock<AddrSpace>>> {
+    pub fn try_clone(&mut self) -> Result<Arc<RwLock<AddrSpace>>> {
         let mut new = new_addrspace()?;
+        let new_guard = Arc::get_mut(&mut new)
+            .expect("expected new address space Arc not to be aliased")
+            .get_mut();
-        // TODO: Abstract away this.
- let (mut inactive, mut active); + let new_guard = Arc::get_mut(&mut new) + .expect("expected new address space Arc not to be aliased") + .get_mut(); - let mut this_mapper = if self.is_current() { - active = unsafe { ActivePageTable::new(rmm::TableKind::User) }; - active.mapper() - } else { - inactive = unsafe { InactivePageTable::from_address(self.frame.utable.start_address().data()) }; - inactive.mapper() - }; - let mut new_mapper = unsafe { InactivePageTable::from_address(new.read().frame.utable.start_address().data()) }; + let this_mapper = &mut self.table.utable; + let new_mapper = &mut new_guard.table.utable; for grant in self.grants.iter() { if grant.desc_opt.is_some() { continue; } @@ -88,11 +72,11 @@ impl AddrSpace { // TODO: Replace this with CoW if grant.owned { - new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), &mut new_mapper.mapper(), ())?; + new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), new_mapper, ())?; - for page in new_grant.pages() { - let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate_page(page).expect("grant containing unmapped pages").start_address()) }.data() as *const u8; - let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.mapper().translate_page(page).expect("grant containing unmapped pages").start_address()) }.data() as *mut u8; + for page in new_grant.pages().map(Page::start_address) { + let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate(page).expect("grant containing unmapped pages").0) }.data() as *const u8; + let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.translate(page).expect("grant containing unmapped pages").0) }.data() as *mut u8; unsafe { new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); @@ -102,21 +86,21 @@ impl AddrSpace { // TODO: Remove reborrow? In that case, physmapped memory will need to either be // remapped when cloning, or be backed by a file descriptor (like // `memory:physical`). 
-            new_grant = Grant::reborrow(&grant, Page::containing_address(grant.start_address()), &mut this_mapper, &mut new_mapper.mapper(), ());
+            new_grant = Grant::reborrow(&grant, Page::containing_address(grant.start_address()), this_mapper, new_mapper, ())?;
             }
-            new.write().grants.insert(new_grant);
+            new_guard.grants.insert(new_grant);
         }
         Ok(new)
     }
     pub fn new() -> Result<Self> {
         Ok(Self {
             grants: UserGrants::new(),
-            frame: setup_new_utable()?,
+            table: setup_new_utable()?,
         })
     }
     pub fn is_current(&self) -> bool {
-        self.frame.utable.start_address() == unsafe { RmmA::table() }
+        self.table.utable.is_current()
     }
 }
@@ -477,46 +461,42 @@ impl Grant {
         &mut self.region
     }
-    pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: PageFlags<RmmA>) -> Grant {
-        let mut active_table = unsafe { ActivePageTable::new(to.kind()) };
-
-        let mut flush_all = PageFlushAll::new();
-
-        let start_page = Page::containing_address(to);
-        let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1));
-        for page in Page::range_inclusive(start_page, end_page) {
-            let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - to.data() + from.data()));
-            let result = active_table.map_to(page, frame, flags);
-            flush_all.consume(result);
+    pub fn physmap(phys: Frame, dst: Page, page_count: usize, flags: PageFlags<RmmA>, mapper: &mut PageMapper, mut flusher: impl Flusher<RmmA>) -> Result<Grant, Enomem> {
+        for index in 0..page_count {
+            let result = unsafe {
+                mapper
+                    .map_phys(dst.next_by(index).start_address(), phys.next_by(index).start_address(), flags)
+                    .expect("TODO: handle OOM from paging structures in physmap")
+            };
+            flusher.consume(result);
         }
-        flush_all.flush();
-
-        Grant {
+        Ok(Grant {
             region: Region {
-                start: to,
-                size,
+                start: dst.start_address(),
+                size: page_count * PAGE_SIZE,
             },
             flags,
             mapped: true,
             owned: false,
             desc_opt: None,
-        }
+        })
     }
-    pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags<RmmA>, mapper: &mut Mapper, mut flusher: impl Flusher<RmmA>) -> Result<Grant> {
+    pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags<RmmA>, mapper: &mut PageMapper, mut flusher: impl Flusher<RmmA>) -> Result<Grant, Enomem> {
+        // TODO: Unmap partially in case of ENOMEM
        for page in Page::range_exclusive(dst, dst.next_by(page_count)) {
-            let flush = mapper.map(page, flags).map_err(|_| Error::new(ENOMEM))?;
+            let flush = unsafe { mapper.map(page.start_address(), flags) }.ok_or(Enomem)?;
             flusher.consume(flush);
        }
        Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None })
     }
-    pub fn borrow(src_base: Page, dst_base: Page, page_count: usize, flags: PageFlags<RmmA>, desc_opt: Option<GrantFileRef>, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, dst_flusher: impl Flusher<RmmA>) -> Grant {
+    pub fn borrow(src_base: Page, dst_base: Page, page_count: usize, flags: PageFlags<RmmA>, desc_opt: Option<GrantFileRef>, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher<RmmA>) -> Result<Grant, Enomem> {
         Self::copy_inner(src_base, dst_base, page_count, flags, desc_opt, src_mapper, dst_mapper, (), dst_flusher, false, false)
     }
-    pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, dst_flusher: impl Flusher<RmmA>) -> Grant {
+    pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher<RmmA>) -> Result<Grant, Enomem> {
         Self::borrow(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), src_grant.desc_opt.clone(), src_mapper, dst_mapper, dst_flusher)
     }
-    pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut Mapper, dst_mapper: &mut Mapper, src_flusher: impl Flusher<RmmA>, dst_flusher: impl Flusher<RmmA>) -> Grant {
+    pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, src_flusher: impl Flusher<RmmA>, dst_flusher: impl Flusher<RmmA>) -> Result<Grant, Enomem> {
         assert!(core::mem::replace(&mut src_grant.mapped, false));
         let desc_opt = src_grant.desc_opt.take();
@@ -529,28 +509,54 @@ impl Grant {
         page_count: usize,
         flags: PageFlags<RmmA>,
         desc_opt: Option<GrantFileRef>,
-        src_mapper: &mut Mapper,
-        dst_mapper: &mut Mapper,
+        src_mapper: &mut PageMapper,
+        dst_mapper: &mut PageMapper,
         mut src_flusher: impl Flusher<RmmA>,
         mut dst_flusher: impl Flusher<RmmA>,
         owned: bool,
         unmap: bool,
-    ) -> Grant {
+    ) -> Result<Grant, Enomem> {
+        let mut successful_count = 0;
+
         for index in 0..page_count {
             let src_page = src_base.next_by(index);
-            let frame = if unmap {
-                let (flush, frame) = src_mapper.unmap_return(src_page, false);
+            let (address, entry_flags) = if unmap {
+                let (entry, entry_flags, flush) = unsafe { src_mapper.unmap_phys(src_page.start_address()).expect("grant references unmapped memory") };
                 src_flusher.consume(flush);
-                frame
+
+                (entry, entry_flags)
             } else {
-                src_mapper.translate_page(src_page).expect("grant references unmapped memory")
+                src_mapper.translate(src_page.start_address()).expect("grant references unmapped memory")
+            };
+
+            let flush = match unsafe { dst_mapper.map_phys(dst_base.next_by(index).start_address(), address, flags) } {
+                Some(f) => f,
+                // ENOMEM
+                None => break,
             };
-            let flush = dst_mapper.map_to(dst_base.next_by(index), frame, flags);
             dst_flusher.consume(flush);
+
+            successful_count = index + 1;
+        }
+
+        if successful_count != page_count {
+            // TODO: The grant will be lost in case of ENOMEM. Allow putting it back in source?
+            for index in 0..successful_count {
+                let (frame, _, flush) = match unsafe { dst_mapper.unmap_phys(dst_base.next_by(index).start_address()) } {
+                    Some(f) => f,
+                    None => unreachable!("grant unmapped by someone else in the meantime despite having a &mut PageMapper"),
+                };
+                dst_flusher.consume(flush);
+
+                if owned {
+                    crate::memory::deallocate_frames(Frame::containing_address(frame), 1);
+                }
+            }
+            return Err(Enomem);
        }
-        Grant {
+        Ok(Grant {
             region: Region {
                 start: dst_base.start_address(),
                 size: page_count * PAGE_SIZE,
@@ -559,23 +565,38 @@ impl Grant {
             mapped: true,
             owned,
             desc_opt,
-        }
+        })
     }
     pub fn flags(&self) -> PageFlags<RmmA> {
         self.flags
     }
-    pub fn unmap(mut self, mapper: &mut Mapper, mut flusher: impl Flusher<RmmA>) -> UnmapResult {
+    pub fn unmap(mut self, mapper: &mut PageMapper, mut flusher: impl Flusher<RmmA>) -> UnmapResult {
         assert!(self.mapped);
         for page in self.pages() {
-            let (result, frame) = mapper.unmap_return(page, false);
+            let (entry, _, flush) = unsafe { mapper.unmap_phys(page.start_address()) }
+                .unwrap_or_else(|| panic!("missing page at {:#0x} for grant {:?}", page.start_address().data(), self));
+
             if self.owned {
-                //TODO: make sure this frame can be safely freed, physical use counter
-                crate::memory::deallocate_frames(frame, 1);
+                // TODO: make sure this frame can be safely freed, physical use counter.
+                //
+                // Namely, we can either have MAP_PRIVATE or MAP_SHARED-style mappings. The former
+                // maps the source memory read-only and then (not yet) implements CoW on top (as of
+                // now the kernel does not yet support this distinction), while the latter simply
+                // means the memory is shared.
+                // We can in addition to the desc_opt also include an
+                // address space and region within, indicating borrowed memory. The source grant
+                // will have a refcount, and if it is unmapped, it will be transferred to a
+                // borrower. Only if this refcount becomes zero when decremented, will it be
+                // possible to unmap.
+                //
+                // So currently, it is technically possible to get double frees if the scheme
+                // "hosting" the memory of an fmap call, decides to funmap its memory before the
+                // fmapper does.
+                crate::memory::deallocate_frames(Frame::containing_address(entry), 1);
             }
-            flusher.consume(result);
+            flusher.consume(flush);
         }
         self.mapped = false;
@@ -663,32 +684,38 @@ impl Drop for Grant {
 pub const DANGLING: usize = 1 << (usize::BITS - 2);
 #[derive(Debug)]
-pub struct Tables {
-    pub utable: Frame,
+pub struct Table {
+    pub utable: PageMapper,
 }
-impl Drop for Tables {
+impl Drop for Table {
     fn drop(&mut self) {
-        use crate::memory::deallocate_frames;
-        deallocate_frames(Frame::containing_address(PhysicalAddress::new(self.utable.start_address().data())), 1);
+        if self.utable.is_current() {
+            // TODO: Do not flush (we immediately context switch after exit(), what else is there
+            // to do?). Instead, we can garbage-collect such page tables in the idle kernel context
+            // before it waits for interrupts. Or maybe not, depends on what future benchmarks will
+            // indicate.
+            unsafe {
+                RmmA::set_table(super::empty_cr3());
+            }
+        }
+        crate::memory::deallocate_frames(Frame::containing_address(self.utable.table().phys()), 1);
     }
 }
 /// Allocates a new identically mapped ktable and empty utable (same memory on x86_64).
-pub fn setup_new_utable() -> Result<Tables> {
-    let mut new_utable = crate::memory::allocate_frames(1).ok_or(Error::new(ENOMEM))?;
+pub fn setup_new_utable() -> Result<Table> {
+    let mut utable = unsafe { PageMapper::create(crate::rmm::FRAME_ALLOCATOR).ok_or(Error::new(ENOMEM))? };
     #[cfg(target_arch = "x86_64")]
     {
-        let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) };
-        let mut new_ktable = unsafe { InactivePageTable::from_address(new_utable.start_address().data()) };
+        let active_ktable = KernelMapper::lock();
-        let mut copy_mapping = |p4_no| {
-            let frame = active_ktable.p4()[p4_no].pointed_frame()
+        let mut copy_mapping = |p4_no| unsafe {
+            let entry = active_ktable.table().entry(p4_no)
                 .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no));
-            let flags = active_ktable.p4()[p4_no].flags();
-            new_ktable.mapper().p4_mut()[p4_no].set(frame, flags);
+            utable.table().set_entry(p4_no, entry)
        };
         // TODO: Just copy all 256 mappings? Or copy KERNEL_PML4+KERNEL_PERCPU_PML4 (needed for
         // paranoid ISRs which can occur anywhere; we don't want interrupts to triple fault!) and
@@ -707,8 +734,8 @@ pub fn setup_new_utable() -> Result<Table> {
         copy_mapping(crate::KERNEL_PERCPU_PML4);
     }
-    Ok(Tables {
-        utable: new_utable,
+    Ok(Table {
+        utable,
     })
 }
diff --git a/src/context/mod.rs b/src/context/mod.rs
index c282c8a2..dea6e235 100644
--- a/src/context/mod.rs
+++ b/src/context/mod.rs
@@ -1,12 +1,14 @@
 //! # Context management
 //!
 //! For resources on contexts, please consult [wikipedia](https://en.wikipedia.org/wiki/Context_switch) and [osdev](https://wiki.osdev.org/Context_Switching)
-use alloc::boxed::Box;
-use core::alloc::{GlobalAlloc, Layout};
 use core::sync::atomic::Ordering;
-use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard};
+
+use alloc::sync::Arc;
+
+use spin::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 use crate::paging::{RmmA, RmmArch};
+use crate::syscall::error::{Error, ESRCH, Result};
 pub use self::context::{Context, ContextId, ContextSnapshot, Status, WaitpidKey};
 pub use self::list::ContextList;
@@ -89,3 +91,7 @@ pub fn context_id() -> ContextId {
     core::sync::atomic::compiler_fence(Ordering::Acquire);
     id
 }
+
+pub fn current() -> Result<Arc<RwLock<Context>>> {
+    contexts().current().ok_or(Error::new(ESRCH)).map(Arc::clone)
+}
-- 
GitLab


From e60321d4a0dce392b3abfe65cd08d02c168fe758 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Sun, 17 Jul 2022 14:14:20 +0200
Subject: [PATCH 36/44] Partial: migrate remaining parts to RMM.

---
 src/memory/mod.rs | 15 ++++++++++-----
 src/ptrace.rs     | 19 ++++++-------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/memory/mod.rs b/src/memory/mod.rs
index e72f5afd..f8bd8b1f 100644
--- a/src/memory/mod.rs
+++ b/src/memory/mod.rs
@@ -3,7 +3,7 @@
 use core::cmp;
-use crate::arch::rmm::FRAME_ALLOCATOR;
+use crate::arch::rmm::LockedAllocator;
 pub use crate::paging::{PAGE_SIZE, PhysicalAddress};
 use rmm::{
@@ -26,21 +26,21 @@ pub struct MemoryArea {
 /// Get the number of frames available
 pub fn free_frames() -> usize {
     unsafe {
-        FRAME_ALLOCATOR.usage().free().data()
+        LockedAllocator.usage().free().data()
     }
 }
 /// Get the number of frames used
 pub fn used_frames() -> usize {
     unsafe {
-        FRAME_ALLOCATOR.usage().used().data()
+        LockedAllocator.usage().used().data()
     }
 }
 /// Allocate a range of frames
 pub fn allocate_frames(count: usize) -> Option<Frame> {
     unsafe {
-        FRAME_ALLOCATOR.allocate(FrameCount::new(count)).map(|phys| {
+        LockedAllocator.allocate(FrameCount::new(count)).map(|phys| {
             Frame::containing_address(PhysicalAddress::new(phys.data()))
         })
     }
@@ -65,7 +65,7 @@ pub fn allocate_frames_complex(count: usize, flags: PhysallocFlags, strategy: Op
 /// Deallocate a range of frames frame
 pub fn deallocate_frames(frame: Frame, count: usize) {
     unsafe {
-        FRAME_ALLOCATOR.free(
+        LockedAllocator.free(
             rmm::PhysicalAddress::new(frame.start_address().data()),
             FrameCount::new(count)
         );
@@ -103,6 +103,11 @@ impl Frame {
     pub fn range_inclusive(start: Frame, end: Frame) -> FrameIter {
         FrameIter { start, end }
     }
+    pub fn next_by(&self, n: usize) -> Self {
+        Self {
+            number: self.number + n,
+        }
+    }
 }
 pub struct FrameIter {
diff --git a/src/ptrace.rs b/src/ptrace.rs
index 302646e8..f1f01189 100644
--- a/src/ptrace.rs
+++ b/src/ptrace.rs
@@ -7,10 +7,7 @@
 use rmm::Arch;
 use crate::{
     arch::{
         interrupt::InterruptStack,
-        paging::{
-            mapper::PageFlushAll,
-            ActivePageTable, InactivePageTable, Page, PAGE_SIZE, TableKind, VirtualAddress
-        }
+        paging::{PAGE_SIZE, VirtualAddress},
     },
     common::unique::Unique,
     context::{self, signal, Context, ContextId, memory::AddrSpace},
@@ -34,12 +31,8 @@ use alloc::{
         btree_map::Entry
     },
     sync::Arc,
-    vec::Vec
 };
-use core::{
-    cmp,
-    sync::atomic::Ordering
-};
+use core::cmp;
 use spin::{Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard};
 // ____ _
@@ -473,8 +466,6 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator
 pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: usize) -> impl Iterator<Item = Option<*mut [u8]>> + '_ {
-    let mut table = unsafe { 
InactivePageTable::from_address(addrspace.frame.utable.start_address().data()) }; - // TODO: Iterate over grants instead to avoid yielding None too many times. What if // context_memory is used for an entire process's address space, where the stack is at the very // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then @@ -485,8 +476,10 @@ pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: us //log::info!("ADDR {:p} LEN {:#0x}", page as *const u8, len); - let frame = table.mapper().translate_page(Page::containing_address(VirtualAddress::new(addr)))?; - let start = RmmA::phys_to_virt(frame.start_address()).data() + addr % crate::memory::PAGE_SIZE; + // FIXME: verify flags before giving out slice + let (address, _flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; + + let start = RmmA::phys_to_virt(address).data() + addr % crate::memory::PAGE_SIZE; Some(core::ptr::slice_from_raw_parts_mut(start as *mut u8, len)) }) } -- GitLab From bf82387f3bff734eda1e4ea0f306cf0b7a611776 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 18 Jul 2022 11:29:11 +0200 Subject: [PATCH 37/44] Fix KernelMapper unlocking code. --- src/arch/x86_64/rmm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/x86_64/rmm.rs b/src/arch/x86_64/rmm.rs index f3ef63a2..5fc741a2 100644 --- a/src/arch/x86_64/rmm.rs +++ b/src/arch/x86_64/rmm.rs @@ -321,7 +321,7 @@ impl core::ops::Deref for KernelMapper { } impl Drop for KernelMapper { fn drop(&mut self) { - if LOCK_COUNT.fetch_sub(1, Ordering::Relaxed) == 0 { + if LOCK_COUNT.fetch_sub(1, Ordering::Relaxed) == 1 { LOCK_OWNER.store(NO_PROCESSOR, Ordering::Release); } atomic::compiler_fence(Ordering::Release); -- GitLab From b7665018963a706ef4ac7aef15cc15fa3e644176 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 18 Jul 2022 11:29:45 +0200 Subject: [PATCH 38/44] Fix deadlock while starting APs. --- src/acpi/madt.rs | 15 +++++++++------ src/acpi/mod.rs | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs index af8db07c..4991b961 100644 --- a/src/acpi/madt.rs +++ b/src/acpi/madt.rs @@ -53,17 +53,20 @@ impl Madt { } if cfg!(feature = "multi_core") { - let mut mapper = KernelMapper::lock(); // Map trampoline let trampoline_frame = Frame::containing_address(PhysicalAddress::new(TRAMPOLINE)); let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); - let result = unsafe { + let (result, page_table_physaddr) = unsafe { //TODO: do not have writable and executable! 
- mapper + let mut mapper = KernelMapper::lock(); + + let result = mapper .get_mut() .expect("expected kernel page table not to be recursively locked while initializing MADT") .map_phys(trampoline_page.start_address(), trampoline_frame.start_address(), PageFlags::new().execute(true).write(true)) - .expect("failed to map trampoline") + .expect("failed to map trampoline"); + + (result, mapper.table().phys().data()) }; result.flush(); @@ -98,7 +101,7 @@ impl Madt { // Set the ap_ready to 0, volatile unsafe { atomic_store(ap_ready, 0) }; unsafe { atomic_store(ap_cpu_id, ap_local_apic.id as u64) }; - unsafe { atomic_store(ap_page_table, mapper.table().phys().data() as u64) }; + unsafe { atomic_store(ap_page_table, page_table_physaddr as u64) }; unsafe { atomic_store(ap_stack_start, stack_start as u64) }; unsafe { atomic_store(ap_stack_end, stack_end as u64) }; unsafe { atomic_store(ap_code, kstart_ap as u64) }; @@ -156,7 +159,7 @@ impl Madt { // Unmap trampoline let (_frame, _, flush) = unsafe { - mapper + KernelMapper::lock() .get_mut() .expect("expected kernel page table not to be recursively locked while initializing MADT") .unmap_phys(trampoline_page.start_address()) diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index df8ee781..49b5bb8b 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -86,7 +86,6 @@ pub unsafe fn init(already_supplied_rsdps: Option<(u64, u64)>) { if let Some(rsdp) = rsdp_opt { info!("RSDP: {:?}", rsdp); let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock()); - dbg!(); for &c in rxsdt.signature.iter() { print!("{}", c as char); -- GitLab From 308c2cc711215a820039073e2aa180ee15fa43c4 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Mon, 18 Jul 2022 14:06:07 +0200 Subject: [PATCH 39/44] Support modifying processes' sigactions. This is, other than vfork, the last piece of functionality that the previous clone() offered (CLONE_SIGHAND) which previously was not implemented. 
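The intended userspace flow mirrors the existing filetable mechanism (the calls
below are an approximation for illustration, not a verbatim relibc excerpt):

    // Hypothetical usage of the new proc: paths added in this patch.
    let actions = syscall::open("proc:<pid>/sigactions", O_RDWR | O_CLOEXEC)?;
    // dup with "copy" clones the table; "empty" yields all-default actions.
    let new_actions = syscall::dup(actions, b"copy")?;
    // Writing the fd (as native-endian usize bytes) to current-sigactions
    // installs the table into the target context.
    let target = syscall::open("proc:<pid>/current-sigactions", O_WRONLY)?;
    syscall::write(target, &usize::to_ne_bytes(new_actions))?;

Sharing one sigactions fd between two contexts gives CLONE_SIGHAND semantics;
duplicating with "copy" gives fork-style independent tables.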
---
 src/context/context.rs | 19 ++++++-----
 src/scheme/mod.rs      |  3 ++
 src/scheme/proc.rs     | 73 ++++++++++++++++++++++++++++++------------
 src/syscall/process.rs |  6 +---
 4 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/src/context/context.rs b/src/context/context.rs
index 090b477b..b9bc2332 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -368,14 +368,7 @@ impl Context {
             name: Arc::new(RwLock::new(String::new().into_boxed_str())),
             cwd: Arc::new(RwLock::new(String::new())),
             files: Arc::new(RwLock::new(Vec::new())),
-            actions: Arc::new(RwLock::new(vec![(
-                SigAction {
-                    sa_handler: unsafe { mem::transmute(SIG_DFL) },
-                    sa_mask: [0; 2],
-                    sa_flags: SigActionFlags::empty(),
-                },
-                0
-            ); 128])),
+            actions: Self::empty_actions(),
             regs: None,
             ptrace_stop: false,
             sigstack: None,
@@ -562,4 +555,14 @@ impl Context {
         self.addr_space.replace(addr_space)
     }
+    pub fn empty_actions() -> Arc<RwLock<Vec<(SigAction, usize)>>> {
+        Arc::new(RwLock::new(vec![(
+            SigAction {
+                sa_handler: unsafe { mem::transmute(SIG_DFL) },
+                sa_mask: [0; 2],
+                sa_flags: SigActionFlags::empty(),
+            },
+            0
+        ); 128]))
+    }
 }
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index fb3b7dc7..d5e1d5df 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -303,4 +303,7 @@ pub trait KernelScheme: Scheme + Send + Sync + 'static {
     fn as_addrspace(&self, number: usize) -> Result<Arc<RwLock<AddrSpace>>> {
         Err(Error::new(EBADF))
     }
+    fn as_sigactions(&self, number: usize) -> Result<Arc<RwLock<Vec<(SigAction, usize)>>>> {
+        Err(Error::new(EBADF))
+    }
 }
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 628dd44c..8cc67422 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -8,7 +8,7 @@ use crate::{
         FloatRegisters,
         IntRegisters,
         EnvRegisters,
-        data::{Map, PtraceEvent, Stat},
+        data::{Map, PtraceEvent, SigAction, Stat},
         error::*,
         flag::*,
         scheme::{calc_seek_offset_usize, Scheme},
@@ -141,6 +141,10 @@ enum Operation {
     // TODO: Remove this once cross-scheme links are merged. That would allow acquiring a new
     // FD to access the file descriptor behind grants.
     GrantHandle { description: Arc<RwLock<FileDescription>> },
+
+    Sigactions(Arc<RwLock<Vec<(SigAction, usize)>>>),
+    CurrentSigactions,
+    AwaitingSigactionsChange(Arc<RwLock<Vec<(SigAction, usize)>>>),
 }
 #[derive(Clone, Copy, PartialEq, Eq)]
 enum Attr {
@@ -150,7 +154,7 @@ impl Operation {
     fn needs_child_process(&self) -> bool {
-        matches!(self, Self::Memory { .. } | Self::Regs(_) | Self::Trace | Self::Filetable { .. } | Self::AddrSpace { .. } | Self::CurrentAddrSpace | Self::CurrentFiletable)
+        matches!(self, Self::Memory { .. } | Self::Regs(_) | Self::Trace | Self::Filetable { .. } | Self::AddrSpace { .. } | Self::CurrentAddrSpace | Self::CurrentFiletable | Self::Sigactions(_) | Self::CurrentSigactions | Self::AwaitingSigactionsChange(_))
     }
     fn needs_root(&self) -> bool {
         matches!(self, Self::Attr(_))
     }
@@ -286,7 +290,7 @@ impl ProcScheme {
         let operation = match operation_str {
             Some("mem") => Operation::Memory { addrspace: current_addrspace()? },
             Some("addrspace") => Operation::AddrSpace { addrspace: current_addrspace()? 
}, - Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::contexts().current().ok_or(Error::new(ESRCH))?.read().files) }, + Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::current()?.read().files) }, Some("current-addrspace") => Operation::CurrentAddrSpace, Some("current-filetable") => Operation::CurrentFiletable, Some("regs/float") => Operation::Regs(RegsKind::Float), @@ -300,6 +304,8 @@ impl ProcScheme { Some("uid") => Operation::Attr(Attr::Uid), Some("gid") => Operation::Attr(Attr::Gid), Some("open_via_dup") => Operation::OpenViaDup, + Some("sigactions") => Operation::Sigactions(Arc::clone(&context::current()?.read().actions)), + Some("current-sigactions") => Operation::CurrentSigactions, _ => return Err(Error::new(EINVAL)) }; @@ -420,6 +426,15 @@ impl Scheme for ProcScheme { handle.info.clone() }; + let handle = |operation, data| Handle { + info: Info { + flags: 0, + pid: info.pid, + operation, + }, + data, + }; + self.new_handle(match info.operation { Operation::OpenViaDup => { let (uid, gid) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read() { @@ -428,7 +443,7 @@ impl Scheme for ProcScheme { return self.open_inner(info.pid, Some(core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?).filter(|s| !s.is_empty()), O_RDWR | O_CLOEXEC, uid, gid); }, - Operation::Filetable { filetable } => { + Operation::Filetable { ref filetable } => { // TODO: Maybe allow userspace to either copy or transfer recently dupped file // descriptors between file tables. if buf != b"copy" { @@ -436,16 +451,9 @@ impl Scheme for ProcScheme { } let new_filetable = Arc::try_new(RwLock::new(filetable.read().clone())).map_err(|_| Error::new(ENOMEM))?; - Handle { - info: Info { - flags: 0, - pid: info.pid, - operation: Operation::Filetable { filetable: new_filetable }, - }, - data: OperationData::Other, - } + handle(Operation::Filetable { filetable: new_filetable }, OperationData::Other) } - Operation::AddrSpace { addrspace } => { + Operation::AddrSpace { ref addrspace } => { let (operation, is_mem) = match buf { // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But // in that case, what scheme? @@ -462,14 +470,16 @@ impl Scheme for ProcScheme { _ => return Err(Error::new(EINVAL)), }; - Handle { - info: Info { - flags: 0, - pid: info.pid, - operation, - }, - data: if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Offset(0) }, - } + + handle(operation, if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Offset(0) }) + } + Operation::Sigactions(ref sigactions) => { + let new = match buf { + b"empty" => Context::empty_actions(), + b"copy" => Arc::new(RwLock::new(sigactions.read().clone())), + _ => return Err(Error::new(EINVAL)), + }; + handle(Operation::Sigactions(new), OperationData::Other) } _ => return Err(Error::new(EINVAL)), }) @@ -991,6 +1001,7 @@ impl Scheme for ProcScheme { Ok(buf.len()) } Operation::Filetable { .. 
} => return Err(Error::new(EBADF)), + Operation::CurrentFiletable => { let filetable_fd = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); let (hopefully_this_scheme, number) = extract_scheme_number(filetable_fd)?; @@ -1014,6 +1025,13 @@ impl Scheme for ProcScheme { Ok(3 * mem::size_of::()) } + Operation::CurrentSigactions => { + let sigactions_fd = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + let (hopefully_this_scheme, number) = extract_scheme_number(sigactions_fd)?; + let sigactions = hopefully_this_scheme.as_sigactions(number)?; + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingSigactionsChange(sigactions); + Ok(mem::size_of::()) + } _ => return Err(Error::new(EBADF)), } } @@ -1059,8 +1077,10 @@ impl Scheme for ProcScheme { Operation::Attr(Attr::Gid) => "gid", Operation::Filetable { .. } => "filetable", Operation::AddrSpace { .. } => "addrspace", + Operation::Sigactions(_) => "sigactions", Operation::CurrentAddrSpace => "current-addrspace", Operation::CurrentFiletable => "current-filetable", + Operation::CurrentSigactions => "current-sigactions", Operation::OpenViaDup => "open-via-dup", _ => return Err(Error::new(EOPNOTSUPP)), @@ -1124,6 +1144,10 @@ impl Scheme for ProcScheme { context.files = new; Ok(()) })?, + Operation::AwaitingSigactionsChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { + context.actions = new; + Ok(()) + })?, Operation::Trace => { ptrace::close_session(handle.info.pid); @@ -1171,6 +1195,13 @@ impl KernelScheme for ProcScheme { Err(Error::new(EBADF)) } } + fn as_sigactions(&self, number: usize) -> Result>>> { + if let Operation::Sigactions(ref sigactions) = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(sigactions)) + } else { + Err(Error::new(EBADF)) + } + } } extern "C" fn clone_handler() { let context_lock = Arc::clone(context::contexts().current().expect("expected the current context to be set in a spawn closure")); diff --git a/src/syscall/process.rs b/src/syscall/process.rs index fdfe7fda..62b2a589 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -65,11 +65,7 @@ pub fn exit(status: usize) -> ! { ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); { - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exit failed to find context"); - Arc::clone(&context_lock) - }; + let context_lock = context::current().expect("exit failed to find context"); let mut close_files; let pid = { -- GitLab From 0720db226577e8e113560fc5d8d652df201c99cb Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Thu, 21 Jul 2022 10:51:59 +0200 Subject: [PATCH 40/44] Remove SYS_EXEC debug code. 
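// A sketch of the matching userspace flow for sigactions, assuming the redox
// `syscall` crate wrappers; note that the swap only takes effect once the
// current-sigactions handle is closed (the AwaitingSigactionsChange arm above).
fn give_fresh_sigactions(pid: usize) -> syscall::Result<()> {
    let actions = syscall::open(&format!("proc:{}/sigactions", pid), syscall::O_CLOEXEC)?;
    // b"copy" duplicates the current table; b"empty" restarts from SIG_DFL.
    let new_table = syscall::dup(actions, b"empty")?;
    let current = syscall::open(&format!("proc:{}/current-sigactions", pid), syscall::O_CLOEXEC)?;
    // The replacement table is named by fd, written as a native-endian usize.
    syscall::write(current, &usize::to_ne_bytes(new_table))?;
    let _ = syscall::close(current); // applies the change
    let _ = syscall::close(new_table);
    let _ = syscall::close(actions);
    Ok(())
}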
--- src/syscall/debug.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 717e1c3a..53e6ac51 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -174,15 +174,6 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - "exit({})", b ), - SYS_EXEC => format!( - "exec({:#x?}, {:p}, {:p})", - validate_slice( - b as *const crate::syscall::data::ExecMemRange, - c, - ), - d as *const u8, - e as *const u8, - ), SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, -- GitLab From 5bbfdcda6b892a446d324e43c528cb82a0a70a40 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Sat, 23 Jul 2022 19:14:46 +0200 Subject: [PATCH 41/44] Add mmap-min-addr and support unmapping parent PTs. --- src/acpi/madt.rs | 2 +- src/arch/x86_64/device/local_apic.rs | 2 +- src/context/context.rs | 8 ++ src/context/memory.rs | 149 +++++++++++++++++++++++---- src/scheme/memory.rs | 3 +- src/scheme/proc.rs | 119 ++++++++++++--------- src/scheme/user.rs | 2 +- src/syscall/driver.rs | 2 +- src/syscall/process.rs | 52 +--------- 9 files changed, 220 insertions(+), 119 deletions(-) diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs index 4991b961..1f8d8f0d 100644 --- a/src/acpi/madt.rs +++ b/src/acpi/madt.rs @@ -162,7 +162,7 @@ impl Madt { KernelMapper::lock() .get_mut() .expect("expected kernel page table not to be recursively locked while initializing MADT") - .unmap_phys(trampoline_page.start_address()) + .unmap_phys(trampoline_page.start_address(), true) .expect("failed to unmap trampoline page") }; flush.flush(); diff --git a/src/arch/x86_64/device/local_apic.rs b/src/arch/x86_64/device/local_apic.rs index 100341e6..392f3c27 100644 --- a/src/arch/x86_64/device/local_apic.rs +++ b/src/arch/x86_64/device/local_apic.rs @@ -51,7 +51,7 @@ impl LocalApic { if ! self.x2 { log::info!("Detected xAPIC at {:#x}", physaddr.data()); - if let Some((_entry, _, flush)) = mapper.unmap_phys(virtaddr) { + if let Some((_entry, _, flush)) = mapper.unmap_phys(virtaddr, true) { // Unmap xAPIC page if already mapped flush.flush(); } diff --git a/src/context/context.rs b/src/context/context.rs index b9bc2332..abb53639 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -258,6 +258,11 @@ pub struct Context { /// set since there is no interrupt stack (unless the kernel stack is copied, but that is in my /// opinion hackier and less efficient than this (and UB to do in Rust)). pub clone_entry: Option<[usize; 2]>, + /// Lowest offset for mmap invocations where the user has not already specified the offset + /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with + /// the exception that we have a memory safe kernel which doesn't have to protect itself + /// against null pointers, so fixed mmaps are still allowed. 
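// Hedged sketch of the rmm contract assumed by these unmap_phys() call sites:
// the added bool asks the mapper to also free page-table pages left empty by
// the unmap ("unmapping parent PTs"), which full teardowns want.
unsafe fn unmap_one(mapper: &mut crate::paging::PageMapper, page: crate::paging::Page) {
    // `true` = also reclaim now-empty parent page tables.
    if let Some((_frame, _flags, flush)) = mapper.unmap_phys(page.start_address(), true) {
        flush.flush(); // flush the stale TLB entry for this page
    }
}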
+ pub mmap_min: usize, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -373,6 +378,7 @@ impl Context { ptrace_stop: false, sigstack: None, clone_entry: None, + mmap_min: MMAP_MIN_DEFAULT, }; Ok(this) } @@ -566,3 +572,5 @@ impl Context { ); 128])) } } + +pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; diff --git a/src/context/memory.rs b/src/context/memory.rs index c9580027..13891d80 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -1,5 +1,5 @@ use alloc::collections::{BTreeMap, BTreeSet}; -use alloc::sync::Arc; +use alloc::{sync::Arc, vec::Vec}; use core::borrow::Borrow; use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; @@ -14,7 +14,7 @@ use rmm::Arch as _; use crate::arch::paging::PAGE_SIZE; use crate::context::file::FileDescriptor; use crate::memory::{Enomem, Frame}; -use crate::paging::mapper::{Flusher, PageFlushAll}; +use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; use crate::paging::{KernelMapper, Page, PageFlags, PageIter, PageMapper, PhysicalAddress, RmmA, round_up_pages, VirtualAddress}; pub fn page_flags(flags: MapFlags) -> PageFlags { @@ -54,6 +54,10 @@ pub struct AddrSpace { pub grants: UserGrants, } impl AddrSpace { + pub fn current() -> Result>> { + Ok(Arc::clone(super::current()?.read().addr_space()?)) + } + /// Attempt to clone an existing address space so that all mappings are copied (CoW). pub fn try_clone(&mut self) -> Result>> { let mut new = new_addrspace()?; @@ -102,6 +106,50 @@ impl AddrSpace { pub fn is_current(&self) -> bool { self.table.utable.is_current() } + pub fn mprotect(&mut self, base: Page, page_count: usize, flags: MapFlags) -> Result<()> { + let (mut active, mut inactive); + let mut flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + let mut mapper = &mut self.table.utable; + + let region = Region::new(base.start_address(), page_count * PAGE_SIZE); + + // TODO: Remove allocation + let regions = self.grants.conflicts(region).map(|g| *g.region()).collect::>(); + + for grant_region in regions { + let grant = self.grants.take(&grant_region).expect("grant cannot magically disappear while we hold the lock!"); + let intersection = grant_region.intersect(region); + + let (before, mut grant, after) = grant.extract(intersection).expect("failed to extract grant"); + + if let Some(before) = before { self.grants.insert(before); } + if let Some(after) = after { self.grants.insert(after); } + + if !grant.is_owned() && flags.contains(MapFlags::PROT_WRITE) && !grant.flags().has_write() { + self.grants.insert(grant); + return Err(Error::new(EACCES)); + } + + let new_flags = grant.flags() + // TODO: Require a capability in order to map executable memory? + .execute(flags.contains(MapFlags::PROT_EXEC)) + .write(flags.contains(MapFlags::PROT_WRITE)); + + // TODO: Allow enabling/disabling read access on architectures which allow it. On + // x86_64 with protection keys (although only enforced by userspace), and AArch64 (I + // think), execute-only memory is also supported. + + grant.remap(mapper, &mut flusher, new_flags); + self.grants.insert(grant); + } + Ok(()) + } } #[derive(Debug)] @@ -149,22 +197,31 @@ impl UserGrants { } /// Return a free region with the specified size // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). 
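// Worked example for the extract() dance in mprotect() above (hypothetical
// addresses, 4 KiB pages): a grant covering [0x10000, 0x14000) hit by
// mprotect(base = 0x11000, page_count = 2) is split into
//   before = [0x10000, 0x11000)   reinserted with its old flags
//   middle = [0x11000, 0x13000)   remapped with the requested flags
//   after  = [0x13000, 0x14000)   reinserted with its old flags
// so permission changes never leak outside the requested range.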
- pub fn find_free(&self, size: usize) -> Option { + pub fn find_free(&self, min: usize, size: usize) -> Option { // Get first available hole, but do reserve the page starting from zero as most compiled // languages cannot handle null pointers safely even if they point to valid memory. If an // application absolutely needs to map the 0th page, they will have to do so explicitly via // MAP_FIXED/MAP_FIXED_NOREPLACE. // TODO: Allow explicitly allocating guard pages? - let (hole_start, hole_size) = self.holes.iter().find(|(hole_offset, hole_size)| size <= if hole_offset.data() == 0 { hole_size.saturating_sub(PAGE_SIZE) } else { **hole_size })?; + let (hole_start, hole_size) = self.holes.iter() + .skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min) + .find(|(hole_offset, hole_size)| { + let avail_size = if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size { + **hole_size - (min - hole_offset.data()) + } else { + **hole_size + }; + size <= avail_size + })?; // Create new region - Some(Region::new(VirtualAddress::new(cmp::max(hole_start.data(), PAGE_SIZE)), size)) + Some(Region::new(VirtualAddress::new(cmp::max(hole_start.data(), min)), size)) } /// Return a free region, respecting the user's hinted address and flags. Address may be null. - pub fn find_free_at(&mut self, address: VirtualAddress, size: usize, flags: MapFlags) -> Result { + pub fn find_free_at(&mut self, min: usize, address: VirtualAddress, size: usize, flags: MapFlags) -> Result { if address == VirtualAddress::new(0) { // Free hands! - return self.find_free(size).ok_or(Error::new(ENOMEM)); + return self.find_free(min, size).ok_or(Error::new(ENOMEM)); } // The user wished to have this region... @@ -178,18 +235,18 @@ impl UserGrants { return Err(Error::new(EINVAL)); } - if let Some(grant) = self.contains(requested.start_address()) { + + if let Some(grant) = self.conflicts(requested).next() { // ... but it already exists if flags.contains(MapFlags::MAP_FIXED_NOREPLACE) { - println!("grant: {:#x} conflicts with: {:#x} - {:#x}", address.data(), grant.start_address().data(), grant.end_address().data()); return Err(Error::new(EEXIST)); - } else if flags.contains(MapFlags::MAP_FIXED) { - // TODO: Overwrite existing grant + } + if flags.contains(MapFlags::MAP_FIXED) { return Err(Error::new(EOPNOTSUPP)); } else { // TODO: Find grant close to requested address? - requested = self.find_free(requested.size()).ok_or(Error::new(ENOMEM))?; + requested = self.find_free(min, requested.size()).ok_or(Error::new(ENOMEM))?; } } @@ -241,9 +298,36 @@ impl UserGrants { holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); } } - pub fn insert(&mut self, grant: Grant) { + pub fn insert(&mut self, mut grant: Grant) { assert!(self.conflicts(*grant).next().is_none()); self.reserve(&grant); + + // FIXME: This currently causes issues, mostly caused by old code that unmaps only based on + // offsets. For instance, the scheme code does not specify any length, and would thus unmap + // memory outside of what it intended to. 
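// Worked example for find_free() above (hypothetical holes, 4 KiB pages):
// with holes = { 0x0 => 0x10000, 0x50000 => 0x10000 } and min = 0x4000, the
// first hole only has 0x10000 - 0x4000 = 0xC000 usable bytes. Hence
//   find_free(0x4000, 0x8000) -> [0x4000, 0xC000)   (fits the first hole)
//   find_free(0x4000, 0xE000) -> [0x50000, 0x5E000) (first hole too small)
// and nothing below `min` is ever handed out implicitly.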
+ + /* + let before_region = self.inner + .range(..grant.region).next_back() + .filter(|b| b.end_address() == grant.start_address() && b.can_be_merged_if_adjacent(&grant)).map(|g| g.region); + + let after_region = self.inner + .range(Region::new(grant.end_address(), 1)..).next() + .filter(|a| a.start_address() == grant.end_address() && a.can_be_merged_if_adjacent(&grant)).map(|g| g.region); + + if let Some(before) = before_region { + grant.region.start = before.start; + grant.region.size += before.size; + + core::mem::forget(self.inner.take(&before)); + } + if let Some(after) = after_region { + grant.region.size += after.size; + + core::mem::forget(self.inner.take(&after)); + } + */ + self.inner.insert(grant); } pub fn remove(&mut self, region: &Region) -> bool { @@ -493,14 +577,14 @@ impl Grant { pub fn borrow(src_base: Page, dst_base: Page, page_count: usize, flags: PageFlags, desc_opt: Option, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher) -> Result { Self::copy_inner(src_base, dst_base, page_count, flags, desc_opt, src_mapper, dst_mapper, (), dst_flusher, false, false) } - pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher) -> Result { - Self::borrow(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), src_grant.desc_opt.clone(), src_mapper, dst_mapper, dst_flusher) + pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher) -> Result { + Self::borrow(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), src_grant.desc_opt.clone(), src_mapper, dst_mapper, dst_flusher).map_err(Into::into) } - pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, src_flusher: impl Flusher, dst_flusher: impl Flusher) -> Result { + pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, src_flusher: impl Flusher, dst_flusher: impl Flusher) -> Result { assert!(core::mem::replace(&mut src_grant.mapped, false)); let desc_opt = src_grant.desc_opt.take(); - Self::copy_inner(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), desc_opt, src_mapper, dst_mapper, src_flusher, dst_flusher, src_grant.owned, true) + Self::copy_inner(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), desc_opt, src_mapper, dst_mapper, src_flusher, dst_flusher, src_grant.owned, true).map_err(Into::into) } fn copy_inner( @@ -521,7 +605,7 @@ impl Grant { for index in 0..page_count { let src_page = src_base.next_by(index); let (address, entry_flags) = if unmap { - let (entry, entry_flags, flush) = unsafe { src_mapper.unmap_phys(src_page.start_address()).expect("grant references unmapped memory") }; + let (entry, entry_flags, flush) = unsafe { src_mapper.unmap_phys(src_page.start_address(), true).expect("grant references unmapped memory") }; src_flusher.consume(flush); (entry, entry_flags) @@ -543,7 +627,7 @@ impl Grant { if successful_count != page_count { // TODO: The grant will be lost in case of ENOMEM. Allow putting it back in source? 
for index in 0..successful_count { - let (frame, _, flush) = match unsafe { dst_mapper.unmap_phys(dst_base.next_by(index).start_address()) } { + let (frame, _, flush) = match unsafe { dst_mapper.unmap_phys(dst_base.next_by(index).start_address(), true) } { Some(f) => f, None => unreachable!("grant unmapped by someone else in the meantime despite having a &mut PageMapper"), }; @@ -572,11 +656,24 @@ impl Grant { self.flags } + pub fn remap(&mut self, mapper: &mut PageMapper, mut flusher: impl Flusher, flags: PageFlags) { + assert!(self.mapped); + + for page in self.pages() { + unsafe { + let result = mapper.remap(page.start_address(), flags).expect("grant contained unmap address"); + flusher.consume(result); + } + } + + self.flags = flags; + } + pub fn unmap(mut self, mapper: &mut PageMapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); for page in self.pages() { - let (entry, _, flush) = unsafe { mapper.unmap_phys(page.start_address()) } + let (entry, _, flush) = unsafe { mapper.unmap_phys(page.start_address(), true) } .unwrap_or_else(|| panic!("missing page at {:#0x} for grant {:?}", page.start_address().data(), self)); if self.owned { @@ -643,6 +740,18 @@ impl Grant { Some((before_grant, self, after_grant)) } + // FIXME + /* + pub fn can_be_merged_if_adjacent(&self, with: &Self) -> bool { + match (&self.desc_opt, &with.desc_opt) { + (None, None) => (), + (Some(ref a), Some(ref b)) if Arc::ptr_eq(&a.desc.description, &b.desc.description) => (), + + _ => return false, + } + self.owned == with.owned && self.mapped == with.mapped && self.flags.data() == with.flags.data() + } + */ } impl Deref for Grant { diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 8bcc8cf8..2668f078 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -4,6 +4,7 @@ use crate::memory::{free_frames, used_frames, PAGE_SIZE}; use crate::paging::{mapper::PageFlushAll, Page, VirtualAddress}; use crate::syscall::data::{Map, StatVfs}; use crate::syscall::error::*; +use crate::syscall::flag::MapFlags; use crate::syscall::scheme::Scheme; pub struct MemoryScheme; @@ -25,7 +26,7 @@ impl MemoryScheme { let mut addr_space = context.addr_space()?.write(); let addr_space = &mut *addr_space; - let region = addr_space.grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(map.address), map.size, map.flags)?.round(); addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut addr_space.table.utable, PageFlushAll::new())?); diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 8cc67422..cb33744f 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -145,6 +145,8 @@ enum Operation { Sigactions(Arc>>), CurrentSigactions, AwaitingSigactionsChange(Arc>>), + + MmapMinAddr, } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -281,15 +283,11 @@ impl ProcScheme { } } -fn current_addrspace() -> Result>> { - Ok(Arc::clone(context::current()?.read().addr_space()?)) -} - impl ProcScheme { fn open_inner(&self, pid: ContextId, operation_str: Option<&str>, flags: usize, uid: u32, gid: u32) -> Result { let operation = match operation_str { - Some("mem") => Operation::Memory { addrspace: current_addrspace()? }, - Some("addrspace") => Operation::AddrSpace { addrspace: current_addrspace()? }, + Some("mem") => Operation::Memory { addrspace: AddrSpace::current()? 
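// Hedged userspace view of fmap_anonymous(): the `syscall` crate wrapper
// names are assumed, as is the 4 KiB page size used for the size math.
const PAGE_SIZE: usize = 4096; // assumed x86_64 page size
fn map_anonymous(pages: usize) -> syscall::Result<usize> {
    use syscall::flag::MapFlags;
    let fd = syscall::open("memory:", syscall::O_CLOEXEC)?;
    let address = unsafe {
        syscall::fmap(fd, &syscall::data::Map {
            offset: 0,
            size: pages * PAGE_SIZE, // must be page-aligned
            flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE,
            address: 0, // 0 = kernel picks a free region at or above mmap_min
        })?
    };
    let _ = syscall::close(fd);
    Ok(address)
}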
}, + Some("addrspace") => Operation::AddrSpace { addrspace: AddrSpace::current()? }, Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::current()?.read().files) }, Some("current-addrspace") => Operation::CurrentAddrSpace, Some("current-filetable") => Operation::CurrentFiletable, @@ -306,6 +304,7 @@ impl ProcScheme { Some("open_via_dup") => Operation::OpenViaDup, Some("sigactions") => Operation::Sigactions(Arc::clone(&context::current()?.read().actions)), Some("current-sigactions") => Operation::CurrentSigactions, + Some("mmap-min-addr") => Operation::MmapMinAddr, _ => return Err(Error::new(EINVAL)) }; @@ -695,6 +694,11 @@ impl Scheme for ProcScheme { read_from(buf, &data.buf, &mut data.offset) } + Operation::MmapMinAddr => { + let val = with_context(info.pid, |context| Ok(context.mmap_min))?; + *buf.array_chunks_mut::<{mem::size_of::()}>().next().unwrap() = usize::to_ne_bytes(val); + Ok(mem::size_of::()) + } // TODO: Replace write() with SYS_DUP_FORWARD. // TODO: Find a better way to switch address spaces, since they also require switching // the instruction and stack pointer. Maybe remove `/regs` altogether and replace it @@ -798,7 +802,7 @@ impl Scheme for ProcScheme { // Forbid transferring grants to the same address space! if is_active { return Err(Error::new(EBUSY)); } - let current_addrspace = current_addrspace()?; + let current_addrspace = AddrSpace::current()?; let mut current_addrspace = current_addrspace.write(); let current_addrspace = &mut *current_addrspace; let src_grant = current_addrspace.grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; @@ -1032,6 +1036,11 @@ impl Scheme for ProcScheme { self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingSigactionsChange(sigactions); Ok(mem::size_of::()) } + Operation::MmapMinAddr => { + let val = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + with_context_mut(info.pid, |context| { context.mmap_min = val; Ok(()) })?; + Ok(mem::size_of::()) + } _ => return Err(Error::new(EBADF)), } } @@ -1082,6 +1091,7 @@ impl Scheme for ProcScheme { Operation::CurrentFiletable => "current-filetable", Operation::CurrentSigactions => "current-sigactions", Operation::OpenViaDup => "open-via-dup", + Operation::MmapMinAddr => "mmap-min-addr", _ => return Err(Error::new(EOPNOTSUPP)), }); @@ -1117,29 +1127,32 @@ impl Scheme for ProcScheme { let stop_context = if handle.info.pid == context::context_id() { with_context_mut } else { try_stop_context }; match handle.info.operation { - Operation::AwaitingAddrSpaceChange { new, new_sp, new_ip } => stop_context(handle.info.pid, |context: &mut Context| unsafe { - if let Some(saved_regs) = ptrace::regs_for_mut(context) { - saved_regs.iret.rip = new_ip; - saved_regs.iret.rsp = new_sp; - } else { - context.clone_entry = Some([new_ip, new_sp]); - } + Operation::AwaitingAddrSpaceChange { new, new_sp, new_ip } => { + stop_context(handle.info.pid, |context: &mut Context| unsafe { + if let Some(saved_regs) = ptrace::regs_for_mut(context) { + saved_regs.iret.rip = new_ip; + saved_regs.iret.rsp = new_sp; + } else { + context.clone_entry = Some([new_ip, new_sp]); + } - let prev_addr_space = context.set_addr_space(new); + let prev_addr_space = context.set_addr_space(new); - if let Some(mut prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { - // We are the last reference to the address space; therefore it must be - // unmapped. 
+ if let Some(mut prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { + // We are the last reference to the address space; therefore it must be + // unmapped. - // TODO: Optimize away clearing of page tables? In that case, what about memory - // deallocation? - for grant in prev.grants.into_iter() { - grant.unmap(&mut prev.table.utable, ()); + // TODO: Optimize away clearing of page tables? In that case, what about memory + // deallocation? + for grant in prev.grants.into_iter() { + grant.unmap(&mut prev.table.utable, ()); + } } - } - Ok(()) - })?, + Ok(()) + })?; + let _ = ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_ADDRSPACE_SWITCH, 0)); + } Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { context.files = new; Ok(()) @@ -1216,27 +1229,41 @@ extern "C" fn clone_handler() { } fn inherit_context() -> Result { - let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); - let new_context_lock = Arc::clone(context::contexts_mut().spawn(clone_handler)?); - - let current_context = current_context_lock.read(); - let mut new_context = new_context_lock.write(); - - new_context.status = Status::Stopped(SIGSTOP); - new_context.euid = current_context.euid; - new_context.egid = current_context.egid; - new_context.ruid = current_context.ruid; - new_context.rgid = current_context.rgid; - new_context.ens = current_context.ens; - new_context.rns = current_context.rns; - new_context.ppid = current_context.id; - new_context.pgid = current_context.pgid; - new_context.umask = current_context.umask; - new_context.sigmask = current_context.sigmask; - - // TODO: More to copy? - - Ok(new_context.id) + let new_id = { + let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); + let new_context_lock = Arc::clone(context::contexts_mut().spawn(clone_handler)?); + + let current_context = current_context_lock.read(); + let mut new_context = new_context_lock.write(); + + new_context.status = Status::Stopped(SIGSTOP); + new_context.euid = current_context.euid; + new_context.egid = current_context.egid; + new_context.ruid = current_context.ruid; + new_context.rgid = current_context.rgid; + new_context.ens = current_context.ens; + new_context.rns = current_context.rns; + new_context.ppid = current_context.id; + new_context.pgid = current_context.pgid; + new_context.umask = current_context.umask; + new_context.sigmask = current_context.sigmask; + new_context.cpu_id = current_context.cpu_id; + + // TODO: More to copy? 
+ + new_context.id + }; + + if ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_CLONE, new_id.into())).is_some() { + // Freeze the clone, allow ptrace to put breakpoints + // to it before it starts + let contexts = context::contexts(); + let context = contexts.get(new_id).expect("Newly created context doesn't exist??"); + let mut context = context.write(); + context.ptrace_stop = true; + } + + Ok(new_id) } fn extract_scheme_number(fd: usize) -> Result<(Arc, usize)> { let (scheme_id, number) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().get_file(FileHandle::from(fd)).ok_or(Error::new(EBADF))?.description.read() { diff --git a/src/scheme/user.rs b/src/scheme/user.rs index e55c9037..115d6bda 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -153,7 +153,7 @@ impl UserInner { let dst_address = round_down_pages(dst_address); let offset = address - src_address; let aligned_size = round_up_pages(offset + size); - let dst_region = addr_space.grants.find_free_at(VirtualAddress::new(dst_address), aligned_size, flags)?; + let dst_region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(dst_address), aligned_size, flags)?; let current_addrspace = Arc::clone( context::contexts().current().ok_or(Error::new(ESRCH))? diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index 325e413a..8a8d2940 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -94,7 +94,7 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) let mut addr_space = context.addr_space()?.write(); let addr_space = &mut *addr_space; - let dst_address = addr_space.grants.find_free(size).ok_or(Error::new(ENOMEM))?; + let dst_address = addr_space.grants.find_free(context.mmap_min, size).ok_or(Error::new(ENOMEM))?; let mut page_flags = PageFlags::new().user(true); if flags.contains(PHYSMAP_WRITE) { diff --git a/src/syscall/process.rs b/src/syscall/process.rs index 62b2a589..523d03b6 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -6,7 +6,7 @@ use core::mem; use spin::{RwLock, RwLockWriteGuard}; -use crate::context::{Context, ContextId, WaitpidKey}; +use crate::context::{Context, ContextId, memory::AddrSpace, WaitpidKey}; use crate::Bootstrap; use crate::context; @@ -289,54 +289,10 @@ pub fn kill(pid: ContextId, sig: usize) -> Result { pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result { // println!("mprotect {:#X}, {}, {:#X}", address, size, flags); - let end_address = address.checked_add(size).ok_or(Error::new(EFAULT))?; + if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 { return Err(Error::new(EINVAL)); } + if address.saturating_add(size) > crate::USER_END_OFFSET { return Err(Error::new(EFAULT)); } - let address_space = Arc::clone(context::current()?.read().addr_space()?); - let mut address_space = address_space.write(); - - let mut flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(address)); - let end_page = Page::containing_address(VirtualAddress::new(end_address)); - for page in Page::range_exclusive(start_page, end_page) { - // Check if the page is actually mapped before trying to change the flags. - // FIXME can other processes change if a page is mapped beneath our feet? 
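// Hedged sketch of the tracer side of this event: the child is parked with
// ptrace_stop before it ever runs, so a supervisor can prepare it first.
// Handle names and the exact resume protocol follow the existing proc:
// ptrace interface and are assumptions here:
//   1. wait for a PtraceEvent with cause PTRACE_EVENT_CLONE; `a` is the new pid
//   2. open proc:{new_pid}/trace (and e.g. regs) and install breakpoints
//   3. resume the child through its trace handle once setup is done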
- let mut page_flags = if let Some((_, flags)) = address_space.table.utable.translate(page.start_address()) { - flags - } else { - flush_all.flush(); - return Err(Error::new(EFAULT)); - }; - if !page_flags.has_present() { - flush_all.flush(); - return Err(Error::new(EFAULT)); - } - - if flags.contains(PROT_EXEC) { - page_flags = page_flags.execute(true); - } else { - page_flags = page_flags.execute(false); - } - - if flags.contains(PROT_WRITE) { - //TODO: Not allowing gain of write privileges - } else { - page_flags = page_flags.write(false); - } - - if flags.contains(PROT_READ) { - //TODO: No flags for readable pages - } else { - //TODO: No flags for readable pages - } - - let flush = unsafe { address_space.table.utable.remap(page.start_address(), page_flags).expect("failed to remap page in mprotect") }; - flush_all.consume(flush); - } - - flush_all.flush(); - - Ok(0) + AddrSpace::current()?.write().mprotect(Page::containing_address(VirtualAddress::new(address)), size / PAGE_SIZE, flags).map(|()| 0) } pub fn setpgid(pid: ContextId, pgid: ContextId) -> Result { -- GitLab From 939feacdc56e1e0c701d628a96223f9db9da4cc1 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Tue, 26 Jul 2022 20:57:33 +0200 Subject: [PATCH 42/44] Add fn for validating user memory range bounds. --- src/syscall/validate.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/syscall/validate.rs b/src/syscall/validate.rs index f4a05543..51f7b5ef 100644 --- a/src/syscall/validate.rs +++ b/src/syscall/validate.rs @@ -5,6 +5,7 @@ use core::{mem, slice, str}; use crate::context; +use crate::memory::PAGE_SIZE; use crate::paging::{Page, TableKind, VirtualAddress}; use crate::syscall::error::*; @@ -108,3 +109,13 @@ pub fn validate_str(ptr: *const u8, len: usize) -> Result<&'static str> { let slice = validate_slice(ptr, len)?; str::from_utf8(slice).map_err(|_| Error::new(EINVAL)) } + +pub fn validate_region(address: usize, size: usize) -> Result<(Page, usize)> { + if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || size == 0 { + return Err(Error::new(EINVAL)); + } + if address.saturating_add(size) > crate::USER_END_OFFSET { + return Err(Error::new(EFAULT)); + } + Ok((Page::containing_address(VirtualAddress::new(address)), size / PAGE_SIZE)) +} -- GitLab From 99362f98d2a251737e3bc67eb74b2a76b06422d3 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Tue, 26 Jul 2022 21:01:53 +0200 Subject: [PATCH 43/44] Add a better interface for modifying addrspaces. --- src/context/context.rs | 8 -- src/context/memory.rs | 84 ++++++++++++++- src/lib.rs | 2 + src/ptrace.rs | 12 +-- src/scheme/memory.rs | 37 +++---- src/scheme/mod.rs | 4 + src/scheme/proc.rs | 235 ++++++++++++++++++++++------------------- src/scheme/user.rs | 43 +++----- src/syscall/driver.rs | 61 +++++------ src/syscall/fs.rs | 64 +---------- src/syscall/mod.rs | 4 +- 11 files changed, 286 insertions(+), 268 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index abb53639..b9bc2332 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -258,11 +258,6 @@ pub struct Context { /// set since there is no interrupt stack (unless the kernel stack is copied, but that is in my /// opinion hackier and less efficient than this (and UB to do in Rust)). pub clone_entry: Option<[usize; 2]>, - /// Lowest offset for mmap invocations where the user has not already specified the offset - /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. 
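// Hedged examples of the new bounds check (x86_64, PAGE_SIZE = 4096):
assert!(validate_region(0x2000, 0x3000).is_ok());  // aligned, nonempty, in range
assert!(validate_region(0x2001, 0x1000).is_err()); // EINVAL: unaligned base
assert!(validate_region(0x2000, 0).is_err());      // EINVAL: empty range
assert!(validate_region(usize::MAX - 0xFFF, 0x1000).is_err()); // EFAULT: past USER_END_OFFSET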
Linux's `/proc/sys/vm/mmap_min_addr`, but with - /// the exception that we have a memory safe kernel which doesn't have to protect itself - /// against null pointers, so fixed mmaps are still allowed. - pub mmap_min: usize, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -378,7 +373,6 @@ impl Context { ptrace_stop: false, sigstack: None, clone_entry: None, - mmap_min: MMAP_MIN_DEFAULT, }; Ok(this) } @@ -572,5 +566,3 @@ impl Context { ); 128])) } } - -pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; diff --git a/src/context/memory.rs b/src/context/memory.rs index 13891d80..61f695e9 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -4,7 +4,7 @@ use core::borrow::Borrow; use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; use core::ops::Deref; -use spin::RwLock; +use spin::{RwLock, RwLockWriteGuard}; use syscall::{ flag::MapFlags, error::*, @@ -17,6 +17,8 @@ use crate::memory::{Enomem, Frame}; use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; use crate::paging::{KernelMapper, Page, PageFlags, PageIter, PageMapper, PhysicalAddress, RmmA, round_up_pages, VirtualAddress}; +pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; + pub fn page_flags(flags: MapFlags) -> PageFlags { PageFlags::new() .user(true) @@ -52,6 +54,11 @@ pub fn new_addrspace() -> Result>> { pub struct AddrSpace { pub table: Table, pub grants: UserGrants, + /// Lowest offset for mmap invocations where the user has not already specified the offset + /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with + /// the exception that we have a memory safe kernel which doesn't have to protect itself + /// against null pointers, so fixed mmaps to address zero are still allowed. 
+ pub mmap_min: usize, } impl AddrSpace { pub fn current() -> Result>> { @@ -101,6 +108,7 @@ impl AddrSpace { Ok(Self { grants: UserGrants::new(), table: setup_new_utable()?, + mmap_min: MMAP_MIN_DEFAULT, }) } pub fn is_current(&self) -> bool { @@ -131,7 +139,7 @@ impl AddrSpace { if let Some(before) = before { self.grants.insert(before); } if let Some(after) = after { self.grants.insert(after); } - if !grant.is_owned() && flags.contains(MapFlags::PROT_WRITE) && !grant.flags().has_write() { + if !grant.can_have_flags(flags) { self.grants.insert(grant); return Err(Error::new(EACCES)); } @@ -150,6 +158,75 @@ impl AddrSpace { } Ok(()) } + pub fn munmap(mut self: RwLockWriteGuard<'_, Self>, page: Page, page_count: usize) { + let mut notify_files = Vec::new(); + + let requested = Region::new(page.start_address(), page_count * PAGE_SIZE); + let mut flusher = PageFlushAll::new(); + + let conflicting: Vec = self.grants.conflicts(requested).map(Region::from).collect(); + + for conflict in conflicting { + let grant = self.grants.take(&conflict).expect("conflicting region didn't exist"); + let intersection = grant.intersect(requested); + let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts"); + + // Notify scheme that holds grant + if let Some(file_desc) = grant.desc_opt.take() { + notify_files.push((file_desc, intersection)); + } + + // Keep untouched regions + if let Some(before) = before { + self.grants.insert(before); + } + if let Some(after) = after { + self.grants.insert(after); + } + + // Remove irrelevant region + grant.unmap(&mut self.table.utable, &mut flusher); + } + drop(self); + + for (file_ref, intersection) in notify_files { + let scheme_id = { file_ref.desc.description.read().scheme }; + + let scheme = match crate::scheme::schemes().get(scheme_id) { + Some(scheme) => Arc::clone(scheme), + // One could argue that EBADFD could be returned here, but we have already unmapped + // the memory. + None => continue, + }; + // Same here, we don't really care about errors when schemes respond to unmap events. + // The caller wants the memory to be unmapped, period. When already unmapped, what + // would we do with error codes anyway? + let _ = scheme.funmap(intersection.start_address().data(), intersection.size()); + + let _ = file_ref.desc.close(); + } + } + pub fn mmap(&mut self, page: Option, page_count: usize, flags: MapFlags, map: impl FnOnce(Page, PageFlags, &mut PageMapper, &mut dyn Flusher) -> Result) -> Result { + // Finally, the end of all "T0DO: Abstract with other grant creation"! 
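// Hedged sketch of the caller this enables (the actual fs.rs change comes
// later in this series): funmap presumably reduces to a bounds check plus one
// call, with the write guard consumed by munmap() itself via
// arbitrary_self_types.
pub fn funmap_sketch(address: usize, size: usize) -> Result<usize> {
    let (page, page_count) = crate::syscall::validate::validate_region(address, size)?;
    AddrSpace::current()?.write().munmap(page, page_count);
    Ok(0)
}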
+ + let region = match page { + Some(page) => self.grants.find_free_at(self.mmap_min, page.start_address(), page_count * PAGE_SIZE, flags)?, + None => self.grants.find_free(self.mmap_min, page_count * PAGE_SIZE).ok_or(Error::new(ENOMEM))?, + }; + let page = Page::containing_address(region.start_address()); + + let (mut active, mut inactive); + let flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + + self.grants.insert(map(page, page_flags(flags), &mut self.table.utable, flusher)?); + Ok(page) + } } #[derive(Debug)] @@ -668,6 +745,9 @@ impl Grant { self.flags = flags; } + pub fn can_have_flags(&self, flags: MapFlags) -> bool { + self.owned || ((self.flags.has_write() || !flags.contains(MapFlags::PROT_WRITE)) && (self.flags.has_execute() || !flags.contains(MapFlags::PROT_EXEC))) + } pub fn unmap(mut self, mapper: &mut PageMapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); diff --git a/src/lib.rs b/src/lib.rs index f4f4c476..18646e04 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,9 +43,11 @@ #![deny(unused_must_use)] #![feature(allocator_api)] +#![feature(arbitrary_self_types)] #![feature(array_chunks)] #![feature(asm_const, asm_sym)] // TODO: Relax requirements of most asm invocations #![cfg_attr(target_arch = "aarch64", feature(llvm_asm))] // TODO: Rewrite using asm! +#![feature(bool_to_option)] #![feature(concat_idents)] #![feature(const_btree_new)] #![feature(const_ptr_offset_from)] diff --git a/src/ptrace.rs b/src/ptrace.rs index f1f01189..81d06115 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -465,7 +465,10 @@ fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator impl Iterator> + '_ { +pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let end = core::cmp::min(offset.data().saturating_add(len), crate::USER_END_OFFSET); + let len = end - offset.data(); + // TODO: Iterate over grants instead to avoid yielding None too many times. What if // context_memory is used for an entire process's address space, where the stack is at the very // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then @@ -474,12 +477,9 @@ pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: us // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the // possible exception of an unaligned head/tail. 
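// Hedged note on the (chunk, writable) pairs yielded by context_memory():
// exposing the page's write flag lets proc:<pid>/mem writes fail with EACCES
// on read-only mappings instead of silently bypassing page protections, e.g.:
//     let (chunk, writable) = chunk_opt.ok_or(Error::new(EFAULT))?;
//     if writing && !writable { return Err(Error::new(EACCES)); }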
- //log::info!("ADDR {:p} LEN {:#0x}", page as *const u8, len); - - // FIXME: verify flags before giving out slice - let (address, _flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; + let (address, flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; let start = RmmA::phys_to_virt(address).data() + addr % crate::memory::PAGE_SIZE; - Some(core::ptr::slice_from_raw_parts_mut(start as *mut u8, len)) + Some((core::ptr::slice_from_raw_parts_mut(start as *mut u8, len), flags.has_write())) }) } diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 2668f078..603ece51 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,7 +1,11 @@ +use alloc::sync::Arc; +use spin::RwLock; + use crate::context; -use crate::context::memory::{page_flags, Grant}; +use crate::context::memory::{AddrSpace, page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; use crate::paging::{mapper::PageFlushAll, Page, VirtualAddress}; + use crate::syscall::data::{Map, StatVfs}; use crate::syscall::error::*; use crate::syscall::flag::MapFlags; @@ -14,23 +18,16 @@ impl MemoryScheme { MemoryScheme } - pub fn fmap_anonymous(map: &Map) -> Result { - //TODO: Abstract with other grant creation - if map.size == 0 { - return Ok(0); - } - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; + pub fn fmap_anonymous(addr_space: &Arc>, map: &Map) -> Result { + let (requested_page, page_count) = crate::syscall::validate::validate_region(map.address, map.size)?; - let region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(map.address), map.size, map.flags)?.round(); + let page = addr_space + .write() + .mmap((map.address != 0).then_some(requested_page), page_count, map.flags, |page, flags, mapper, flusher| { + Ok(Grant::zeroed(page, page_count, flags, mapper, flusher)?) 
+ })?; - addr_space.grants.insert(Grant::zeroed(Page::containing_address(region.start_address()), map.size / PAGE_SIZE, page_flags(map.flags), &mut addr_space.table.utable, PageFlushAll::new())?); - - Ok(region.start_address().data()) + Ok(page.start_address().data()) } } impl Scheme for MemoryScheme { @@ -51,7 +48,7 @@ impl Scheme for MemoryScheme { } fn fmap(&self, _id: usize, map: &Map) -> Result { - Self::fmap_anonymous(map) + Self::fmap_anonymous(&Arc::clone(context::current()?.read().addr_space()?), map) } fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { @@ -72,4 +69,8 @@ impl Scheme for MemoryScheme { Ok(0) } } -impl crate::scheme::KernelScheme for MemoryScheme {} +impl crate::scheme::KernelScheme for MemoryScheme { + fn kfmap(&self, _number: usize, addr_space: &Arc>, map: &Map, _consume: bool) -> Result { + Self::fmap_anonymous(addr_space, map) + } +} diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index d5e1d5df..2e7df394 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -306,4 +306,8 @@ pub trait KernelScheme: Scheme + Send + Sync + 'static { fn as_sigactions(&self, number: usize) -> Result>>> { Err(Error::new(EBADF)) } + + fn kfmap(&self, number: usize, addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + Err(Error::new(EOPNOTSUPP)) + } } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index cb33744f..23d7f9c6 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -146,7 +146,7 @@ enum Operation { CurrentSigactions, AwaitingSigactionsChange(Arc>>), - MmapMinAddr, + MmapMinAddr(Arc>), } #[derive(Clone, Copy, PartialEq, Eq)] enum Attr { @@ -283,12 +283,16 @@ impl ProcScheme { } } +fn get_context(id: ContextId) -> Result>> { + context::contexts().get(id).ok_or(Error::new(ENOENT)).map(Arc::clone) +} + impl ProcScheme { fn open_inner(&self, pid: ContextId, operation_str: Option<&str>, flags: usize, uid: u32, gid: u32) -> Result { let operation = match operation_str { - Some("mem") => Operation::Memory { addrspace: AddrSpace::current()? }, - Some("addrspace") => Operation::AddrSpace { addrspace: AddrSpace::current()? }, - Some("filetable") => Operation::Filetable { filetable: Arc::clone(&context::current()?.read().files) }, + Some("mem") => Operation::Memory { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) }, + Some("addrspace") => Operation::AddrSpace { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) }, + Some("filetable") => Operation::Filetable { filetable: Arc::clone(&get_context(pid)?.read().files) }, Some("current-addrspace") => Operation::CurrentAddrSpace, Some("current-filetable") => Operation::CurrentFiletable, Some("regs/float") => Operation::Regs(RegsKind::Float), @@ -302,9 +306,9 @@ impl ProcScheme { Some("uid") => Operation::Attr(Attr::Uid), Some("gid") => Operation::Attr(Attr::Gid), Some("open_via_dup") => Operation::OpenViaDup, - Some("sigactions") => Operation::Sigactions(Arc::clone(&context::current()?.read().actions)), + Some("sigactions") => Operation::Sigactions(Arc::clone(&get_context(pid)?.read().actions)), Some("current-sigactions") => Operation::CurrentSigactions, - Some("mmap-min-addr") => Operation::MmapMinAddr, + Some("mmap-min-addr") => Operation::MmapMinAddr(Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?)), _ => return Err(Error::new(EINVAL)) }; @@ -459,6 +463,7 @@ impl Scheme for ProcScheme { b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()? 
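// Hedged sketch for scheme authors: `MyScheme` is hypothetical; any scheme
// able to produce a Grant for a foreign address space can override kfmap to
// map into `addr_space` rather than the caller's current one.
impl crate::scheme::KernelScheme for MyScheme {
    fn kfmap(&self, _number: usize, addr_space: &Arc<RwLock<AddrSpace>>,
             map: &Map, _consume: bool) -> Result<usize> {
        // Delegating to plain zeroed pages keeps the sketch minimal.
        MemoryScheme::fmap_anonymous(addr_space, map)
    }
}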
}, false), b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.write().try_clone()? }, false), b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true), + b"mmap-min-addr" => (Operation::MmapMinAddr(Arc::clone(&addrspace)), false), grant_handle if grant_handle.starts_with(b"grant-") => { let start_addr = usize::from_str_radix(core::str::from_utf8(&grant_handle[6..]).map_err(|_| Error::new(EINVAL))?, 16).map_err(|_| Error::new(EINVAL))?; @@ -529,7 +534,7 @@ impl Scheme for ProcScheme { let mut bytes_read = 0; for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { - let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let (chunk, _writable) = chunk_opt.ok_or(Error::new(EFAULT))?; let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; unsafe { chunk.as_mut_ptr().copy_to_nonoverlapping(dst_slice.as_mut_ptr(), dst_slice.len()); @@ -694,8 +699,8 @@ impl Scheme for ProcScheme { read_from(buf, &data.buf, &mut data.offset) } - Operation::MmapMinAddr => { - let val = with_context(info.pid, |context| Ok(context.mmap_min))?; + Operation::MmapMinAddr(ref addrspace) => { + let val = addrspace.read().mmap_min; *buf.array_chunks_mut::<{mem::size_of::()}>().next().unwrap() = usize::to_ne_bytes(val); Ok(mem::size_of::()) } @@ -734,7 +739,10 @@ impl Scheme for ProcScheme { let mut bytes_written = 0; for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { - let chunk = chunk_opt.ok_or(Error::new(EFAULT))?; + let (chunk, writable) = chunk_opt.ok_or(Error::new(EFAULT))?; + + if !writable { return Err(Error::new(EACCES)); } + let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; unsafe { chunk.as_mut_ptr().copy_from_nonoverlapping(src_slice.as_ptr(), src_slice.len()); @@ -742,94 +750,42 @@ impl Scheme for ProcScheme { bytes_written += chunk.len(); } - data.offset = VirtualAddress::new(data.offset.data() + bytes_written); + data.offset = data.offset.add(bytes_written); Ok(bytes_written) }, Operation::AddrSpace { addrspace } => { - // FIXME: Forbid upgrading external mappings. - let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); - // Update grant mappings, like mprotect but allowed to target other contexts. - let base = chunks.next().ok_or(Error::new(EINVAL))?; - let size = chunks.next().ok_or(Error::new(EINVAL))?; - let flags = chunks.next().and_then(|f| MapFlags::from_bits(f)).ok_or(Error::new(EINVAL))?; - let src_address = chunks.next(); - - if base % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || base.saturating_add(size) > crate::USER_END_OFFSET { - return Err(Error::new(EINVAL)); - } + let mut next = || chunks.next().ok_or(Error::new(EINVAL)); - let mut addrspace = addrspace.write(); - let addrspace = &mut *addrspace; - let is_active = addrspace.is_current(); + match next()? 
{ + op @ ADDRSPACE_OP_MMAP | op @ ADDRSPACE_OP_TRANSFER => { + let fd = next()?; + let offset = next()?; + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; - let (mut inactive, mut active); - - let mut flusher = if is_active { - active = PageFlushAll::new(); - &mut active as &mut dyn Flusher - } else { - inactive = InactiveFlusher::new(); - &mut inactive as &mut dyn Flusher - }; + if !flags.contains(MapFlags::MAP_FIXED) { + return Err(Error::new(EOPNOTSUPP)); + } - let region = Region::new(VirtualAddress::new(base), size); - let conflicting = addrspace.grants.conflicts(region).map(|g| *g.region()).collect::>(); - for conflicting_region in conflicting { - let whole_grant = addrspace.grants.take(&conflicting_region).ok_or(Error::new(EBADFD))?; - let (before_opt, current, after_opt) = whole_grant.extract(region.intersect(conflicting_region)).ok_or(Error::new(EBADFD))?; + let (scheme, number) = extract_scheme_number(fd)?; - if let Some(before) = before_opt { - addrspace.grants.insert(before); + return scheme.kfmap(number, &addrspace, &Map { offset, size: page_count * PAGE_SIZE, address: page.start_address().data(), flags }, op == ADDRSPACE_OP_TRANSFER); } - if let Some(after) = after_opt { - addrspace.grants.insert(after); - } - - let res = current.unmap(&mut addrspace.table.utable, &mut flusher); + ADDRSPACE_OP_MUNMAP => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; - if res.file_desc.is_some() { - // We prefer avoiding file operations from within the kernel. If userspace - // updates grants that overlap, it might as well enumerate grants and call - // partial funmap on its own. - return Err(Error::new(EBUSY)); + addrspace.write().munmap(page, page_count); } - } - - let base_page = Page::containing_address(VirtualAddress::new(base)); + ADDRSPACE_OP_MPROTECT => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; - if let Some(src_address) = src_address { - // Forbid transferring grants to the same address space! - if is_active { return Err(Error::new(EBUSY)); } - - let current_addrspace = AddrSpace::current()?; - let mut current_addrspace = current_addrspace.write(); - let current_addrspace = &mut *current_addrspace; - let src_grant = current_addrspace.grants.take(&Region::new(VirtualAddress::new(src_address), size)).ok_or(Error::new(EINVAL))?; - - if src_address % PAGE_SIZE != 0 || src_address.saturating_add(size) > crate::USER_END_OFFSET { - return Err(Error::new(EINVAL)); + addrspace.write().mprotect(page, page_count, flags)?; } - - // TODO: Allow downgrading flags? - - addrspace.grants.insert(Grant::transfer( - src_grant, - base_page, - &mut current_addrspace.table.utable, - &mut addrspace.table.utable, - PageFlushAll::new(), - flusher, - )?); - } else if flags.intersects(MapFlags::PROT_READ | MapFlags::PROT_EXEC | MapFlags::PROT_WRITE) { - addrspace.grants.insert(Grant::zeroed(base_page, size / PAGE_SIZE, page_flags(flags), &mut addrspace.table.utable, flusher)?); + _ => return Err(Error::new(EINVAL)), } - - // TODO: Set some "in use" flag every time an address space is switched to? This - // way, we know what hardware threads are using any given page table, which we need - // to know while doing TLB shootdown. 
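// Hedged sketch of this wire format from userspace: each request is written
// as a native-endian [op, args..] usize array; the ADDRSPACE_OP_* values are
// assumed to live in the syscall crate next to MapFlags.
fn addrspace_mmap(space_fd: usize, grant_fd: usize, offset: usize,
                  address: usize, size: usize, flags: usize) -> syscall::Result<usize> {
    // `flags` must include MAP_FIXED; the kernel rejects anything else on
    // this path with EOPNOTSUPP.
    let words = [ADDRSPACE_OP_MMAP, grant_fd, offset, address, size, flags];
    let mut buf = [0_u8; 6 * core::mem::size_of::<usize>()];
    for (dst, word) in buf.chunks_exact_mut(core::mem::size_of::<usize>()).zip(words.iter()) {
        dst.copy_from_slice(&word.to_ne_bytes());
    }
    syscall::write(space_fd, &buf)
}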
- - Ok((3 + usize::from(src_address.is_some())) * mem::size_of::()) + Ok(0) } Operation::Regs(kind) => match kind { RegsKind::Float => { @@ -1036,9 +992,10 @@ impl Scheme for ProcScheme { self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingSigactionsChange(sigactions); Ok(mem::size_of::()) } - Operation::MmapMinAddr => { + Operation::MmapMinAddr(ref addrspace) => { let val = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); - with_context_mut(info.pid, |context| { context.mmap_min = val; Ok(()) })?; + if val % PAGE_SIZE != 0 || val > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); } + addrspace.write().mmap_min = val; Ok(mem::size_of::()) } _ => return Err(Error::new(EBADF)), @@ -1091,7 +1048,7 @@ impl Scheme for ProcScheme { Operation::CurrentFiletable => "current-filetable", Operation::CurrentSigactions => "current-sigactions", Operation::OpenViaDup => "open-via-dup", - Operation::MmapMinAddr => "mmap-min-addr", + Operation::MmapMinAddr(_) => "mmap-min-addr", _ => return Err(Error::new(EOPNOTSUPP)), }); @@ -1138,21 +1095,16 @@ impl Scheme for ProcScheme { let prev_addr_space = context.set_addr_space(new); - if let Some(mut prev) = prev_addr_space.and_then(|a| Arc::try_unwrap(a).ok()).map(RwLock::into_inner) { - // We are the last reference to the address space; therefore it must be - // unmapped. - - // TODO: Optimize away clearing of page tables? In that case, what about memory - // deallocation? - for grant in prev.grants.into_iter() { - grant.unmap(&mut prev.table.utable, ()); - } + if let Some(prev_addr_space) = prev_addr_space { + maybe_cleanup_addr_space(prev_addr_space); } Ok(()) })?; let _ = ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_ADDRSPACE_SWITCH, 0)); } + Operation::AddrSpace { addrspace } | Operation::Memory { addrspace } | Operation::MmapMinAddr(addrspace) => maybe_cleanup_addr_space(addrspace), + Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { context.files = new; Ok(()) @@ -1178,19 +1130,8 @@ impl Scheme for ProcScheme { } Ok(0) } - // TODO: Support borrowing someone else's memory. fn fmap(&self, id: usize, map: &Map) -> Result { - let description_lock = match self.handles.read().get(&id) { - Some(Handle { info: Info { operation: Operation::GrantHandle { ref description }, .. }, .. 
}) => Arc::clone(description), - _ => return Err(Error::new(EBADF)), - }; - let (scheme_id, number) = { - let description = description_lock.read(); - - (description.scheme, description.number) - }; - let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); - scheme.fmap(number, map) + self.kfmap(id, &AddrSpace::current()?, map, false) } } impl KernelScheme for ProcScheme { @@ -1215,6 +1156,71 @@ impl KernelScheme for ProcScheme { Err(Error::new(EBADF)) } } + fn kfmap(&self, id: usize, dst_addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + let info = self.handles.read().get(&id).ok_or(Error::new(EBADF))?.info.clone(); + + match info.operation { + Operation::GrantHandle { ref description } => { + let (scheme_id, number) = { + let description = description.read(); + + (description.scheme, description.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); + scheme.fmap(number, map) + } + Operation::AddrSpace { ref addrspace } => { + if Arc::ptr_eq(addrspace, dst_addr_space) { + return Err(Error::new(EBUSY)); + } + // Limit to transferring/borrowing at most one grant, or part of a grant (splitting + // will be mandatory if grants are coalesced). + + let (requested_dst_page, page_count) = crate::syscall::validate_region(map.address, map.size)?; + let (src_page, _) = crate::syscall::validate_region(map.offset, map.size)?; + + let requested_dst_page = (map.address != 0).then_some(requested_dst_page); + + let mut src_addr_space = addrspace.write(); + let src_addr_space = &mut *src_addr_space; + let mut dst_addr_space = dst_addr_space.write(); + + let src_grant_region = { + let src_region = Region::new(src_page.start_address(), page_count * PAGE_SIZE); + let mut conflicts = src_addr_space.grants.conflicts(src_region); + let first = conflicts.next().ok_or(Error::new(EINVAL))?; + if conflicts.next().is_some() { + return Err(Error::new(EINVAL)); + } + + if !first.can_have_flags(map.flags) { + return Err(Error::new(EACCES)); + } + + first.region().intersect(src_region) + }; + + let grant_page_count = src_grant_region.size() / PAGE_SIZE; + + let src_mapper = &mut src_addr_space.table.utable; + + let result_page = if consume { + let grant = src_addr_space.grants.take(&src_grant_region).expect("grant cannot disappear"); + let (before, middle, after) = grant.extract(src_grant_region).expect("called intersect(), must succeed"); + + if let Some(before) = before { src_addr_space.grants.insert(before); } + if let Some(after) = after { src_addr_space.grants.insert(after); } + + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, dst_flusher| Ok(Grant::transfer(middle, dst_page, src_mapper, dst_mapper, InactiveFlusher::new(), dst_flusher)?))? + } else { + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, flusher| Ok(Grant::borrow(Page::containing_address(src_grant_region.start_address()), dst_page, grant_page_count, flags, None, src_mapper, dst_mapper, flusher)?))? 
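// In brief (hedged reading of the AddrSpace arm above): consume == false maps
// the same frames a second time (borrow), while consume == true removes the
// grant from the source space and moves its frames (ADDRSPACE_OP_TRANSFER).
// Exactly one source grant may be touched per call, and can_have_flags()
// gates the requested permissions:
//   owned grant,    any PROT_* request             -> allowed
//   borrowed grant, request within existing flags  -> allowed (e.g. RW -> RO)
//   borrowed grant, adding PROT_WRITE or PROT_EXEC -> EACCES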
+ }; + + Ok(result_page.start_address().data()) + } + _ => return Err(Error::new(EBADF)), + } + } } extern "C" fn clone_handler() { let context_lock = Arc::clone(context::contexts().current().expect("expected the current context to be set in a spawn closure")); @@ -1273,3 +1279,16 @@ fn extract_scheme_number(fd: usize) -> Result<(Arc, usize)> { Ok((scheme, number)) } +fn maybe_cleanup_addr_space(addr_space: Arc>) { + if let Ok(mut space) = Arc::try_unwrap(addr_space).map(RwLock::into_inner) { + // We are the last reference to the address space; therefore it must be + // unmapped. + + // TODO: Optimize away clearing of page tables? In that case, what about memory + // deallocation? + for grant in space.grants.into_iter() { + grant.unmap(&mut space.table.utable, ()); + } + } + +} diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 115d6bda..0f7f2ccc 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -8,7 +8,7 @@ use spin::{Mutex, RwLock}; use crate::context::{self, Context}; use crate::context::file::FileDescriptor; -use crate::context::memory::{DANGLING, page_flags, Grant, Region, GrantFileRef}; +use crate::context::memory::{AddrSpace, DANGLING, page_flags, Grant, Region, GrantFileRef}; use crate::event; use crate::paging::{PAGE_SIZE, mapper::InactiveFlusher, Page, round_down_pages, round_up_pages, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; @@ -128,8 +128,6 @@ impl UserInner { // where the initial context is closed. fn capture_inner(context_weak: &Weak>, dst_address: usize, address: usize, size: usize, flags: MapFlags, desc_opt: Option) -> Result { - // TODO: More abstractions over grant creation! - if size == 0 { // NOTE: Rather than returning NULL, we return a dummy dangling address, that is also // non-canonical on x86. This means that scheme handlers do not need to check the @@ -143,36 +141,23 @@ impl UserInner { return Ok(VirtualAddress::new(DANGLING)); } - let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - - let mut addr_space = context.addr_space()?.write(); - let addr_space = &mut *addr_space; + let dst_addr_space = Arc::clone(context_weak.upgrade().ok_or(Error::new(ESRCH))?.read().addr_space()?); + let mut dst_addr_space = dst_addr_space.write(); - let src_address = round_down_pages(address); - let dst_address = round_down_pages(dst_address); - let offset = address - src_address; - let aligned_size = round_up_pages(offset + size); - let dst_region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(dst_address), aligned_size, flags)?; + let src_page = Page::containing_address(VirtualAddress::new(round_down_pages(address))); + let offset = address - src_page.start_address().data(); + let page_count = round_up_pages(offset + size) / PAGE_SIZE; + let requested_dst_page = (dst_address != 0).then_some(Page::containing_address(VirtualAddress::new(round_down_pages(dst_address)))); - let current_addrspace = Arc::clone( - context::contexts().current().ok_or(Error::new(ESRCH))? - .read().addr_space()? 
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index 115d6bda..0f7f2ccc 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -8,7 +8,7 @@ use spin::{Mutex, RwLock};
 use crate::context::{self, Context};
 use crate::context::file::FileDescriptor;
-use crate::context::memory::{DANGLING, page_flags, Grant, Region, GrantFileRef};
+use crate::context::memory::{AddrSpace, DANGLING, page_flags, Grant, Region, GrantFileRef};
 use crate::event;
 use crate::paging::{PAGE_SIZE, mapper::InactiveFlusher, Page, round_down_pages, round_up_pages, VirtualAddress};
 use crate::scheme::{AtomicSchemeId, SchemeId};
@@ -128,8 +128,6 @@ impl UserInner {
     // where the initial context is closed.
     fn capture_inner(context_weak: &Weak<RwLock<Context>>, dst_address: usize, address: usize, size: usize, flags: MapFlags, desc_opt: Option<GrantFileRef>) -> Result<VirtualAddress> {
-        // TODO: More abstractions over grant creation!
-
         if size == 0 {
             // NOTE: Rather than returning NULL, we return a dummy dangling address, that is also
             // non-canonical on x86. This means that scheme handlers do not need to check the
@@ -143,36 +141,23 @@ impl UserInner {
             return Ok(VirtualAddress::new(DANGLING));
         }
 
-        let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?;
-        let mut context = context_lock.write();
-
-        let mut addr_space = context.addr_space()?.write();
-        let addr_space = &mut *addr_space;
+        let dst_addr_space = Arc::clone(context_weak.upgrade().ok_or(Error::new(ESRCH))?.read().addr_space()?);
+        let mut dst_addr_space = dst_addr_space.write();
 
-        let src_address = round_down_pages(address);
-        let dst_address = round_down_pages(dst_address);
-        let offset = address - src_address;
-        let aligned_size = round_up_pages(offset + size);
-        let dst_region = addr_space.grants.find_free_at(context.mmap_min, VirtualAddress::new(dst_address), aligned_size, flags)?;
+        let src_page = Page::containing_address(VirtualAddress::new(round_down_pages(address)));
+        let offset = address - src_page.start_address().data();
+        let page_count = round_up_pages(offset + size) / PAGE_SIZE;
+        let requested_dst_page = (dst_address != 0).then_some(Page::containing_address(VirtualAddress::new(round_down_pages(dst_address))));
 
-        let current_addrspace = Arc::clone(
-            context::contexts().current().ok_or(Error::new(ESRCH))?
-                .read().addr_space()?
-        );
+        let current_addrspace = AddrSpace::current()?;
+        let mut current_addrspace = current_addrspace.write();
 
         //TODO: Use syscall_head and syscall_tail to avoid leaking data
-        addr_space.grants.insert(Grant::borrow(
-            Page::containing_address(VirtualAddress::new(src_address)),
-            Page::containing_address(dst_region.start_address()),
-            aligned_size / PAGE_SIZE,
-            page_flags(flags),
-            desc_opt,
-            &mut current_addrspace.write().table.utable,
-            &mut addr_space.table.utable,
-            InactiveFlusher::new(),
-        )?);
-
-        Ok(VirtualAddress::new(dst_region.start_address().data() + offset))
+        let dst_page = dst_addr_space.mmap(requested_dst_page, page_count, flags, |dst_page, page_flags, mapper, flusher| {
+            Ok(Grant::borrow(src_page, dst_page, page_count, page_flags, desc_opt, &mut current_addrspace.table.utable, mapper, flusher)?)
+        })?;
+
+        Ok(dst_page.start_address().add(offset))
     }
 
     pub fn release(&self, address: usize) -> Result<()> {
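
capture_inner now derives everything it needs from three numbers: the
page-rounded source base, the in-page offset, and the rounded-up page count.
A small sketch of that arithmetic, assuming a 4 KiB power-of-two page size
(round_down_pages and round_up_pages are stand-ins for the kernel helpers of
the same name):

    const PAGE_SIZE: usize = 4096;

    fn round_down_pages(x: usize) -> usize { x & !(PAGE_SIZE - 1) }
    fn round_up_pages(x: usize) -> usize { round_down_pages(x + PAGE_SIZE - 1) }

    fn main() {
        let address = 0x1234; // unaligned buffer address from the caller
        let size = 0x20;
        let src_base = round_down_pages(address);                   // 0x1000
        let offset = address - src_base;                            // 0x234
        let page_count = round_up_pages(offset + size) / PAGE_SIZE; // 1 page
        assert_eq!((src_base, offset, page_count), (0x1000, 0x234, 1));
    }

The returned address is the start of the destination page plus the same
offset, so the caller keeps its original sub-page alignment.
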
diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs
index 8a8d2940..9440dd09 100644
--- a/src/syscall/driver.rs
+++ b/src/syscall/driver.rs
@@ -77,47 +77,38 @@ pub fn physfree(physical_address: usize, size: usize) -> Result<usize> {
 // `physaddr` to `address` (optional) will map that physical address. We would have to find out
 // some way to pass flags such as WRITE_COMBINE/NO_CACHE however.
 pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result<usize> {
-    //TODO: Abstract with other grant creation
-    if size == 0 {
-        return Ok(DANGLING);
-    }
-    if size % PAGE_SIZE != 0 || physical_address % PAGE_SIZE != 0 {
+    // TODO: Check physical_address against MAXPHYADDR.
+
+    let end = 1 << 52;
+    if physical_address.saturating_add(size) > end || physical_address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 {
         return Err(Error::new(EINVAL));
     }
-    // TODO: Enforce size being a multiple of the page size, fail otherwise.
+    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
-    let contexts = context::contexts();
-    let context_lock = contexts.current().ok_or(Error::new(ESRCH))?;
-    let context = context_lock.read();
-
-    let mut addr_space = context.addr_space()?.write();
-    let addr_space = &mut *addr_space;
-
-    let dst_address = addr_space.grants.find_free(context.mmap_min, size).ok_or(Error::new(ENOMEM))?;
+    let mut addr_space = addr_space.write();
 
-    let mut page_flags = PageFlags::new().user(true);
-    if flags.contains(PHYSMAP_WRITE) {
-        page_flags = page_flags.write(true);
-    }
-    if flags.contains(PHYSMAP_WRITE_COMBINE) {
-        page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true);
-    }
-    #[cfg(target_arch = "x86_64")] // TODO: AARCH64
-    if flags.contains(PHYSMAP_NO_CACHE) {
-        page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true);
-    }
-
-    addr_space.grants.insert(Grant::physmap(
-        Frame::containing_address(PhysicalAddress::new(physical_address)),
-        Page::containing_address(dst_address.start_address()),
-        size / PAGE_SIZE,
-        page_flags,
-        &mut addr_space.table.utable,
-        PageFlushAll::new(),
-    )?);
+    addr_space.mmap(None, size / PAGE_SIZE, Default::default(), |dst_page, _, dst_mapper, dst_flusher| {
+        let mut page_flags = PageFlags::new().user(true);
+        if flags.contains(PHYSMAP_WRITE) {
+            page_flags = page_flags.write(true);
+        }
+        if flags.contains(PHYSMAP_WRITE_COMBINE) {
+            page_flags = page_flags.custom_flag(EntryFlags::HUGE_PAGE.bits(), true);
+        }
+        #[cfg(target_arch = "x86_64")] // TODO: AARCH64
+        if flags.contains(PHYSMAP_NO_CACHE) {
+            page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true);
+        }
+        Grant::physmap(
+            Frame::containing_address(PhysicalAddress::new(physical_address)),
+            dst_page,
+            size / PAGE_SIZE,
+            page_flags,
+            dst_mapper,
+            dst_flusher,
+        )
+    }).map(|page| page.start_address().data())
-
-    Ok(dst_address.start_address().data())
 }
 
 // TODO: Remove this syscall, funmap makes it redundant.
 pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result<usize> {
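
inner_physmap's new precondition rejects a hostile (address, size) pair before
any mapping work happens. The sketch below isolates that check: saturating_add
makes the overflow case fail closed, and 1 << 52 mirrors the TODO's
placeholder bound pending a real MAXPHYADDR query (a 64-bit usize is assumed
throughout):

    const PAGE_SIZE: usize = 4096;

    fn validate_phys(physical_address: usize, size: usize) -> Result<(), &'static str> {
        let end = 1usize << 52; // placeholder for MAXPHYADDR, as in the TODO
        if physical_address.saturating_add(size) > end
            || physical_address % PAGE_SIZE != 0
            || size % PAGE_SIZE != 0
        {
            return Err("EINVAL");
        }
        Ok(())
    }

    fn main() {
        assert!(validate_phys(0x1000, 0x2000).is_ok());
        assert!(validate_phys(usize::MAX & !0xFFF, 0x1000).is_err()); // add would overflow
        assert!(validate_phys(0x1001, 0x1000).is_err());              // unaligned address
    }
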
diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index 7b8b052b..353eab53 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -1,14 +1,10 @@
 //! Filesystem syscalls
 use alloc::sync::Arc;
-use alloc::vec::Vec;
 use core::str;
 use spin::RwLock;
 
 use crate::context::file::{FileDescriptor, FileDescription};
-use crate::context::memory::Region;
 use crate::context;
-use crate::memory::PAGE_SIZE;
-use crate::paging::{mapper::PageFlushAll, VirtualAddress};
 use crate::scheme::{self, FileHandle};
 use crate::syscall::data::{Packet, Stat};
 use crate::syscall::error::*;
@@ -469,64 +465,10 @@ pub fn fstat(fd: FileHandle, stat: &mut Stat) -> Result<usize> {
 }
 
 pub fn funmap(virtual_address: usize, length: usize) -> Result<usize> {
-    if virtual_address == 0 || length == 0 {
-        return Ok(0);
-    } else if virtual_address % PAGE_SIZE != 0 || length % PAGE_SIZE != 0 {
-        return Err(Error::new(EINVAL));
-    }
-
-    let mut notify_files = Vec::new();
-
-    let virtual_address = VirtualAddress::new(virtual_address);
-    let requested = Region::new(virtual_address, length);
-    let mut flusher = PageFlushAll::new();
-
-    {
-        let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?);
-        let context = context_lock.read();
-
-        let mut addr_space = context.addr_space()?.write();
-        let addr_space = &mut *addr_space;
-
-        let conflicting: Vec<Region> = addr_space.grants.conflicts(requested).map(Region::from).collect();
-
-        for conflict in conflicting {
-            let grant = addr_space.grants.take(&conflict).expect("conflicting region didn't exist");
-            let intersection = grant.intersect(requested);
-            let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts");
-
-            // Notify scheme that holds grant
-            if let Some(file_desc) = grant.desc_opt.take() {
-                notify_files.push((file_desc, intersection));
-            }
-
-            // Keep untouched regions
-            if let Some(before) = before {
-                addr_space.grants.insert(before);
-            }
-            if let Some(after) = after {
-                addr_space.grants.insert(after);
-            }
+    let (page, page_count) = crate::syscall::validate::validate_region(virtual_address, length)?;
-            // Remove irrelevant region
-            grant.unmap(&mut addr_space.table.utable, &mut flusher);
-        }
-    }
-
-    for (file_ref, intersection) in notify_files {
-        let scheme_id = { file_ref.desc.description.read().scheme };
-
-        let scheme = {
-            let schemes = scheme::schemes();
-            let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?;
-            scheme.clone()
-        };
-        let res = scheme.funmap(intersection.start_address().data(), intersection.size());
-
-        let _ = file_ref.desc.close();
-
-        res?;
-    }
+    let addr_space = Arc::clone(context::current()?.read().addr_space()?);
+    addr_space.write().munmap(page, page_count);
 
     Ok(0)
 }
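
funmap is thus reduced to validating the range and delegating the actual
unmapping (including scheme notification) to AddrSpace::munmap. The kernel's
validate_region returns a (Page, page count) pair; the stand-in below returns
page indices instead, but keeps the same alignment and overflow rules
(rejecting a zero-length range here is an assumption, not confirmed by the
patch):

    const PAGE_SIZE: usize = 4096;

    fn validate_region(address: usize, size: usize) -> Result<(usize, usize), &'static str> {
        if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || size == 0 {
            return Err("EINVAL"); // unaligned or empty
        }
        if address.checked_add(size).is_none() {
            return Err("EINVAL"); // range wraps around the address space
        }
        Ok((address / PAGE_SIZE, size / PAGE_SIZE)) // (first page index, page count)
    }

    fn main() {
        assert_eq!(validate_region(0x2000, 0x3000), Ok((2, 3)));
        assert!(validate_region(0x2001, 0x1000).is_err());
    }
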
diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs
index e25cf044..613bb00f 100644
--- a/src/syscall/mod.rs
+++ b/src/syscall/mod.rs
@@ -25,6 +25,8 @@ pub use self::process::*;
 pub use self::time::*;
 pub use self::validate::*;
 
+use self::scheme::Scheme as _;
+
 use self::data::{Map, SigAction, Stat, TimeSpec};
 use self::error::{Error, Result, ENOSYS, EINVAL};
 use self::flag::{MapFlags, PhysmapFlags, WaitFlags};
@@ -70,7 +72,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u
     match a & SYS_ARG {
         SYS_ARG_SLICE => match a {
             SYS_FMAP if b == !0 => {
-                MemoryScheme::fmap_anonymous(unsafe { validate_ref(c as *const Map, d)? })
+                MemoryScheme.fmap(!0, unsafe { validate_ref(c as *const Map, d)? })
             },
             _ => file_op_slice(a, fd, validate_slice(c as *const u8, d)?),
         }
-- 
GitLab


From e58bf714d87845792bca6a02bfd95f90a440d0c6 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Wed, 27 Jul 2022 17:39:16 +0200
Subject: [PATCH 44/44] Update rmm and syscall

---
 rmm     | 2 +-
 syscall | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rmm b/rmm
index 9462df03..5700899e 160000
--- a/rmm
+++ b/rmm
@@ -1 +1 @@
-Subproject commit 9462df03e786312b6ce197cf56113d411412cbb2
+Subproject commit 5700899e9ad5826e9ab65934b2b8de88de792b87
diff --git a/syscall b/syscall
index d6af2661..fac87ee3 160000
--- a/syscall
+++ b/syscall
@@ -1 +1 @@
-Subproject commit d6af266119e7b4a3b0e9a04c63b3cfcfac94781a
+Subproject commit fac87ee3c74e5e504a74f2713301c1ddc7d43d17
-- 
GitLab