diff --git a/Cargo.lock b/Cargo.lock index 33f358d4d09a130bfae9388919dfb053f2a19692..8c23a1fcc3fdf331d29cd19ec9501593702bed52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,7 +76,6 @@ dependencies = [ "log", "memoffset", "raw-cpuid", - "redox-initfs", "redox_syscall", "rmm", "rustc-cfg", @@ -116,9 +115,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] @@ -147,14 +146,6 @@ dependencies = [ "bitflags", ] -[[package]] -name = "redox-initfs" -version = "0.1.0" -source = "git+https://gitlab.redox-os.org/redox-os/redox-initfs.git#89b8fb8984cf96c418880b7dcd9ce3d6afc3f71c" -dependencies = [ - "plain", -] - [[package]] name = "redox_syscall" version = "0.2.16" diff --git a/Cargo.toml b/Cargo.toml index 2bd3aa7e179fd00d523815ae9b4259bd528b262f..182aac59c894ed5a4b4bc972bbddc6e93b9b3900 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,6 @@ slab_allocator = { path = "slab_allocator", optional = true } # FIXME: There is some undefined behavior probably in the kernel, which forces us to use spin 0.9.0 and not 0.9.2. spin = "=0.9.0" rmm = { path = "rmm", default-features = false } -redox-initfs = { git = "https://gitlab.redox-os.org/redox-os/redox-initfs.git", features = ["kernel"], default-features = false } [dependencies.goblin] version = "0.2.1" diff --git a/rmm b/rmm index 0944b17983223966e339a25f9328bdb77a59d5c7..5700899e9ad5826e9ab65934b2b8de88de792b87 160000 --- a/rmm +++ b/rmm @@ -1 +1 @@ -Subproject commit 0944b17983223966e339a25f9328bdb77a59d5c7 +Subproject commit 5700899e9ad5826e9ab65934b2b8de88de792b87 diff --git a/src/acpi/hpet.rs b/src/acpi/hpet.rs index 498a6681bb0f2b41396d64a4ddb867744bd5b24c..18e9ee09b219cf2de936d12ede0088b3f8620205 100644 --- a/src/acpi/hpet.rs +++ b/src/acpi/hpet.rs @@ -3,7 +3,7 @@ use core::{mem, ptr}; use core::intrinsics::{volatile_load, volatile_store}; use crate::memory::Frame; -use crate::paging::{ActivePageTable, PhysicalAddress, Page, PageFlags, VirtualAddress}; +use crate::paging::{KernelMapper, PhysicalAddress, PageFlags}; use super::sdt::Sdt; use super::{ACPI_TABLE, find_sdt}; @@ -35,10 +35,10 @@ pub struct Hpet { } impl Hpet { - pub fn init(active_table: &mut ActivePageTable) { + pub fn init() { let hpet_sdt = find_sdt("HPET"); let hpet = if hpet_sdt.len() == 1 { - Hpet::new(hpet_sdt[0], active_table) + Hpet::new(hpet_sdt[0]) } else { println!("Unable to find HPET"); return; @@ -52,10 +52,10 @@ impl Hpet { } } - pub fn new(sdt: &'static Sdt, active_table: &mut ActivePageTable) -> Option { + pub fn new(sdt: &'static Sdt) -> Option { if &sdt.signature == b"HPET" && sdt.length as usize >= mem::size_of::() { let s = unsafe { ptr::read((sdt as *const Sdt) as *const Hpet) }; - unsafe { s.base_address.init(active_table) }; + unsafe { s.base_address.init(&mut KernelMapper::lock()) }; Some(s) } else { None @@ -64,18 +64,21 @@ impl Hpet { } impl GenericAddressStructure { - pub unsafe fn init(&self, active_table: &mut ActivePageTable) { - let page = Page::containing_address(VirtualAddress::new(self.address as usize)); + pub unsafe fn init(&self, mapper: &mut KernelMapper) { let frame = Frame::containing_address(PhysicalAddress::new(self.address as usize)); - let result = active_table.map_to(page, frame, PageFlags::new().write(true)); + let (_, result) = mapper + .get_mut() + 
.expect("KernelMapper locked re-entrant while mapping memory for GenericAddressStructure") + .map_linearly(frame.start_address(), PageFlags::new().write(true)) + .expect("failed to map memory for GenericAddressStructure"); result.flush(); } pub unsafe fn read_u64(&self, offset: usize) -> u64{ - volatile_load((self.address as usize + offset) as *const u64) + volatile_load((self.address as usize + offset + crate::PHYS_OFFSET) as *const u64) } pub unsafe fn write_u64(&mut self, offset: usize, value: u64) { - volatile_store((self.address as usize + offset) as *mut u64, value); + volatile_store((self.address as usize + offset + crate::PHYS_OFFSET) as *mut u64, value); } } diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs index f6dabb65fe2fa75e5706084b07ef3fe5e6fdfdeb..1f8d8f0d1f35465240219ffc943ca1a6e2518e19 100644 --- a/src/acpi/madt.rs +++ b/src/acpi/madt.rs @@ -1,7 +1,7 @@ use core::mem; use crate::memory::{allocate_frames, Frame}; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress}; use super::sdt::Sdt; use super::find_sdt; @@ -28,7 +28,7 @@ pub static mut MADT: Option = None; pub const FLAG_PCAT: u32 = 1; impl Madt { - pub fn init(active_table: &mut ActivePageTable) { + pub fn init() { let madt_sdt = find_sdt("APIC"); let madt = if madt_sdt.len() == 1 { Madt::new(madt_sdt[0]) @@ -56,7 +56,18 @@ impl Madt { // Map trampoline let trampoline_frame = Frame::containing_address(PhysicalAddress::new(TRAMPOLINE)); let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); - let result = active_table.map_to(trampoline_page, trampoline_frame, PageFlags::new().execute(true).write(true)); //TODO: do not have writable and executable! + let (result, page_table_physaddr) = unsafe { + //TODO: do not have writable and executable! 
+ let mut mapper = KernelMapper::lock(); + + let result = mapper + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .map_phys(trampoline_page.start_address(), trampoline_frame.start_address(), PageFlags::new().execute(true).write(true)) + .expect("failed to map trampoline"); + + (result, mapper.table().phys().data()) + }; result.flush(); // Write trampoline, make sure TRAMPOLINE page is free for use @@ -90,7 +101,7 @@ impl Madt { // Set the ap_ready to 0, volatile unsafe { atomic_store(ap_ready, 0) }; unsafe { atomic_store(ap_cpu_id, ap_local_apic.id as u64) }; - unsafe { atomic_store(ap_page_table, active_table.address() as u64) }; + unsafe { atomic_store(ap_page_table, page_table_physaddr as u64) }; unsafe { atomic_store(ap_stack_start, stack_start as u64) }; unsafe { atomic_store(ap_stack_end, stack_end as u64) }; unsafe { atomic_store(ap_code, kstart_ap as u64) }; @@ -137,7 +148,7 @@ impl Madt { } println!(" Ready"); - active_table.flush_all(); + unsafe { RmmA::invalidate_all(); } } else { println!(" CPU Disabled"); } @@ -147,8 +158,14 @@ impl Madt { } // Unmap trampoline - let (result, _frame) = active_table.unmap_return(trampoline_page, false); - result.flush(); + let (_frame, _, flush) = unsafe { + KernelMapper::lock() + .get_mut() + .expect("expected kernel page table not to be recursively locked while initializing MADT") + .unmap_phys(trampoline_page.start_address(), true) + .expect("failed to unmap trampoline page") + }; + flush.flush(); } } } diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index c75c78b4197387ba728b2ad5fd5bb68c717553ff..49b5bb8b365eb5c84a3a7190402fa38784824996 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -10,7 +10,7 @@ use spin::{Once, RwLock}; use crate::log::info; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress}; use self::madt::Madt; use self::rsdt::Rsdt; @@ -28,31 +28,33 @@ mod xsdt; mod rxsdt; mod rsdp; -pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt { - { - let page = Page::containing_address(VirtualAddress::new(sdt_address)); - if active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data())); - let result = active_table.map_to(page, frame, PageFlags::new()); - result.flush(); - } +unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::paging::PageMapper) { + let base = PhysicalAddress::new(crate::paging::round_down_pages(addr.data())); + let aligned_len = crate::paging::round_up_pages(len + (addr.data() - base.data())); + + for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE { + let (_, flush) = mapper.map_linearly(base.add(page_idx * crate::memory::PAGE_SIZE), PageFlags::new()).expect("failed to linearly map SDT"); + flush.flush(); } +} - let sdt = unsafe { &*(sdt_address as *const Sdt) }; +pub fn get_sdt(sdt_address: usize, mapper: &mut KernelMapper) -> &'static Sdt { + let mapper = mapper + .get_mut() + .expect("KernelMapper mapper locked re-entrant in get_sdt"); - // Map extra SDT frames if required - { - let start_page = Page::containing_address(VirtualAddress::new(sdt_address + 4096)); - let end_page = Page::containing_address(VirtualAddress::new(sdt_address + sdt.length as usize)); - for page in Page::range_inclusive(start_page, end_page) { - if 
active_table.translate_page(page).is_none() { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data())); - let result = active_table.map_to(page, frame, PageFlags::new()); - result.flush(); - } - } - } + let physaddr = PhysicalAddress::new(sdt_address); + + let sdt; + + unsafe { + const SDT_SIZE: usize = core::mem::size_of::(); + map_linearly(physaddr, SDT_SIZE, mapper); + sdt = unsafe { &*(RmmA::phys_to_virt(physaddr).data() as *const Sdt) }; + + map_linearly(physaddr.add(SDT_SIZE), sdt.length as usize - SDT_SIZE, mapper); + } sdt } @@ -72,16 +74,18 @@ impl Rxsdt for RxsdtEnum { pub static RXSDT_ENUM: Once = Once::new(); /// Parse the ACPI tables to gather CPU, interrupt, and timer information -pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: Option<(u64, u64)>) { +pub unsafe fn init(already_supplied_rsdps: Option<(u64, u64)>) { { let mut sdt_ptrs = SDT_POINTERS.write(); *sdt_ptrs = Some(BTreeMap::new()); } // Search for RSDP - if let Some(rsdp) = RSDP::get_rsdp(active_table, already_supplied_rsdps) { + let rsdp_opt = RSDP::get_rsdp(&mut KernelMapper::lock(), already_supplied_rsdps); + + if let Some(rsdp) = rsdp_opt { info!("RSDP: {:?}", rsdp); - let rxsdt = get_sdt(rsdp.sdt_address(), active_table); + let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock()); for &c in rxsdt.signature.iter() { print!("{}", c as char); @@ -122,10 +126,10 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O // TODO: Don't touch ACPI tables in kernel? - rxsdt.map_all(active_table); + rxsdt.map_all(); for sdt_address in rxsdt.iter() { - let sdt = &*(sdt_address as *const Sdt); + let sdt = &*((sdt_address + crate::PHYS_OFFSET) as *const Sdt); let signature = get_sdt_signature(sdt); if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { @@ -135,10 +139,10 @@ pub unsafe fn init(active_table: &mut ActivePageTable, already_supplied_rsdps: O // TODO: Enumerate processors in userspace, and then provide an ACPI-independent interface // to initialize enumerated processors to userspace? - Madt::init(active_table); + Madt::init(); // TODO: Let userspace setup HPET, and then provide an interface to specify which timer to // use? 
- Hpet::init(active_table); + Hpet::init(); } else { println!("NO RSDP FOUND"); } diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs index d96214b96e1cfa4bd0b407c6270bef331f8eba0f..3d0818675017b12f0aaa52406df3cd92223290f5 100644 --- a/src/acpi/rsdp.rs +++ b/src/acpi/rsdp.rs @@ -2,7 +2,7 @@ use core::convert::TryFrom; use core::mem; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, VirtualAddress}; /// RSDP #[derive(Copy, Clone, Debug)] @@ -71,16 +71,16 @@ impl RSDP { None } - pub fn get_rsdp(active_table: &mut ActivePageTable, already_supplied_rsdps: Option<(u64, u64)>) -> Option { + pub fn get_rsdp(mapper: &mut KernelMapper, already_supplied_rsdps: Option<(u64, u64)>) -> Option { if let Some((base, size)) = already_supplied_rsdps { let area = unsafe { core::slice::from_raw_parts(base as usize as *const u8, size as usize) }; - Self::get_already_supplied_rsdps(area).or_else(|| Self::get_rsdp_by_searching(active_table)) + Self::get_already_supplied_rsdps(area).or_else(|| Self::get_rsdp_by_searching(mapper)) } else { - Self::get_rsdp_by_searching(active_table) + Self::get_rsdp_by_searching(mapper) } } /// Search for the RSDP - pub fn get_rsdp_by_searching(active_table: &mut ActivePageTable) -> Option { + pub fn get_rsdp_by_searching(mapper: &mut KernelMapper) -> Option { let start_addr = 0xE_0000; let end_addr = 0xF_FFFF; @@ -90,7 +90,9 @@ impl RSDP { let end_frame = Frame::containing_address(PhysicalAddress::new(end_addr)); for frame in Frame::range_inclusive(start_frame, end_frame) { let page = Page::containing_address(VirtualAddress::new(frame.start_address().data())); - let result = active_table.map_to(page, frame, PageFlags::new()); + let result = unsafe { + mapper.get_mut().expect("KernelMapper locked re-entrant while locating RSDPs").map_phys(page.start_address(), frame.start_address(), PageFlags::new()).expect("failed to map page while searching for RSDP") + }; result.flush(); } } diff --git a/src/acpi/rxsdt.rs b/src/acpi/rxsdt.rs index db238806713e73a156c961d67bbf600cf828ec40..1f17d79e699c6a362c22905a08dd3e7b4bd5b0b5 100644 --- a/src/acpi/rxsdt.rs +++ b/src/acpi/rxsdt.rs @@ -1,6 +1,6 @@ use alloc::boxed::Box; -use crate::paging::ActivePageTable; +use crate::paging::KernelMapper; use super::sdt::Sdt; use super::get_sdt; @@ -8,9 +8,12 @@ use super::get_sdt; pub trait Rxsdt { fn iter(&self) -> Box>; - fn map_all(&self, active_table: &mut ActivePageTable) { + fn map_all(&self) { + let iter = self.iter(); + + let mut mapper = KernelMapper::lock(); for sdt in self.iter() { - get_sdt(sdt, active_table); + get_sdt(sdt, &mut mapper); } } diff --git a/src/allocator/linked_list.rs b/src/allocator/linked_list.rs index e7a371fc959d208c8ab53cf58881a53b02aa3d9a..496f8ff6d4ec877bdfd0e26ea15b8f9bcb95b258 100644 --- a/src/allocator/linked_list.rs +++ b/src/allocator/linked_list.rs @@ -3,7 +3,7 @@ use core::ptr::{self, NonNull}; use linked_list_allocator::Heap; use spin::Mutex; -use crate::paging::{ActivePageTable, TableKind}; +use crate::paging::KernelMapper; static HEAP: Mutex> = Mutex::new(None); @@ -21,7 +21,7 @@ unsafe impl GlobalAlloc for Allocator { match heap.allocate_first_fit(layout) { Err(()) => { let size = heap.size(); - super::map_heap(&mut ActivePageTable::new(TableKind::Kernel), crate::KERNEL_HEAP_OFFSET + size, crate::KERNEL_HEAP_SIZE); + super::map_heap(&mut KernelMapper::lock(), crate::KERNEL_HEAP_OFFSET + size, crate::KERNEL_HEAP_SIZE); 
heap.extend(crate::KERNEL_HEAP_SIZE); }, other => return other.ok().map_or(ptr::null_mut(), |allocation| allocation.as_ptr()), diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs index dfc618a41e07bda4fe53cae027beb31fc89722c6..f8be01794c7f1dbafe4fdd77d984996e05efa304 100644 --- a/src/allocator/mod.rs +++ b/src/allocator/mod.rs @@ -1,4 +1,5 @@ -use crate::paging::{ActivePageTable, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; +use rmm::Flusher; +use crate::paging::{KernelMapper, Page, PageFlags, VirtualAddress, mapper::PageFlushAll, entry::EntryFlags}; #[cfg(not(feature="slab"))] pub use self::linked_list::Allocator; @@ -12,13 +13,14 @@ mod linked_list; #[cfg(feature="slab")] mod slab; -unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usize) { - let flush_all = PageFlushAll::new(); +unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) { + let mapper = mapper.get_mut().expect("failed to obtain exclusive access to KernelMapper while extending heap"); + let mut flush_all = PageFlushAll::new(); let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size-1)); for page in Page::range_inclusive(heap_start_page, heap_end_page) { - let result = active_table.map(page, PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti")))) + let result = mapper.map(page.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti")))) .expect("failed to map kernel heap"); flush_all.consume(result); } @@ -26,12 +28,12 @@ unsafe fn map_heap(active_table: &mut ActivePageTable, offset: usize, size: usiz flush_all.flush(); } -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init() { let offset = crate::KERNEL_HEAP_OFFSET; let size = crate::KERNEL_HEAP_SIZE; // Map heap pages - map_heap(active_table, offset, size); + map_heap(&mut KernelMapper::lock(), offset, size); // Initialize global heap Allocator::init(offset, size); diff --git a/src/arch/x86_64/consts.rs b/src/arch/x86_64/consts.rs index 7df51adc52406d4be2765b27848a00a2d50e948b..1656c8c26f3e5cc404f377ba5e828da323b802a6 100644 --- a/src/arch/x86_64/consts.rs +++ b/src/arch/x86_64/consts.rs @@ -37,51 +37,6 @@ /// Offset to user image pub const USER_OFFSET: usize = 0; - pub const USER_PML4: usize = (USER_OFFSET & PML4_MASK)/PML4_SIZE; - /// Offset to user arguments - pub const USER_ARG_OFFSET: usize = USER_OFFSET + PML4_SIZE/2; - - /// Offset to user heap - pub const USER_HEAP_OFFSET: usize = USER_OFFSET + PML4_SIZE; - pub const USER_HEAP_PML4: usize = (USER_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user grants - pub const USER_GRANT_OFFSET: usize = USER_HEAP_OFFSET + PML4_SIZE; - pub const USER_GRANT_PML4: usize = (USER_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user stack - pub const USER_STACK_OFFSET: usize = USER_GRANT_OFFSET + PML4_SIZE; - pub const USER_STACK_PML4: usize = (USER_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user stack - pub const USER_STACK_SIZE: usize = 1024 * 1024; // 1 MB - - /// Offset to user sigstack - pub const USER_SIGSTACK_OFFSET: usize = USER_STACK_OFFSET + PML4_SIZE; - pub const USER_SIGSTACK_PML4: usize = (USER_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; - /// Size of user sigstack - pub const USER_SIGSTACK_SIZE: usize = 256 * 1024; // 256 KB - - /// Offset to user temporary image (used when cloning) - pub const 
USER_TMP_OFFSET: usize = USER_SIGSTACK_OFFSET + PML4_SIZE; - pub const USER_TMP_PML4: usize = (USER_TMP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary heap (used when cloning) - pub const USER_TMP_HEAP_OFFSET: usize = USER_TMP_OFFSET + PML4_SIZE; - pub const USER_TMP_HEAP_PML4: usize = (USER_TMP_HEAP_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary page for grants - pub const USER_TMP_GRANT_OFFSET: usize = USER_TMP_HEAP_OFFSET + PML4_SIZE; - pub const USER_TMP_GRANT_PML4: usize = (USER_TMP_GRANT_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary stack (used when cloning) - pub const USER_TMP_STACK_OFFSET: usize = USER_TMP_GRANT_OFFSET + PML4_SIZE; - pub const USER_TMP_STACK_PML4: usize = (USER_TMP_STACK_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset to user temporary sigstack (used when cloning) - pub const USER_TMP_SIGSTACK_OFFSET: usize = USER_TMP_STACK_OFFSET + PML4_SIZE; - pub const USER_TMP_SIGSTACK_PML4: usize = (USER_TMP_SIGSTACK_OFFSET & PML4_MASK)/PML4_SIZE; - - /// Offset for usage in other temporary pages - pub const USER_TMP_MISC_OFFSET: usize = USER_TMP_SIGSTACK_OFFSET + PML4_SIZE; - pub const USER_TMP_MISC_PML4: usize = (USER_TMP_MISC_OFFSET & PML4_MASK)/PML4_SIZE; + /// End offset of the user image, i.e. kernel start + pub const USER_END_OFFSET: usize = 256 * PML4_SIZE; diff --git a/src/arch/x86_64/device/ioapic.rs b/src/arch/x86_64/device/ioapic.rs index e92d45216cc1a4d27b2143779b8b3ca03498f029..161f807f09c135d398735092a520093d5d66ed4e 100644 --- a/src/arch/x86_64/device/ioapic.rs +++ b/src/arch/x86_64/device/ioapic.rs @@ -8,7 +8,7 @@ use crate::acpi::madt::{self, Madt, MadtEntry, MadtIoApic, MadtIntSrcOverride}; use crate::arch::interrupt::irq; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch}; use crate::paging::entry::EntryFlags; use super::pic; @@ -229,16 +229,20 @@ pub fn src_overrides() -> &'static [Override] { } #[cfg(feature = "acpi")] -pub unsafe fn handle_ioapic(active_table: &mut ActivePageTable, madt_ioapic: &'static MadtIoApic) { +pub unsafe fn handle_ioapic(mapper: &mut KernelMapper, madt_ioapic: &'static MadtIoApic) { // map the I/O APIC registers let frame = Frame::containing_address(PhysicalAddress::new(madt_ioapic.address as usize)); - let page = Page::containing_address(VirtualAddress::new(madt_ioapic.address as usize + crate::PHYS_OFFSET)); + let page = Page::containing_address(RmmA::phys_to_virt(frame.start_address())); - assert_eq!(active_table.translate_page(page), None); + assert!(mapper.translate(page.start_address()).is_none()); - let result = active_table.map_to(page, frame, PageFlags::new().write(true).custom_flag(EntryFlags::NO_CACHE.bits(), true)); - result.flush(); + mapper + .get_mut() + .expect("expected KernelMapper not to be locked re-entrant while mapping I/O APIC memory") + .map_phys(page.start_address(), frame.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::NO_CACHE.bits(), true)) + .expect("failed to map I/O APIC") + .flush(); let ioapic_registers = page.start_address().data() as *const u32; let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base); @@ -280,7 +284,7 @@ pub unsafe fn handle_src_override(src_override: &'static MadtIntSrcOverride) { SRC_OVERRIDES.get_or_insert_with(Vec::new).push(over); } -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init(active_table: &mut KernelMapper) { let 
bsp_apic_id = x86::cpuid::CpuId::new().get_feature_info().unwrap().initial_local_apic_id(); // TODO // search the madt for all IOAPICs. diff --git a/src/arch/x86_64/device/local_apic.rs b/src/arch/x86_64/device/local_apic.rs index 913781e5ceee75a0f6af42698b9c991704924ba8..392f3c2770be579d779a35f8a6f681dc62b7be98 100644 --- a/src/arch/x86_64/device/local_apic.rs +++ b/src/arch/x86_64/device/local_apic.rs @@ -3,15 +3,14 @@ use core::intrinsics::{volatile_load, volatile_store}; use x86::cpuid::CpuId; use x86::msr::*; -use crate::memory::Frame; -use crate::paging::{ActivePageTable, PhysicalAddress, Page, PageFlags, VirtualAddress}; +use crate::paging::{KernelMapper, PhysicalAddress, PageFlags, RmmA, RmmArch}; pub static mut LOCAL_APIC: LocalApic = LocalApic { address: 0, x2: false }; -pub unsafe fn init(active_table: &mut ActivePageTable) { +pub unsafe fn init(active_table: &mut KernelMapper) { LOCAL_APIC.init(active_table); } @@ -41,21 +40,25 @@ pub fn bsp_apic_id() -> Option { } impl LocalApic { - unsafe fn init(&mut self, active_table: &mut ActivePageTable) { - self.address = (rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000) + crate::PHYS_OFFSET; + unsafe fn init(&mut self, mapper: &mut KernelMapper) { + let mapper = mapper.get_mut().expect("expected KernelMapper not to be locked re-entrant while initializing LAPIC"); + + let physaddr = PhysicalAddress::new(rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000); + let virtaddr = RmmA::phys_to_virt(physaddr); + + self.address = virtaddr.data(); self.x2 = CpuId::new().get_feature_info().unwrap().has_x2apic(); if ! self.x2 { - let page = Page::containing_address(VirtualAddress::new(self.address)); - let frame = Frame::containing_address(PhysicalAddress::new(self.address - crate::PHYS_OFFSET)); - log::info!("Detected xAPIC at {:#x}", frame.start_address().data()); - if active_table.translate_page(page).is_some() { + log::info!("Detected xAPIC at {:#x}", physaddr.data()); + if let Some((_entry, _, flush)) = mapper.unmap_phys(virtaddr, true) { // Unmap xAPIC page if already mapped - let (result, _frame) = active_table.unmap_return(page, true); - result.flush(); + flush.flush(); } - let result = active_table.map_to(page, frame, PageFlags::new().write(true)); - result.flush(); + mapper + .map_phys(virtaddr, physaddr, PageFlags::new().write(true)) + .expect("failed to map local APIC memory") + .flush(); } else { log::info!("Detected x2APIC"); } diff --git a/src/arch/x86_64/device/mod.rs b/src/arch/x86_64/device/mod.rs index 942d32014d903a037bf0fbc4af4b8cac5f1477cd..eafa245c5c9286c8afab45f237d5319d18d8c2e3 100644 --- a/src/arch/x86_64/device/mod.rs +++ b/src/arch/x86_64/device/mod.rs @@ -1,5 +1,3 @@ -use crate::paging::ActivePageTable; - pub mod cpu; pub mod ioapic; pub mod local_apic; @@ -12,13 +10,15 @@ pub mod hpet; #[cfg(feature = "system76_ec_debug")] pub mod system76_ec; -pub unsafe fn init(active_table: &mut ActivePageTable) { +use crate::paging::KernelMapper; + +pub unsafe fn init() { pic::init(); - local_apic::init(active_table); + local_apic::init(&mut KernelMapper::lock()); } -pub unsafe fn init_after_acpi(_active_table: &mut ActivePageTable) { +pub unsafe fn init_after_acpi() { // this will disable the IOAPIC if needed. 
- //ioapic::init(active_table); + //ioapic::init(mapper); } #[cfg(feature = "acpi")] diff --git a/src/arch/x86_64/idt.rs b/src/arch/x86_64/idt.rs index 595b6a0b33e1470abaf60322dfb7e93192fc7d1d..4a712693d08e3e0d4d6805e0626dc075299cb157 100644 --- a/src/arch/x86_64/idt.rs +++ b/src/arch/x86_64/idt.rs @@ -10,7 +10,6 @@ use x86::dtables::{self, DescriptorTablePointer}; use crate::interrupt::*; use crate::ipi::IpiKind; -use crate::paging::PageFlags; use spin::RwLock; @@ -172,32 +171,11 @@ pub unsafe fn init_generic(is_bsp: bool, idt: &mut Idt) { let frames = crate::memory::allocate_frames(page_count) .expect("failed to allocate pages for backup interrupt stack"); - // Map them linearly, i.e. PHYS_OFFSET + physaddr. - let base_address = { - use crate::memory::{Frame, PhysicalAddress}; - use crate::paging::{ActivePageTable, Page, VirtualAddress}; + use crate::paging::{RmmA, RmmArch}; - let base_virtual_address = VirtualAddress::new(frames.start_address().data() + crate::PHYS_OFFSET); - let mut active_table = ActivePageTable::new(base_virtual_address.kind()); + // Physical pages are mapped linearly. So is the linearly mapped virtual memory. + let base_address = RmmA::phys_to_virt(frames.start_address()); - for i in 0..page_count { - let virtual_address = VirtualAddress::new(base_virtual_address.data() + i * crate::memory::PAGE_SIZE); - let physical_address = PhysicalAddress::new(frames.start_address().data() + i * crate::memory::PAGE_SIZE); - let page = Page::containing_address(virtual_address); - - let flags = PageFlags::new().write(true); - - let flusher = if let Some(already_mapped) = active_table.translate_page(page) { - assert_eq!(already_mapped.start_address(), physical_address, "address already mapped, but non-linearly"); - active_table.remap(page, flags) - } else { - active_table.map_to(page, Frame::containing_address(physical_address), flags) - }; - flusher.flush(); - } - - base_virtual_address - }; // Stack always grows downwards. let address = base_address.data() + BACKUP_STACK_SIZE; diff --git a/src/arch/x86_64/interrupt/handler.rs b/src/arch/x86_64/interrupt/handler.rs index d8fa21f796c35d2043fc94896ddfc6e65bfe3490..ddbef8285da68823d6489ba08af62425e45b3a99 100644 --- a/src/arch/x86_64/interrupt/handler.rs +++ b/src/arch/x86_64/interrupt/handler.rs @@ -150,8 +150,6 @@ impl InterruptStack { /// Loads all registers from a struct used by the proc: /// scheme to read/write registers. pub fn load(&mut self, all: &IntRegisters) { - // TODO: Which of these should be allowed to change? 
- self.preserved.r15 = all.r15; self.preserved.r14 = all.r14; self.preserved.r13 = all.r13; @@ -168,9 +166,11 @@ impl InterruptStack { self.scratch.rcx = all.rcx; self.scratch.rax = all.rax; self.iret.rip = all.rip; + self.iret.rsp = all.rsp; + + // CS and SS are immutable - // These should probably be restricted - // self.iret.cs = all.cs; + // TODO: RFLAGS should be restricted before being changeable // self.iret.rflags = all.eflags; } /// Enables the "Trap Flag" in the FLAGS register, causing the CPU diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs index 70fd2a60c63d526ffea40a0259441f8cffbaa74a..803a6d8d58cc96f8b53db819d87d7804699989f1 100644 --- a/src/arch/x86_64/interrupt/syscall.rs +++ b/src/arch/x86_64/interrupt/syscall.rs @@ -160,23 +160,3 @@ interrupt_stack!(syscall, |stack| { syscall::syscall(scratch.rax, stack.preserved.rbx, scratch.rcx, scratch.rdx, scratch.rsi, scratch.rdi, rbp, stack) }) }); - -#[naked] -pub unsafe extern "C" fn clone_ret() { - core::arch::asm!(concat!( - // The address of this instruction is injected by `clone` in process.rs, on - // top of the stack syscall->inner in this file, which is done using the rbp - // register we save there. - // - // The top of our stack here is the address pointed to by rbp, which is: - // - // - the previous rbp - // - the return location - // - // Our goal is to return from the parent function, inner, so we restore - // rbp... - "pop rbp\n", - // ...and we return to the address at the top of the stack - "ret\n", - ), options(noreturn)); -} diff --git a/src/arch/x86_64/interrupt/trace.rs b/src/arch/x86_64/interrupt/trace.rs index b8eb820dfd2bf0029b32f7747316c52144d44a61..ecff30e99daa47f73bf1ae4f209cd56574767125 100644 --- a/src/arch/x86_64/interrupt/trace.rs +++ b/src/arch/x86_64/interrupt/trace.rs @@ -1,8 +1,9 @@ use core::{mem, str}; + use goblin::elf::sym; use rustc_demangle::demangle; -use crate::paging::{ActivePageTable, TableKind, VirtualAddress}; +use crate::{context, paging::{KernelMapper, VirtualAddress}}; /// Get a stack trace //TODO: Check for stack being mapped before dereferencing @@ -13,10 +14,14 @@ pub unsafe fn stack_trace() { println!("TRACE: {:>016X}", rbp); //Maximum 64 frames - let active_table = ActivePageTable::new(TableKind::User); + + let mapper = KernelMapper::lock(); + for _frame in 0..64 { if let Some(rip_rbp) = rbp.checked_add(mem::size_of::()) { - if active_table.translate(VirtualAddress::new(rbp)).is_some() && active_table.translate(VirtualAddress::new(rip_rbp)).is_some() { + let rbp_virt = VirtualAddress::new(rbp); + let rip_rbp_virt = VirtualAddress::new(rip_rbp); + if rbp_virt.is_canonical() && rip_rbp_virt.is_canonical() && mapper.translate(rbp_virt).is_some() && mapper.translate(rip_rbp_virt).is_some() { let rip = *(rip_rbp as *const usize); if rip == 0 { println!(" {:>016X}: EMPTY RETURN", rbp); diff --git a/src/arch/x86_64/paging/mapper.rs b/src/arch/x86_64/paging/mapper.rs index babefcca89942da8b99bca12d7d4dd9d9233cd5d..9f7659b4e68886bb0a3f42107ddd57b2c7155f34 100644 --- a/src/arch/x86_64/paging/mapper.rs +++ b/src/arch/x86_64/paging/mapper.rs @@ -1,193 +1,23 @@ -use super::{linear_phys_to_virt, Page, PAGE_SIZE, PageFlags, PhysicalAddress, VirtualAddress}; -use crate::memory::{allocate_frames, deallocate_frames, Enomem, Frame}; +use crate::ipi::{ipi, IpiKind, IpiTarget}; use super::RmmA; -use super::table::{Table, Level4}; -pub use rmm::{PageFlush, PageFlushAll}; +pub use rmm::{Flusher, PageFlush, PageFlushAll}; -pub struct Mapper<'table> { - p4: 
&'table mut Table, +pub struct InactiveFlusher { _inner: () } +impl InactiveFlusher { + // TODO: cpu id + pub fn new() -> Self { Self { _inner: () } } } -impl core::fmt::Debug for Mapper<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Mapper referencing P4 at {:p}", self.p4) +impl Flusher for InactiveFlusher { + fn consume(&mut self, flush: PageFlush) { + // TODO: Push to TLB "mailbox" or tell it to reload CR3 if there are too many entries. + unsafe { flush.ignore(); } } } - -impl<'table> Mapper<'table> { - /// Wrap the current address space in a mapper. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the pointer in the CR3 - /// register. - // TODO: Find some lifetime hack we can use for ensuring exclusive access at compile time? - pub unsafe fn current() -> Mapper<'table> { - // SAFETY: We know that CR3 must be a valid frame, since the processor would triple fault - // otherwise, and the caller has ensured exclusive ownership of the KERNEL_OFFSET+CR3. - Self::from_p4_unchecked(&mut Frame::containing_address(PhysicalAddress::new(x86::controlregs::cr3() as usize))) - } - /// Wrap a top-level page table (an entire address space) in a mapper. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the frame argument. The frame - /// must also be valid, and the frame must not outlive the lifetime. - pub unsafe fn from_p4_unchecked(frame: &mut Frame) -> Self { - let virt = linear_phys_to_virt(frame.start_address()) - .expect("expected page table frame to fit within linear mapping"); - - Self { - p4: &mut *(virt.data() as *mut Table), - } - } - - pub fn p4(&self) -> &Table { - &*self.p4 - } - - pub fn p4_mut(&mut self) -> &mut Table { - &mut *self.p4 - } - - /// Map a page to a frame - pub fn map_to(&mut self, page: Page, frame: Frame, flags: PageFlags) -> PageFlush { - let p3 = self.p4_mut().next_table_create(page.p4_index()); - let p2 = p3.next_table_create(page.p3_index()); - let p1 = p2.next_table_create(page.p2_index()); - - assert!(p1[page.p1_index()].is_unused(), - "{:X}: Set to {:X}: {:?}, requesting {:X}: {:?}", - page.start_address().data(), - p1[page.p1_index()].address().data(), p1[page.p1_index()].flags(), - frame.start_address().data(), flags); - p1.increment_entry_count(); - p1[page.p1_index()].set(frame, flags); - PageFlush::new(page.start_address()) - } - - /// Map a page to the next free frame - pub fn map(&mut self, page: Page, flags: PageFlags) -> Result, Enomem> { - let frame = allocate_frames(1).ok_or(Enomem)?; - Ok(self.map_to(page, frame, flags)) - } - - /// Update flags for a page - pub fn remap(&mut self, page: Page, flags: PageFlags) -> PageFlush { - let p3 = self.p4_mut().next_table_mut(page.p4_index()).expect("failed to remap: no p3"); - let p2 = p3.next_table_mut(page.p3_index()).expect("failed to remap: no p2"); - let p1 = p2.next_table_mut(page.p2_index()).expect("failed to remap: no p1"); - let frame = p1[page.p1_index()].pointed_frame().expect("failed to remap: not mapped"); - p1[page.p1_index()].set(frame, flags); - PageFlush::new(page.start_address()) - } - - /// Identity map a frame - pub fn identity_map(&mut self, frame: Frame, flags: PageFlags) -> PageFlush { - let page = Page::containing_address(VirtualAddress::new(frame.start_address().data())); - self.map_to(page, frame, flags) - } - - fn unmap_inner(&mut self, page: Page, keep_parents: bool) -> Frame { - let frame; - - let p4 = self.p4_mut(); - if let Some(p3) = 
p4.next_table_mut(page.p4_index()) { - if let Some(p2) = p3.next_table_mut(page.p3_index()) { - if let Some(p1) = p2.next_table_mut(page.p2_index()) { - frame = if let Some(frame) = p1[page.p1_index()].pointed_frame() { - frame - } else { - panic!("unmap_inner({:X}): frame not found", page.start_address().data()) - }; - - p1.decrement_entry_count(); - p1[page.p1_index()].set_unused(); - - if keep_parents || ! p1.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p1 not found", page.start_address().data()); - } - - if let Some(p1_frame) = p2[page.p2_index()].pointed_frame() { - //println!("unmap_inner: Free p1 {:?}", p1_frame); - p2.decrement_entry_count(); - p2[page.p2_index()].set_unused(); - deallocate_frames(p1_frame, 1); - } else { - panic!("unmap_inner({:X}): p1_frame not found", page.start_address().data()); - } - - if ! p2.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p2 not found", page.start_address().data()); - } - - if let Some(p2_frame) = p3[page.p3_index()].pointed_frame() { - //println!("unmap_inner: Free p2 {:?}", p2_frame); - p3.decrement_entry_count(); - p3[page.p3_index()].set_unused(); - deallocate_frames(p2_frame, 1); - } else { - panic!("unmap_inner({:X}): p2_frame not found", page.start_address().data()); - } - - if ! p3.is_unused() { - return frame; - } - } else { - panic!("unmap_inner({:X}): p3 not found", page.start_address().data()); - } - - if let Some(p3_frame) = p4[page.p4_index()].pointed_frame() { - //println!("unmap_inner: Free p3 {:?}", p3_frame); - p4.decrement_entry_count(); - p4[page.p4_index()].set_unused(); - deallocate_frames(p3_frame, 1); - } else { - panic!("unmap_inner({:X}): p3_frame not found", page.start_address().data()); - } - - frame - } - - /// Unmap a page - pub fn unmap(&mut self, page: Page) -> PageFlush { - let frame = self.unmap_inner(page, false); - deallocate_frames(frame, 1); - PageFlush::new(page.start_address()) - } - - /// Unmap a page, return frame without free - pub fn unmap_return(&mut self, page: Page, keep_parents: bool) -> (PageFlush, Frame) { - let frame = self.unmap_inner(page, keep_parents); - (PageFlush::new(page.start_address()), frame) - } - - pub fn translate_page(&self, page: Page) -> Option { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| p1[page.p1_index()].pointed_frame()) - } - - pub fn translate_page_flags(&self, page: Page) -> Option> { - self.p4().next_table(page.p4_index()) - .and_then(|p3| p3.next_table(page.p3_index())) - .and_then(|p2| p2.next_table(page.p2_index())) - .and_then(|p1| Some(p1[page.p1_index()].flags())) - } - - /// Translate a virtual address to a physical one - pub fn translate(&self, virtual_address: VirtualAddress) -> Option { - let offset = virtual_address.data() % PAGE_SIZE; - self.translate_page(Page::containing_address(virtual_address)) - .map(|frame| PhysicalAddress::new(frame.start_address().data() + offset)) +impl Drop for InactiveFlusher { + fn drop(&mut self) { + ipi(IpiKind::Tlb, IpiTarget::Other); } } diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs index 347aebcf80c44f134fcd91fa907eb4342e5d0c88..46f1777b1215eecb439a5d340e17ec567d042aa7 100644 --- a/src/arch/x86_64/paging/mod.rs +++ b/src/arch/x86_64/paging/mod.rs @@ -1,19 +1,15 @@ //! # Paging //! 
Some code was borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) -use core::ops::{Deref, DerefMut}; use core::{mem, ptr}; -use spin::Mutex; use x86::msr; -use crate::memory::Frame; - use self::entry::EntryFlags; -use self::mapper::{Mapper, PageFlushAll}; -use self::table::{Level4, Table}; +use self::mapper::PageFlushAll; pub use rmm::{ Arch as RmmArch, + Flusher, PageFlags, PhysicalAddress, TableKind, @@ -21,47 +17,17 @@ pub use rmm::{ X8664Arch as RmmA, }; +pub type PageMapper = rmm::PageMapper; +pub use crate::rmm::KernelMapper; + pub mod entry; pub mod mapper; -pub mod table; -pub mod temporary_page; /// Number of entries per page table -pub const ENTRY_COUNT: usize = 512; +pub const ENTRY_COUNT: usize = RmmA::PAGE_ENTRIES; /// Size of pages -pub const PAGE_SIZE: usize = 4096; - -//TODO: This is a rudimentary recursive mutex used to naively fix multi_core issues, replace it! -pub struct PageTableLock { - cpu_id: usize, - count: usize, -} - -pub static PAGE_TABLE_LOCK: Mutex = Mutex::new(PageTableLock { - cpu_id: 0, - count: 0, -}); - -fn page_table_lock() { - let cpu_id = crate::cpu_id(); - loop { - { - let mut lock = PAGE_TABLE_LOCK.lock(); - if lock.count == 0 || lock.cpu_id == cpu_id { - lock.cpu_id = cpu_id; - lock.count += 1; - return; - } - } - crate::arch::interrupt::pause(); - } -} - -fn page_table_unlock() { - let mut lock = PAGE_TABLE_LOCK.lock(); - lock.count -= 1; -} +pub const PAGE_SIZE: usize = RmmA::PAGE_SIZE; /// Setup page attribute table unsafe fn init_pat() { @@ -96,7 +62,7 @@ unsafe fn init_pat() { } /// Map percpu -unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { +unsafe fn map_percpu(cpu_id: usize, mapper: &mut PageMapper) -> PageFlushAll { extern "C" { /// The starting byte of the thread data segment static mut __tdata_start: u8; @@ -112,12 +78,12 @@ unsafe fn map_percpu(cpu_id: usize, mapper: &mut Mapper) -> PageFlushAll { let start = crate::KERNEL_PERCPU_OFFSET + crate::KERNEL_PERCPU_SIZE * cpu_id; let end = start + size; - let flush_all = PageFlushAll::new(); + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(start)); let end_page = Page::containing_address(VirtualAddress::new(end - 1)); for page in Page::range_inclusive(start_page, end_page) { let result = mapper.map( - page, + page.start_address(), PageFlags::new().write(true).custom_flag(EntryFlags::GLOBAL.bits(), cfg!(not(feature = "pti"))), ) .expect("failed to allocate page table frames while mapping percpu"); @@ -161,7 +127,7 @@ unsafe fn init_tcb(cpu_id: usize) -> usize { /// Returns page table and thread control block offset pub unsafe fn init( cpu_id: usize, -) -> (ActivePageTable, usize) { +) -> usize { extern "C" { /// The starting byte of the text (code) data segment. 
static mut __text_start: u8; @@ -191,166 +157,30 @@ pub unsafe fn init( init_pat(); - let mut active_table = ActivePageTable::new_unlocked(TableKind::User); - - let flush_all = map_percpu(cpu_id, &mut active_table); + let flush_all = map_percpu(cpu_id, KernelMapper::lock_manually(cpu_id).get_mut().expect("expected KernelMapper not to be locked re-entrant in paging::init")); flush_all.flush(); - return (active_table, init_tcb(cpu_id)); + return init_tcb(cpu_id); } pub unsafe fn init_ap( cpu_id: usize, - bsp_table: usize, + bsp_table: &mut KernelMapper, ) -> usize { init_pat(); - let mut active_table = ActivePageTable::new_unlocked(TableKind::User); - - let mut new_table = InactivePageTable::from_address(bsp_table); - { - let flush_all = map_percpu(cpu_id, &mut new_table.mapper()); - // The flush can be ignored as this is not the active table. See later active_table.switch + let flush_all = map_percpu(cpu_id, bsp_table.get_mut().expect("KernelMapper locked re-entrant for AP")); + + // The flush can be ignored as this is not the active table. See later make_current(). flush_all.ignore(); }; - // This switches the active table, which is setup by the bootloader, to a correct table - // setup by the lambda above. This will also flush the TLB - active_table.switch(new_table); + bsp_table.make_current(); init_tcb(cpu_id) } -#[derive(Debug)] -pub struct ActivePageTable { - mapper: Mapper<'static>, - locked: bool, -} - -impl Deref for ActivePageTable { - type Target = Mapper<'static>; - - fn deref(&self) -> &Mapper<'static> { - &self.mapper - } -} - -impl DerefMut for ActivePageTable { - fn deref_mut(&mut self) -> &mut Mapper<'static> { - &mut self.mapper - } -} - -impl ActivePageTable { - pub unsafe fn new(_table_kind: TableKind) -> ActivePageTable { - page_table_lock(); - ActivePageTable { - mapper: Mapper::current(), - locked: true, - } - } - - pub unsafe fn new_unlocked(_table_kind: TableKind) -> ActivePageTable { - ActivePageTable { - mapper: Mapper::current(), - locked: false, - } - } - - pub fn switch(&mut self, new_table: InactivePageTable) -> InactivePageTable { - let old_table = InactivePageTable { - frame: Frame::containing_address(unsafe { - RmmA::table() - }) - }; - unsafe { - // Activate new page table - RmmA::set_table(new_table.frame.start_address()); - // Update mapper to new page table - self.mapper = Mapper::current(); - } - old_table - } - - pub fn flush(&mut self, page: Page) { - unsafe { - RmmA::invalidate(page.start_address()); - } - } - - pub fn flush_all(&mut self) { - unsafe { - RmmA::invalidate_all(); - } - } - - pub unsafe fn address(&self) -> usize { - RmmA::table().data() - } -} - -impl Drop for ActivePageTable { - fn drop(&mut self) { - if self.locked { - page_table_unlock(); - self.locked = false; - } - } -} - -pub struct InactivePageTable { - frame: Frame, -} - -impl InactivePageTable { - /// Create a new inactive page table, located at a given frame. - /// - /// # Safety - /// - /// For this to be safe, the caller must have exclusive access to the corresponding virtual - /// address of the frame. - pub unsafe fn new( - _active_table: &mut ActivePageTable, - frame: Frame, - ) -> InactivePageTable { - // FIXME: Use active_table to ensure that the newly-allocated frame be linearly mapped, in - // case it is outside the pre-mapped physical address range, or if such a range is too - // large to fit the whole physical address space in the virtual address space. 
- { - let table = linear_phys_to_virt(frame.start_address()) - .expect("cannot initialize InactivePageTable (currently) without the frame being linearly mapped"); - // now we are able to zero the table - - // SAFETY: The caller must ensure exclusive access to the pointed-to virtual address of - // the frame. - (&mut *(table.data() as *mut Table::)).zero(); - } - - InactivePageTable { frame } - } - - pub unsafe fn from_address(address: usize) -> InactivePageTable { - InactivePageTable { - frame: Frame::containing_address(PhysicalAddress::new(address)), - } - } - - pub fn mapper<'inactive_table>(&'inactive_table mut self) -> Mapper<'inactive_table> { - unsafe { Mapper::from_p4_unchecked(&mut self.frame) } - } - pub unsafe fn address(&self) -> usize { - self.frame.start_address().data() - } -} - -pub fn linear_phys_to_virt(physical: PhysicalAddress) -> Option { - physical.data().checked_add(crate::PHYS_OFFSET).map(VirtualAddress::new) -} -pub fn linear_virt_to_phys(virt: VirtualAddress) -> Option { - virt.data().checked_sub(crate::PHYS_OFFSET).map(PhysicalAddress::new) -} - /// Page #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Page { @@ -386,13 +216,19 @@ impl Page { } } - pub fn range_inclusive(start: Page, end: Page) -> PageIter { + pub fn range_inclusive(start: Page, r#final: Page) -> PageIter { + PageIter { start, end: r#final.next() } + } + pub fn range_exclusive(start: Page, end: Page) -> PageIter { PageIter { start, end } } pub fn next(self) -> Page { + self.next_by(1) + } + pub fn next_by(self, n: usize) -> Page { Self { - number: self.number + 1, + number: self.number + n, } } } @@ -406,7 +242,7 @@ impl Iterator for PageIter { type Item = Page; fn next(&mut self) -> Option { - if self.start <= self.end { + if self.start < self.end { let page = self.start; self.start = self.start.next(); Some(page) @@ -415,3 +251,12 @@ impl Iterator for PageIter { } } } + +/// Round down to the nearest multiple of page size +pub fn round_down_pages(number: usize) -> usize { + number - number % PAGE_SIZE +} +/// Round up to the nearest multiple of page size +pub fn round_up_pages(number: usize) -> usize { + round_down_pages(number + PAGE_SIZE - 1) +} diff --git a/src/arch/x86_64/paging/table.rs b/src/arch/x86_64/paging/table.rs deleted file mode 100644 index 9e907d35e0f0d42713e8162e690ae08b7d26e6dd..0000000000000000000000000000000000000000 --- a/src/arch/x86_64/paging/table.rs +++ /dev/null @@ -1,135 +0,0 @@ -//! # Page table -//! 
Code borrowed from [Phil Opp's Blog](http://os.phil-opp.com/modifying-page-tables.html) - -use core::marker::PhantomData; -use core::ops::{Index, IndexMut}; - -use crate::memory::allocate_frames; -use crate::paging::{linear_phys_to_virt, VirtualAddress}; - -use super::{ENTRY_COUNT, PageFlags}; -use super::entry::{Entry, EntryFlags}; - -pub trait TableLevel {} - -pub enum Level4 {} -pub enum Level3 {} -pub enum Level2 {} -pub enum Level1 {} - -impl TableLevel for Level4 {} -impl TableLevel for Level3 {} -impl TableLevel for Level2 {} -impl TableLevel for Level1 {} - -pub trait HierarchicalLevel: TableLevel { - type NextLevel: TableLevel; -} - -impl HierarchicalLevel for Level4 { - type NextLevel = Level3; -} - -impl HierarchicalLevel for Level3 { - type NextLevel = Level2; -} - -impl HierarchicalLevel for Level2 { - type NextLevel = Level1; -} - -#[repr(C, align(4096))] -pub struct Table { - entries: [Entry; ENTRY_COUNT], - level: PhantomData, -} - -impl Table where L: TableLevel { - pub fn is_unused(&self) -> bool { - if self.entry_count() > 0 { - return false; - } - - true - } - - pub fn zero(&mut self) { - for entry in self.entries.iter_mut() { - entry.set_zero(); - } - } - - /// Set number of entries in first table entry - fn set_entry_count(&mut self, count: u64) { - debug_assert!(count <= ENTRY_COUNT as u64, "count can't be greater than ENTRY_COUNT"); - self.entries[0].set_counter_bits(count) - } - - /// Get number of entries in first table entry - fn entry_count(&self) -> u64 { - self.entries[0].counter_bits() - } - - pub fn increment_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count + 1); - } - - pub fn decrement_entry_count(&mut self) { - let current_count = self.entry_count(); - self.set_entry_count(current_count - 1); - } -} - -impl Table where L: HierarchicalLevel { - pub fn next_table(&self, index: usize) -> Option<&Table> { - self.next_table_address(index).map(|address| unsafe { &*(address.data() as *const _) }) - } - - pub fn next_table_mut(&mut self, index: usize) -> Option<&mut Table> { - self.next_table_address(index).map(|address| unsafe { &mut *(address.data() as *mut _) }) - } - - pub fn next_table_create(&mut self, index: usize) -> &mut Table { - if self.next_table(index).is_none() { - assert!(!self[index].flags().has_flag(EntryFlags::HUGE_PAGE.bits()), - "next_table_create does not support huge pages"); - let frame = allocate_frames(1).expect("no frames available"); - self.increment_entry_count(); - //TODO: RISC-V will not like this - self[index].set(frame, PageFlags::new_table().execute(true).write(true).user(true) /* Allow users to go down the page table, implement permissions at the page level */); - self.next_table_mut(index).unwrap().zero(); - } - self.next_table_mut(index).unwrap() - } - - fn next_table_address(&self, index: usize) -> Option { - let entry = &self[index]; - let entry_flags = entry.flags(); - - entry.pointed_frame().and_then(|next_table_frame| { - if entry_flags.has_flag(EntryFlags::HUGE_PAGE.bits()) { - return None; - } - let next_table_physaddr = next_table_frame.start_address(); - let next_table_virtaddr = linear_phys_to_virt(next_table_physaddr) - .expect("expected page table frame to fit within linear mapping"); - - Some(next_table_virtaddr) - }) - } -} - -impl Index for Table where L: TableLevel { - type Output = Entry; - - fn index(&self, index: usize) -> &Entry { - &self.entries[index] - } -} - -impl IndexMut for Table where L: TableLevel { - fn index_mut(&mut self, index: usize) -> 
&mut Entry { - &mut self.entries[index] - } -} diff --git a/src/arch/x86_64/paging/temporary_page.rs b/src/arch/x86_64/paging/temporary_page.rs deleted file mode 100644 index c8427cc1b10fcd41cffaa7a7093a8dbfcc0096a5..0000000000000000000000000000000000000000 --- a/src/arch/x86_64/paging/temporary_page.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Temporarily map a page -//! From [Phil Opp's Blog](http://os.phil-opp.com/remap-the-kernel.html) - -use crate::memory::Frame; - -use super::{ActivePageTable, Page, PageFlags, RmmA, VirtualAddress}; -use super::table::{Table, Level1}; - -pub struct TemporaryPage { - page: Page, -} - -impl TemporaryPage { - pub fn new(page: Page) -> TemporaryPage { - TemporaryPage { page } - } - - pub fn start_address (&self) -> VirtualAddress { - self.page.start_address() - } - - /// Maps the temporary page to the given frame in the active table. - /// Returns the start address of the temporary page. - pub fn map(&mut self, frame: Frame, flags: PageFlags, active_table: &mut ActivePageTable) -> VirtualAddress { - assert!(active_table.translate_page(self.page).is_none(), "temporary page is already mapped"); - let result = active_table.map_to(self.page, frame, flags); - result.flush(); - self.page.start_address() - } - - /// Maps the temporary page to the given page table frame in the active - /// table. Returns a reference to the now mapped table. - pub fn map_table_frame(&mut self, frame: Frame, flags: PageFlags, active_table: &mut ActivePageTable) -> &mut Table { - unsafe { &mut *(self.map(frame, flags, active_table).data() as *mut Table) } - } - - /// Unmaps the temporary page in the active table. - pub fn unmap(&mut self, active_table: &mut ActivePageTable) { - let (result, _frame) = active_table.unmap_return(self.page, true); - result.flush(); - } -} diff --git a/src/arch/x86_64/rmm.rs b/src/arch/x86_64/rmm.rs index 11eb527d364c5944ee2c342c1602aeaa05226b08..5fc741a2d5aedc8457199f2189ed61d34503f0ad 100644 --- a/src/arch/x86_64/rmm.rs +++ b/src/arch/x86_64/rmm.rs @@ -2,6 +2,7 @@ use core::{ cmp, mem, slice, + sync::atomic::{self, AtomicUsize, Ordering}, }; use rmm::{ KILOBYTE, @@ -20,7 +21,7 @@ use rmm::{ X8664Arch as RmmA, }; -use spin::Mutex; +use spin::{Mutex, MutexGuard}; extern "C" { /// The starting byte of the text (code) data segment. @@ -210,21 +211,15 @@ unsafe fn inner( BuddyAllocator::::new(bump_allocator).expect("failed to create BuddyAllocator") } -pub struct LockedAllocator { - inner: Mutex>>, -} +// There can only be one allocator (at the moment), so making this a ZST is great! 
+#[derive(Clone, Copy)] +pub struct LockedAllocator; -impl LockedAllocator { - const fn new() -> Self { - Self { - inner: Mutex::new(None) - } - } -} +static INNER_ALLOCATOR: Mutex>> = Mutex::new(None); impl FrameAllocator for LockedAllocator { unsafe fn allocate(&mut self, count: FrameCount) -> Option { - if let Some(ref mut allocator) = *self.inner.lock() { + if let Some(ref mut allocator) = *INNER_ALLOCATOR.lock() { allocator.allocate(count) } else { None @@ -232,38 +227,105 @@ impl FrameAllocator for LockedAllocator { } unsafe fn free(&mut self, address: PhysicalAddress, count: FrameCount) { - if let Some(ref mut allocator) = *self.inner.lock() { + if let Some(ref mut allocator) = *INNER_ALLOCATOR.lock() { allocator.free(address, count) } } unsafe fn usage(&self) -> FrameUsage { - if let Some(ref allocator) = *self.inner.lock() { + if let Some(ref allocator) = *INNER_ALLOCATOR.lock() { allocator.usage() } else { FrameUsage::new(FrameCount::new(0), FrameCount::new(0)) } } } +impl core::fmt::Debug for LockedAllocator { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match INNER_ALLOCATOR.try_lock().as_deref() { + Some(Some(alloc)) => write!(f, "[locked allocator: {:?}]", unsafe { alloc.usage() }), + Some(None) => write!(f, "[uninitialized lock allocator]"), + None => write!(f, "[failed to lock]"), + } + } +} static mut AREAS: [MemoryArea; 512] = [MemoryArea { base: PhysicalAddress::new(0), size: 0, }; 512]; -pub static mut FRAME_ALLOCATOR: LockedAllocator = LockedAllocator::new(); - -pub unsafe fn mapper_new(table_addr: PhysicalAddress) -> PageMapper<'static, RmmA, LockedAllocator> { - PageMapper::new(table_addr, &mut FRAME_ALLOCATOR) +pub static FRAME_ALLOCATOR: LockedAllocator = LockedAllocator; + +const NO_PROCESSOR: usize = !0; +static LOCK_OWNER: AtomicUsize = AtomicUsize::new(NO_PROCESSOR); +static LOCK_COUNT: AtomicUsize = AtomicUsize::new(0); + +// TODO: Support, perhaps via const generics, embedding address checking in PageMapper, thereby +// statically enforcing that the kernel mapper can only map things in the kernel half, and vice +// versa. +/// A guard to the global lock protecting the upper 128 TiB of kernel address space. +/// +/// NOTE: Use this with great care! Since heap allocations may also require this lock when the heap +/// needs to be expended, it must not be held while memory allocations are done! +// TODO: Make the lock finer-grained so that e.g. the heap part can be independent from e.g. +// PHYS_PML4? +pub struct KernelMapper { + mapper: crate::paging::PageMapper, + ro: bool, } +impl KernelMapper { + fn lock_inner(current_processor: usize) -> bool { + loop { + match LOCK_OWNER.compare_exchange_weak(NO_PROCESSOR, current_processor, Ordering::Acquire, Ordering::Relaxed) { + Ok(_) => break, + // already owned by this hardware thread + Err(id) if id == current_processor => break, + // either CAS failed, or some other hardware thread holds the lock + Err(_) => core::hint::spin_loop(), + } + } + + let prev_count = LOCK_COUNT.fetch_add(1, Ordering::Relaxed); + atomic::compiler_fence(Ordering::Acquire); -//TODO: global paging lock? 
-pub unsafe fn mapper_create() -> Option> { - PageMapper::create(&mut FRAME_ALLOCATOR) + prev_count > 0 + } + pub unsafe fn lock_for_manual_mapper(current_processor: usize, mapper: crate::paging::PageMapper) -> Self { + let ro = Self::lock_inner(current_processor); + Self { + mapper, + ro, + } + } + pub fn lock_manually(current_processor: usize) -> Self { + unsafe { Self::lock_for_manual_mapper(current_processor, PageMapper::new(RmmA::table(), FRAME_ALLOCATOR)) } + } + pub fn lock() -> Self { + Self::lock_manually(crate::cpu_id()) + } + pub fn get_mut(&mut self) -> Option<&mut crate::paging::PageMapper> { + if self.ro { + None + } else { + Some(&mut self.mapper) + } + } } +impl core::ops::Deref for KernelMapper { + type Target = crate::paging::PageMapper; -pub unsafe fn mapper_current() -> PageMapper<'static, RmmA, LockedAllocator> { - PageMapper::current(&mut FRAME_ALLOCATOR) + fn deref(&self) -> &Self::Target { + &self.mapper + } +} +impl Drop for KernelMapper { + fn drop(&mut self) { + if LOCK_COUNT.fetch_sub(1, Ordering::Relaxed) == 1 { + LOCK_OWNER.store(NO_PROCESSOR, Ordering::Release); + } + atomic::compiler_fence(Ordering::Release); + } } pub unsafe fn init( @@ -388,5 +450,5 @@ pub unsafe fn init( acpi_base, acpi_size_aligned, initfs_base, initfs_size_aligned, ); - *FRAME_ALLOCATOR.inner.lock() = Some(allocator); + *INNER_ALLOCATOR.lock() = Some(allocator); } diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs index f7e5225e4e541a7edbb463f567f57708bb1d650a..b8acccd2f0a4f1384e370166fbfb48edb981ba83 100644 --- a/src/arch/x86_64/start.rs +++ b/src/arch/x86_64/start.rs @@ -18,7 +18,7 @@ use crate::gdt; use crate::idt; use crate::interrupt; use crate::log::{self, info}; -use crate::paging; +use crate::paging::{self, KernelMapper}; /// Test of zero values in BSS. static BSS_TEST_ZERO: usize = 0; @@ -39,12 +39,12 @@ static BSP_READY: AtomicBool = AtomicBool::new(false); #[repr(packed)] pub struct KernelArgs { - kernel_base: u64, - kernel_size: u64, - stack_base: u64, - stack_size: u64, - env_base: u64, - env_size: u64, + kernel_base: usize, + kernel_size: usize, + stack_base: usize, + stack_size: usize, + env_base: usize, + env_size: usize, /// The base 64-bit pointer to an array of saved RSDPs. It's up to the kernel (and possibly /// userspace), to decide which RSDP to use. The buffer will be a linked list containing a @@ -53,36 +53,26 @@ pub struct KernelArgs { /// This field can be NULL, and if so, the system has not booted with UEFI or in some other way /// retrieved the RSDPs. The kernel or a userspace driver will thus try searching the BIOS /// memory instead. On UEFI systems, BIOS-like searching is not guaranteed to actually work though. - acpi_rsdps_base: u64, + acpi_rsdps_base: usize, /// The size of the RSDPs region. - acpi_rsdps_size: u64, + acpi_rsdps_size: usize, - areas_base: u64, - areas_size: u64, + areas_base: usize, + areas_size: usize, - /// The physical base 64-bit pointer to the contiguous initfs. - initfs_base: u64, - initfs_size: u64, + /// The physical base 64-bit pointer to the contiguous bootstrap/initfs. + bootstrap_base: usize, + /// Size of contiguous bootstrap/initfs physical region, not necessarily page aligned. + bootstrap_size: usize, + /// Entry point the kernel will jump to. + bootstrap_entry: usize, } /// The entry to Rust, all things must be initialized #[no_mangle] pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! 
{ - let env = { - let args = &*args_ptr; - - let kernel_base = args.kernel_base as usize; - let kernel_size = args.kernel_size as usize; - let stack_base = args.stack_base as usize; - let stack_size = args.stack_size as usize; - let env_base = args.env_base as usize; - let env_size = args.env_size as usize; - let acpi_rsdps_base = args.acpi_rsdps_base; - let acpi_rsdps_size = args.acpi_rsdps_size; - let areas_base = args.areas_base as usize; - let areas_size = args.areas_size as usize; - let initfs_base = args.initfs_base as usize; - let initfs_size = args.initfs_size as usize; + let bootstrap = { + let args = args_ptr.read(); // BSS should already be zero { @@ -90,12 +80,11 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { assert_eq!(DATA_TEST_NONZERO, 0xFFFF_FFFF_FFFF_FFFF); } - KERNEL_BASE.store(kernel_base, Ordering::SeqCst); - KERNEL_SIZE.store(kernel_size, Ordering::SeqCst); + KERNEL_BASE.store(args.kernel_base, Ordering::SeqCst); + KERNEL_SIZE.store(args.kernel_size, Ordering::SeqCst); // Convert env to slice - let env = slice::from_raw_parts((env_base + crate::PHYS_OFFSET) as *const u8, env_size); - let initfs = slice::from_raw_parts((initfs_base + crate::PHYS_OFFSET) as *const u8, initfs_size); + let env = slice::from_raw_parts((args.env_base + crate::PHYS_OFFSET) as *const u8, args.env_size); // Set up graphical debug #[cfg(feature = "graphical_debug")] @@ -117,12 +106,13 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { }); info!("Redox OS starting..."); - info!("Kernel: {:X}:{:X}", kernel_base, kernel_base + kernel_size); - info!("Stack: {:X}:{:X}", stack_base, stack_base + stack_size); - info!("Env: {:X}:{:X}", env_base, env_base + env_size); - info!("RSDPs: {:X}:{:X}", acpi_rsdps_base, acpi_rsdps_base + acpi_rsdps_size); - info!("Areas: {:X}:{:X}", areas_base, areas_base + areas_size); - info!("Initfs: {:X}:{:X}", initfs_base, initfs_base + initfs_size); + info!("Kernel: {:X}:{:X}", args.kernel_base, args.kernel_base + args.kernel_size); + info!("Stack: {:X}:{:X}", args.stack_base, args.stack_base + args.stack_size); + info!("Env: {:X}:{:X}", args.env_base, args.env_base + args.env_size); + info!("RSDPs: {:X}:{:X}", args.acpi_rsdps_base, args.acpi_rsdps_base + args.acpi_rsdps_size); + info!("Areas: {:X}:{:X}", args.areas_base, args.areas_base + args.areas_size); + info!("Bootstrap: {:X}:{:X}", args.bootstrap_base, args.bootstrap_base + args.bootstrap_size); + info!("Bootstrap entry point: {:X}", args.bootstrap_entry); // Set up GDT before paging gdt::init(); @@ -132,19 +122,19 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { // Initialize RMM crate::arch::rmm::init( - kernel_base, kernel_size, - stack_base, stack_size, - env_base, env_size, - acpi_rsdps_base as usize, acpi_rsdps_size as usize, - areas_base, areas_size, - initfs_base, initfs_size, + args.kernel_base, args.kernel_size, + args.stack_base, args.stack_size, + args.env_base, args.env_size, + args.acpi_rsdps_base, args.acpi_rsdps_size, + args.areas_base, args.areas_size, + args.bootstrap_base, args.bootstrap_size, ); // Initialize paging - let (mut active_table, tcb_offset) = paging::init(0); + let tcb_offset = paging::init(0); // Set up GDT after paging with TLS - gdt::init_paging(0, tcb_offset, stack_base + stack_size); + gdt::init_paging(0, tcb_offset, args.stack_base + args.stack_size); // Set up IDT idt::init_paging_bsp(); @@ -168,7 +158,7 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! 
{ BSP_READY.store(false, Ordering::SeqCst); // Setup kernel heap - allocator::init(&mut active_table); + allocator::init(); // Set up double buffer for grpahical debug now that heap is available #[cfg(feature = "graphical_debug")] @@ -180,34 +170,37 @@ pub unsafe extern fn kstart(args_ptr: *const KernelArgs) -> ! { log::init(); // Initialize devices - device::init(&mut active_table); + device::init(); // Read ACPI tables, starts APs #[cfg(feature = "acpi")] { - acpi::init(&mut active_table, if acpi_rsdps_base != 0 && acpi_rsdps_size > 0 { - Some((acpi_rsdps_base + crate::PHYS_OFFSET as u64, acpi_rsdps_size)) + acpi::init(if args.acpi_rsdps_base != 0 && args.acpi_rsdps_size > 0 { + Some(((args.acpi_rsdps_base + crate::PHYS_OFFSET) as u64, args.acpi_rsdps_size as u64)) } else { None }); - device::init_after_acpi(&mut active_table); + device::init_after_acpi(); } // Initialize all of the non-core devices not otherwise needed to complete initialization device::init_noncore(); - crate::scheme::initfs::init(initfs); - // Stop graphical debug #[cfg(feature = "graphical_debug")] graphical_debug::fini(); BSP_READY.store(true, Ordering::SeqCst); - env + crate::Bootstrap { + base: crate::memory::Frame::containing_address(crate::paging::PhysicalAddress::new(args.bootstrap_base)), + page_count: args.bootstrap_size / crate::memory::PAGE_SIZE, + entry: args.bootstrap_entry, + env, + } }; - crate::kmain(CPU_COUNT.load(Ordering::SeqCst), env); + crate::kmain(CPU_COUNT.load(Ordering::SeqCst), bootstrap); } #[repr(packed)] @@ -237,7 +230,13 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! { idt::init(); // Initialize paging - let tcb_offset = paging::init_ap(cpu_id, bsp_table); + let tcb_offset = { + use crate::paging::{PageMapper, PhysicalAddress}; + use crate::rmm::FRAME_ALLOCATOR; + + let mut mapper = KernelMapper::lock_for_manual_mapper(cpu_id, PageMapper::new(PhysicalAddress::new(bsp_table), FRAME_ALLOCATOR)); + paging::init_ap(cpu_id, &mut mapper) + }; // Set up GDT with TLS gdt::init_paging(cpu_id as u32, tcb_offset, stack_end); diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index c36772962b1d8a13074f86e237f34f3191d92181..97d14a94e26da910af22ed3614d231529ee06840 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,9 +1,13 @@ use core::mem; use core::sync::atomic::AtomicBool; +use alloc::sync::Arc; + +use crate::paging::{RmmA, RmmArch}; use crate::syscall::FloatRegisters; use memoffset::offset_of; +use spin::Once; /// This must be used by the kernel to ensure that context switches are done atomically /// Compare and exchange this to true when beginning a context switch on any CPU @@ -13,13 +17,12 @@ pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; +pub const KFX_SIZE: usize = 512; +pub const KFX_ALIGN: usize = 16; + #[derive(Clone, Debug)] #[repr(C)] pub struct Context { - /// FX location - fx: usize, - /// Page table pointer - cr3: usize, /// RFLAGS register rflags: usize, /// RBX register @@ -35,7 +38,7 @@ pub struct Context { /// Base pointer rbp: usize, /// Stack pointer - rsp: usize, + pub(crate) rsp: usize, /// FSBASE. /// /// NOTE: Same fsgsbase behavior as with gsbase. @@ -46,23 +49,11 @@ pub struct Context { /// running. With fsgsbase, this is neither saved nor restored upon every syscall (there is no /// need to!), and thus it must be re-read from the register before copying this struct. pub(crate) gsbase: usize, - /// FX valid? 
- loadable: AbiCompatBool, -} - -#[repr(u8)] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum AbiCompatBool { - False, - True, } impl Context { pub fn new() -> Context { Context { - loadable: AbiCompatBool::False, - fx: 0, - cr3: 0, rflags: 0, rbx: 0, r12: 0, @@ -76,15 +67,30 @@ impl Context { } } - pub fn get_page_utable(&self) -> usize { - self.cr3 + pub fn set_stack(&mut self, address: usize) { + self.rsp = address; } - pub fn get_fx_regs(&self) -> Option { - if self.loadable == AbiCompatBool::False { - return None; - } - let mut regs = unsafe { *(self.fx as *const FloatRegisters) }; + pub unsafe fn signal_stack(&mut self, handler: extern fn(usize), sig: u8) { + self.push_stack(sig as usize); + self.push_stack(handler as usize); + self.push_stack(signal_handler_wrapper as usize); + } + + pub unsafe fn push_stack(&mut self, value: usize) { + self.rsp -= mem::size_of::(); + *(self.rsp as *mut usize) = value; + } + + pub unsafe fn pop_stack(&mut self) -> usize { + let value = *(self.rsp as *const usize); + self.rsp += mem::size_of::(); + value + } +} +impl super::Context { + pub fn get_fx_regs(&self) -> FloatRegisters { + let mut regs = unsafe { self.kfx.as_ptr().cast::().read() }; regs._reserved = 0; let mut new_st = regs.st_space; for st in &mut new_st { @@ -92,16 +98,12 @@ impl Context { *st &= !ST_RESERVED; } regs.st_space = new_st; - Some(regs) + regs } - pub fn set_fx_regs(&mut self, mut new: FloatRegisters) -> bool { - if self.loadable == AbiCompatBool::False { - return false; - } - + pub fn set_fx_regs(&mut self, mut new: FloatRegisters) { { - let old = unsafe { &*(self.fx as *const FloatRegisters) }; + let old = unsafe { &*(self.kfx.as_ptr().cast::()) }; new._reserved = old._reserved; let old_st = new.st_space; let mut new_st = new.st_space; @@ -115,95 +117,73 @@ impl Context { } unsafe { - *(self.fx as *mut FloatRegisters) = new; + self.kfx.as_mut_ptr().cast::().write(new); } - true - } - - pub fn set_fx(&mut self, address: usize) { - self.fx = address; } +} - pub fn set_page_utable(&mut self, address: usize) { - self.cr3 = address; - } +pub static EMPTY_CR3: Once = Once::new(); - pub fn set_stack(&mut self, address: usize) { - self.rsp = address; - } +// SAFETY: EMPTY_CR3 must be initialized. +pub unsafe fn empty_cr3() -> rmm::PhysicalAddress { + debug_assert!(EMPTY_CR3.poll().is_some()); + *EMPTY_CR3.get_unchecked() +} - pub unsafe fn signal_stack(&mut self, handler: extern fn(usize), sig: u8) { - self.push_stack(sig as usize); - self.push_stack(handler as usize); - self.push_stack(signal_handler_wrapper as usize); - } +/// Switch to the next context by restoring its stack and registers +pub unsafe fn switch_to(prev: &mut super::Context, next: &mut super::Context) { + core::arch::asm!(" + fxsave64 [{prev_fx}] + fxrstor64 [{next_fx}] + ", prev_fx = in(reg) prev.kfx.as_mut_ptr(), + next_fx = in(reg) next.kfx.as_ptr(), + ); - pub unsafe fn push_stack(&mut self, value: usize) { - self.rsp -= mem::size_of::(); - *(self.rsp as *mut usize) = value; + { + use x86::{bits64::segmentation::*, msr}; + + // This is so much shorter in Rust! 
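+        // Save the outgoing context's FS/GS bases and load the incoming context's, either with
+        // the fsgsbase instructions when that CPU feature is enabled at build time, or through
+        // the corresponding MSRs otherwise.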
+ + if cfg!(feature = "x86_fsgsbase") { + prev.arch.fsbase = rdfsbase() as usize; + wrfsbase(next.arch.fsbase as u64); + swapgs(); + prev.arch.gsbase = rdgsbase() as usize; + wrgsbase(next.arch.gsbase as u64); + swapgs(); + } else { + prev.arch.fsbase = msr::rdmsr(msr::IA32_FS_BASE) as usize; + msr::wrmsr(msr::IA32_FS_BASE, next.arch.fsbase as u64); + prev.arch.gsbase = msr::rdmsr(msr::IA32_KERNEL_GSBASE) as usize; + msr::wrmsr(msr::IA32_KERNEL_GSBASE, next.arch.gsbase as u64); + } } - pub unsafe fn pop_stack(&mut self) -> usize { - let value = *(self.rsp as *const usize); - self.rsp += mem::size_of::(); - value + match next.addr_space { + // Since Arc is essentially just wraps a pointer, in this case a regular pointer (as + // opposed to dyn or slice fat pointers), and NonNull optimization exists, map_or will + // hopefully be optimized down to checking prev and next pointers, as next cannot be null. + Some(ref next_space) => if prev.addr_space.as_ref().map_or(true, |prev_space| !Arc::ptr_eq(&prev_space, &next_space)) { + // Suppose we have two sibling threads A and B. A runs on CPU 0 and B on CPU 1. A + // recently called yield and is now here about to switch back. Meanwhile, B is + // currently creating a new mapping in their shared address space, for example a + // message on a channel. + // + // Unless we acquire this lock, it may be possible that the TLB will not contain new + // entries. While this can be caught and corrected in a page fault handler, this is not + // true when entries are removed from a page table! + next_space.read().table.utable.make_current(); + } + None => { + RmmA::set_table(empty_cr3()); + } } + switch_to_inner(&mut prev.arch, &mut next.arch) } -macro_rules! load_msr( - ($name:literal, $offset:literal) => { - concat!(" - mov ecx, {", $name, "} - mov rdx, [rsi + {", $offset, "}] - mov eax, edx - shr rdx, 32 - - // MSR <= EDX:EAX - wrmsr - ") - } -); - -// NOTE: RAX is a scratch register and can be set to whatever. There is also no return -// value in switch_to, to it will also never be read. The same goes for RDX, and RCX. -// TODO: Use runtime code patching (perhaps in the bootloader) by pushing alternative code -// sequences into a specialized section, with some macro resembling Linux's `.ALTERNATIVE`. -#[cfg(feature = "x86_fsgsbase")] -macro_rules! switch_fsgsbase( - () => { - " - // placeholder: {MSR_FSBASE} {MSR_KERNELGSBASE} - - rdfsbase rax - mov [rdi + {off_fsbase}], rax - mov rax, [rsi + {off_fsbase}] - wrfsbase rax - - swapgs - rdgsbase rax - mov [rdi + {off_gsbase}], rax - mov rax, [rsi + {off_gsbase}] - wrgsbase rax - swapgs - " - } -); - -#[cfg(not(feature = "x86_fsgsbase"))] -macro_rules! switch_fsgsbase( - () => { - concat!( - load_msr!("MSR_FSBASE", "off_fsbase"), - load_msr!("MSR_KERNELGSBASE", "off_gsbase"), - ) - } -); - - -/// Switch to the next context by restoring its stack and registers -/// Check disassembly! +// Check disassembly! #[naked] -pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { +unsafe extern "sysv64" fn switch_to_inner(_prev: &mut Context, _next: &mut Context) { use Context as Cx; core::arch::asm!( @@ -214,36 +194,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { // - we cannot change callee-preserved registers arbitrarily, e.g. rbx, which is why we // store them here in the first place. 
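+        // Note that the FPU/SSE state, the FS/GS bases, and CR3 have already been switched in
+        // Rust by `switch_to` above; this naked function only swaps the callee-saved registers,
+        // RFLAGS, and the stack pointer before jumping to the switch-finish hook.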
concat!(" - // load `prev.fx` - mov rax, [rdi + {off_fx}] - - // save processor SSE/FPU/AVX state in `prev.fx` pointee - fxsave64 [rax] - - // set `prev.loadable` to true - mov BYTE PTR [rdi + {off_loadable}], {true} - // compare `next.loadable` with true - cmp BYTE PTR [rsi + {off_loadable}], {true} - je 3f - - fninit - jmp 3f - -2: - mov rax, [rsi + {off_fx}] - fxrstor64 [rax] - -3: - // Save the current CR3, and load the next CR3 if not identical - mov rcx, cr3 - mov [rdi + {off_cr3}], rcx - mov rax, [rsi + {off_cr3}] - cmp rax, rcx - - je 4f - mov cr3, rax - -4: // Save old registers, and load new ones mov [rdi + {off_rbx}], rbx mov rbx, [rsi + {off_rbx}] @@ -266,10 +216,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { mov [rdi + {off_rsp}], rsp mov rsp, [rsi + {off_rsp}] - ", - switch_fsgsbase!(), - " - // push RFLAGS (can only be modified via stack) pushfq // pop RFLAGS into `self.rflags` @@ -289,10 +235,7 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { "), - off_fx = const(offset_of!(Cx, fx)), - off_cr3 = const(offset_of!(Cx, cr3)), off_rflags = const(offset_of!(Cx, rflags)), - off_loadable = const(offset_of!(Cx, loadable)), off_rbx = const(offset_of!(Cx, rbx)), off_r12 = const(offset_of!(Cx, r12)), @@ -302,13 +245,6 @@ pub unsafe extern "C" fn switch_to(_prev: &mut Context, _next: &mut Context) { off_rbp = const(offset_of!(Cx, rbp)), off_rsp = const(offset_of!(Cx, rsp)), - off_fsbase = const(offset_of!(Cx, fsbase)), - off_gsbase = const(offset_of!(Cx, gsbase)), - - MSR_FSBASE = const(x86::msr::IA32_FS_BASE), - MSR_KERNELGSBASE = const(x86::msr::IA32_KERNEL_GSBASE), - - true = const(AbiCompatBool::True as u8), switch_hook = sym crate::context::switch_finish_hook, options(noreturn), ); diff --git a/src/context/context.rs b/src/context/context.rs index 99a8e7f94003c974c5685ccba1148486013f7cb6..b9bc23320579206929f554457dd18cbd07311ecd 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -16,13 +16,14 @@ use crate::arch::{interrupt::InterruptStack, paging::PAGE_SIZE}; use crate::common::unique::Unique; use crate::context::arch; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::{UserGrants, Memory, SharedMemory}; +use crate::context::memory::AddrSpace; use crate::ipi::{ipi, IpiKind, IpiTarget}; +use crate::memory::Enomem; use crate::scheme::{SchemeNamespace, FileHandle}; use crate::sync::WaitMap; use crate::syscall::data::SigAction; -use crate::syscall::error::{Result, Error, ENOMEM}; +use crate::syscall::error::{Result, Error, ESRCH}; use crate::syscall::flag::{SIG_DFL, SigActionFlags}; /// Unique identifier for a context (i.e. `pid`). @@ -219,21 +220,18 @@ pub struct Context { /// The architecture specific context pub arch: arch::Context, /// Kernel FX - used to store SIMD and FPU registers on context switch - pub kfx: Option>, + pub kfx: AlignedBox<[u8; {arch::KFX_SIZE}], {arch::KFX_ALIGN}>, /// Kernel stack pub kstack: Option>, /// Kernel signal backup: Registers, Kernel FX, Kernel Stack, Signal number - pub ksig: Option<(arch::Context, Option>, Option>, u8)>, + pub ksig: Option<(arch::Context, AlignedBox<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>, Option>, u8)>, /// Restore ksig context on next switch pub ksig_restore: bool, - /// Executable image - pub image: Vec, - /// User stack - pub stack: Option, - /// User signal stack - pub sigstack: Option, - /// User grants - pub grants: Arc>, + /// Address space containing a page table lock, and grants. 
Normally this will have a value, + /// but can be None while the context is being reaped or when a new context is created but has + /// not yet had its address space changed. Note that these are only for user mappings; kernel + /// mappings are universal and independent on address spaces or contexts. + pub addr_space: Option>>, /// The name of the context pub name: Arc>>, /// The current working directory @@ -250,7 +248,16 @@ pub struct Context { /// A somewhat hacky way to initially stop a context when creating /// a new instance of the proc: scheme, entirely separate from /// signals or any other way to restart a process. - pub ptrace_stop: bool + pub ptrace_stop: bool, + /// A pointer to the signal stack. If this is unset, none of the sigactions can be anything + /// else than SIG_DFL, otherwise signals will not be delivered. Userspace is responsible for + /// setting this. + pub sigstack: Option, + /// An even hackier way to pass the return entry point and stack pointer to new contexts while + /// implementing clone. Before a context has returned to userspace, its IntRegisters cannot be + /// set since there is no interrupt stack (unless the kernel stack is copied, but that is in my + /// opinion hackier and less efficient than this (and UB to do in Rust)). + pub clone_entry: Option<[usize; 2]>, } // Necessary because GlobalAlloc::dealloc requires the layout to be the same, and therefore Box @@ -274,14 +281,14 @@ impl AlignedBox { } }; #[inline(always)] - pub fn try_zeroed() -> Result + pub fn try_zeroed() -> Result where T: ValidForZero, { Ok(unsafe { let ptr = crate::ALLOCATOR.alloc_zeroed(Self::LAYOUT); if ptr.is_null() { - return Err(Error::new(ENOMEM))?; + return Err(Enomem)?; } Self { inner: Unique::new_unchecked(ptr.cast()), @@ -303,13 +310,32 @@ impl Drop for AlignedBox { } } } +impl core::ops::Deref for AlignedBox { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.inner.as_ptr() } + } +} +impl core::ops::DerefMut for AlignedBox { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.inner.as_ptr() } + } +} +impl Clone for AlignedBox { + fn clone(&self) -> Self { + let mut new = Self::try_zeroed().unwrap_or_else(|_| alloc::alloc::handle_alloc_error(Self::LAYOUT)); + T::clone_from(&mut new, self); + new + } +} impl Context { pub fn new(id: ContextId) -> Result { let syscall_head = AlignedBox::try_zeroed()?; let syscall_tail = AlignedBox::try_zeroed()?; - Ok(Context { + let mut this = Context { id, pgid: id, ppid: ContextId::from(0), @@ -334,28 +360,21 @@ impl Context { pending: VecDeque::new(), wake: None, arch: arch::Context::new(), - kfx: None, + kfx: AlignedBox::<[u8; arch::KFX_SIZE], {arch::KFX_ALIGN}>::try_zeroed()?, kstack: None, ksig: None, ksig_restore: false, - image: Vec::new(), - stack: None, - sigstack: None, - grants: Arc::new(RwLock::new(UserGrants::default())), + addr_space: None, name: Arc::new(RwLock::new(String::new().into_boxed_str())), cwd: Arc::new(RwLock::new(String::new())), files: Arc::new(RwLock::new(Vec::new())), - actions: Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])), + actions: Self::empty_actions(), regs: None, - ptrace_stop: false - }) + ptrace_stop: false, + sigstack: None, + clone_entry: None, + }; + Ok(this) } /// Make a relative path absolute @@ -524,4 +543,26 @@ impl Context { None } } + + pub fn addr_space(&self) -> Result<&Arc>> { + 
self.addr_space.as_ref().ok_or(Error::new(ESRCH)) + } + #[must_use = "grants must be manually unmapped, otherwise it WILL panic!"] + pub fn set_addr_space(&mut self, addr_space: Arc>) -> Option>> { + if self.id == super::context_id() { + unsafe { addr_space.read().table.utable.make_current(); } + } + + self.addr_space.replace(addr_space) + } + pub fn empty_actions() -> Arc>> { + Arc::new(RwLock::new(vec![( + SigAction { + sa_handler: unsafe { mem::transmute(SIG_DFL) }, + sa_mask: [0; 2], + sa_flags: SigActionFlags::empty(), + }, + 0 + ); 128])) + } } diff --git a/src/context/list.rs b/src/context/list.rs index e37b4b955c031b46f3e67756b3c6f90e2f582230..e900ebcfcddf7191433d4fc4fd2a58b8ee53aada 100644 --- a/src/context/list.rs +++ b/src/context/list.rs @@ -1,10 +1,8 @@ use alloc::sync::Arc; -use alloc::boxed::Box; use alloc::collections::BTreeMap; -use core::alloc::{GlobalAlloc, Layout}; use core::{iter, mem}; use core::sync::atomic::Ordering; -use crate::paging::{ActivePageTable, TableKind}; + use spin::RwLock; use crate::syscall::error::{Result, Error, EAGAIN}; @@ -79,10 +77,8 @@ impl ContextList { let context_lock = self.new_context()?; { let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) }; - for b in fx.iter_mut() { - *b = 0; - } + let _ = context.set_addr_space(super::memory::new_addrspace()?); + let mut stack = vec![0; 65_536].into_boxed_slice(); let offset = stack.len() - mem::size_of::(); @@ -100,12 +96,7 @@ impl ContextList { context.arch.set_context_handle(); } - context.arch.set_page_utable(unsafe { ActivePageTable::new(TableKind::User).address() }); - #[cfg(target_arch = "aarch64")] - context.arch.set_page_ktable(unsafe { ActivePageTable::new(TableKind::Kernel).address() }); - context.arch.set_fx(fx.as_ptr() as usize); context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kfx = Some(fx); context.kstack = Some(stack); } Ok(context_lock) diff --git a/src/context/memory.rs b/src/context/memory.rs index 209d9080ad861a01d90bb716e7d5d203efc68527..61f695e9188a396b31f167a3fda2d5eb2dadac3e 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -1,31 +1,23 @@ use alloc::collections::{BTreeMap, BTreeSet}; -use alloc::sync::{Arc, Weak}; +use alloc::{sync::Arc, vec::Vec}; use core::borrow::Borrow; use core::cmp::{self, Eq, Ordering, PartialEq, PartialOrd}; use core::fmt::{self, Debug}; -use core::intrinsics; -use core::ops::{Deref, DerefMut}; -use spin::Mutex; +use core::ops::Deref; +use spin::{RwLock, RwLockWriteGuard}; use syscall::{ flag::MapFlags, error::*, }; +use rmm::Arch as _; use crate::arch::paging::PAGE_SIZE; use crate::context::file::FileDescriptor; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::Frame; -use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, PageIter, PhysicalAddress, RmmA, VirtualAddress}; - -/// Round down to the nearest multiple of page size -pub fn round_down_pages(number: usize) -> usize { - number - number % PAGE_SIZE -} -/// Round up to the nearest multiple of page size -pub fn round_up_pages(number: usize) -> usize { - round_down_pages(number + PAGE_SIZE - 1) -} +use crate::memory::{Enomem, Frame}; +use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; +use crate::paging::{KernelMapper, Page, PageFlags, PageIter, PageMapper, PhysicalAddress, RmmA, round_up_pages, VirtualAddress}; + +pub const MMAP_MIN_DEFAULT: usize = PAGE_SIZE; 
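+// Only the zero page is excluded from automatic placement by default; fixed mappings
+// (MAP_FIXED/MAP_FIXED_NOREPLACE) may still target page zero explicitly, as described on
+// `AddrSpace::mmap_min` below.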
pub fn page_flags(flags: MapFlags) -> PageFlags { PageFlags::new() @@ -34,6 +26,14 @@ pub fn page_flags(flags: MapFlags) -> PageFlags { .write(flags.contains(MapFlags::PROT_WRITE)) //TODO: PROT_READ } +pub fn map_flags(page_flags: PageFlags) -> MapFlags { + let mut flags = MapFlags::PROT_READ; + if page_flags.has_write() { flags |= MapFlags::PROT_WRITE; } + if page_flags.has_execute() { flags |= MapFlags::PROT_EXEC; } + // TODO: MAP_SHARED/MAP_PRIVATE (requires that grants keep track of what they borrow and if + // they borrow shared or CoW). + flags +} pub struct UnmapResult { pub file_desc: Option, @@ -46,14 +46,214 @@ impl Drop for UnmapResult { } } -#[derive(Debug, Default)] +pub fn new_addrspace() -> Result>> { + Arc::try_new(RwLock::new(AddrSpace::new()?)).map_err(|_| Error::new(ENOMEM)) +} + +#[derive(Debug)] +pub struct AddrSpace { + pub table: Table, + pub grants: UserGrants, + /// Lowest offset for mmap invocations where the user has not already specified the offset + /// (using MAP_FIXED/MAP_FIXED_NOREPLACE). Cf. Linux's `/proc/sys/vm/mmap_min_addr`, but with + /// the exception that we have a memory safe kernel which doesn't have to protect itself + /// against null pointers, so fixed mmaps to address zero are still allowed. + pub mmap_min: usize, +} +impl AddrSpace { + pub fn current() -> Result>> { + Ok(Arc::clone(super::current()?.read().addr_space()?)) + } + + /// Attempt to clone an existing address space so that all mappings are copied (CoW). + pub fn try_clone(&mut self) -> Result>> { + let mut new = new_addrspace()?; + + let new_guard = Arc::get_mut(&mut new) + .expect("expected new address space Arc not to be aliased") + .get_mut(); + + let this_mapper = &mut self.table.utable; + let new_mapper = &mut new_guard.table.utable; + + for grant in self.grants.iter() { + if grant.desc_opt.is_some() { continue; } + + let new_grant; + + // TODO: Replace this with CoW + if grant.owned { + new_grant = Grant::zeroed(Page::containing_address(grant.start_address()), grant.size() / PAGE_SIZE, grant.flags(), new_mapper, ())?; + + for page in new_grant.pages().map(Page::start_address) { + let current_frame = unsafe { RmmA::phys_to_virt(this_mapper.translate(page).expect("grant containing unmapped pages").0) }.data() as *const u8; + let new_frame = unsafe { RmmA::phys_to_virt(new_mapper.translate(page).expect("grant containing unmapped pages").0) }.data() as *mut u8; + + unsafe { + new_frame.copy_from_nonoverlapping(current_frame, PAGE_SIZE); + } + } + } else { + // TODO: Remove reborrow? In that case, physmapped memory will need to either be + // remapped when cloning, or be backed by a file descriptor (like + // `memory:physical`). 
+ new_grant = Grant::reborrow(&grant, Page::containing_address(grant.start_address()), this_mapper, new_mapper, ())?; + } + + new_guard.grants.insert(new_grant); + } + Ok(new) + } + pub fn new() -> Result { + Ok(Self { + grants: UserGrants::new(), + table: setup_new_utable()?, + mmap_min: MMAP_MIN_DEFAULT, + }) + } + pub fn is_current(&self) -> bool { + self.table.utable.is_current() + } + pub fn mprotect(&mut self, base: Page, page_count: usize, flags: MapFlags) -> Result<()> { + let (mut active, mut inactive); + let mut flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + let mut mapper = &mut self.table.utable; + + let region = Region::new(base.start_address(), page_count * PAGE_SIZE); + + // TODO: Remove allocation + let regions = self.grants.conflicts(region).map(|g| *g.region()).collect::>(); + + for grant_region in regions { + let grant = self.grants.take(&grant_region).expect("grant cannot magically disappear while we hold the lock!"); + let intersection = grant_region.intersect(region); + + let (before, mut grant, after) = grant.extract(intersection).expect("failed to extract grant"); + + if let Some(before) = before { self.grants.insert(before); } + if let Some(after) = after { self.grants.insert(after); } + + if !grant.can_have_flags(flags) { + self.grants.insert(grant); + return Err(Error::new(EACCES)); + } + + let new_flags = grant.flags() + // TODO: Require a capability in order to map executable memory? + .execute(flags.contains(MapFlags::PROT_EXEC)) + .write(flags.contains(MapFlags::PROT_WRITE)); + + // TODO: Allow enabling/disabling read access on architectures which allow it. On + // x86_64 with protection keys (although only enforced by userspace), and AArch64 (I + // think), execute-only memory is also supported. + + grant.remap(mapper, &mut flusher, new_flags); + self.grants.insert(grant); + } + Ok(()) + } + pub fn munmap(mut self: RwLockWriteGuard<'_, Self>, page: Page, page_count: usize) { + let mut notify_files = Vec::new(); + + let requested = Region::new(page.start_address(), page_count * PAGE_SIZE); + let mut flusher = PageFlushAll::new(); + + let conflicting: Vec = self.grants.conflicts(requested).map(Region::from).collect(); + + for conflict in conflicting { + let grant = self.grants.take(&conflict).expect("conflicting region didn't exist"); + let intersection = grant.intersect(requested); + let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts"); + + // Notify scheme that holds grant + if let Some(file_desc) = grant.desc_opt.take() { + notify_files.push((file_desc, intersection)); + } + + // Keep untouched regions + if let Some(before) = before { + self.grants.insert(before); + } + if let Some(after) = after { + self.grants.insert(after); + } + + // Remove irrelevant region + grant.unmap(&mut self.table.utable, &mut flusher); + } + drop(self); + + for (file_ref, intersection) in notify_files { + let scheme_id = { file_ref.desc.description.read().scheme }; + + let scheme = match crate::scheme::schemes().get(scheme_id) { + Some(scheme) => Arc::clone(scheme), + // One could argue that EBADFD could be returned here, but we have already unmapped + // the memory. + None => continue, + }; + // Same here, we don't really care about errors when schemes respond to unmap events. + // The caller wants the memory to be unmapped, period. 
When already unmapped, what + // would we do with error codes anyway? + let _ = scheme.funmap(intersection.start_address().data(), intersection.size()); + + let _ = file_ref.desc.close(); + } + } + pub fn mmap(&mut self, page: Option, page_count: usize, flags: MapFlags, map: impl FnOnce(Page, PageFlags, &mut PageMapper, &mut dyn Flusher) -> Result) -> Result { + // Finally, the end of all "T0DO: Abstract with other grant creation"! + + let region = match page { + Some(page) => self.grants.find_free_at(self.mmap_min, page.start_address(), page_count * PAGE_SIZE, flags)?, + None => self.grants.find_free(self.mmap_min, page_count * PAGE_SIZE).ok_or(Error::new(ENOMEM))?, + }; + let page = Page::containing_address(region.start_address()); + + let (mut active, mut inactive); + let flusher = if self.is_current() { + active = PageFlushAll::new(); + &mut active as &mut dyn Flusher + } else { + inactive = InactiveFlusher::new(); + &mut inactive as &mut dyn Flusher + }; + + self.grants.insert(map(page, page_flags(flags), &mut self.table.utable, flusher)?); + Ok(page) + } +} + +#[derive(Debug)] pub struct UserGrants { - pub inner: BTreeSet, + inner: BTreeSet, + holes: BTreeMap, + // TODO: Would an additional map ordered by (size,start) to allow for O(log n) allocations be + // beneficial? + //TODO: technically VirtualAddress is from a scheme's context! pub funmap: BTreeMap, } +impl Default for UserGrants { + fn default() -> Self { + Self::new() + } +} + impl UserGrants { + pub fn new() -> Self { + Self { + inner: BTreeSet::new(), + holes: core::iter::once((VirtualAddress::new(0), crate::USER_END_OFFSET)).collect::>(), + funmap: BTreeMap::new(), + } + } /// Returns the grant, if any, which occupies the specified address pub fn contains(&self, address: VirtualAddress) -> Option<&Grant> { let byte = Region::byte(address); @@ -73,59 +273,154 @@ impl UserGrants { .take_while(move |region| !region.intersect(requested).is_empty()) } /// Return a free region with the specified size - pub fn find_free(&self, size: usize) -> Region { - // Get last used region - let last = self.inner.iter().next_back().map(Region::from).unwrap_or(Region::new(VirtualAddress::new(0), 0)); - // At the earliest, start at grant offset - let address = cmp::max(last.end_address().data(), crate::USER_GRANT_OFFSET); + // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). + pub fn find_free(&self, min: usize, size: usize) -> Option { + // Get first available hole, but do reserve the page starting from zero as most compiled + // languages cannot handle null pointers safely even if they point to valid memory. If an + // application absolutely needs to map the 0th page, they will have to do so explicitly via + // MAP_FIXED/MAP_FIXED_NOREPLACE. + // TODO: Allow explicitly allocating guard pages? + + let (hole_start, hole_size) = self.holes.iter() + .skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min) + .find(|(hole_offset, hole_size)| { + let avail_size = if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size { + **hole_size - (min - hole_offset.data()) + } else { + **hole_size + }; + size <= avail_size + })?; // Create new region - Region::new(VirtualAddress::new(address), size) + Some(Region::new(VirtualAddress::new(cmp::max(hole_start.data(), min)), size)) } /// Return a free region, respecting the user's hinted address and flags. Address may be null. 
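+    /// A hinted address must be page aligned and lie below `USER_END_OFFSET`, or EINVAL is
+    /// returned. If the hint conflicts with an existing grant, MAP_FIXED_NOREPLACE fails with
+    /// EEXIST, MAP_FIXED is not yet supported for replacing mappings (EOPNOTSUPP), and a plain
+    /// hint simply falls back to `find_free`.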
- pub fn find_free_at(&mut self, address: VirtualAddress, size: usize, flags: MapFlags) -> Result { + pub fn find_free_at(&mut self, min: usize, address: VirtualAddress, size: usize, flags: MapFlags) -> Result { if address == VirtualAddress::new(0) { // Free hands! - return Ok(self.find_free(size)); + return self.find_free(min, size).ok_or(Error::new(ENOMEM)); } // The user wished to have this region... let mut requested = Region::new(address, size); if - requested.end_address().data() >= crate::PML4_SIZE * 256 // There are 256 PML4 entries reserved for userspace - && address.data() % PAGE_SIZE != 0 + requested.end_address().data() > crate::USER_END_OFFSET + || address.data() % PAGE_SIZE != 0 { // ... but it was invalid return Err(Error::new(EINVAL)); } - if let Some(grant) = self.contains(requested.start_address()) { + + if let Some(grant) = self.conflicts(requested).next() { // ... but it already exists if flags.contains(MapFlags::MAP_FIXED_NOREPLACE) { - println!("grant: conflicts with: {:#x} - {:#x}", grant.start_address().data(), grant.end_address().data()); return Err(Error::new(EEXIST)); - } else if flags.contains(MapFlags::MAP_FIXED) { - // TODO: Overwrite existing grant + } + if flags.contains(MapFlags::MAP_FIXED) { return Err(Error::new(EOPNOTSUPP)); } else { // TODO: Find grant close to requested address? - requested = self.find_free(requested.size()); + requested = self.find_free(min, requested.size()).ok_or(Error::new(ENOMEM))?; } } Ok(requested) } -} -impl Deref for UserGrants { - type Target = BTreeSet; - fn deref(&self) -> &Self::Target { - &self.inner + fn reserve(&mut self, grant: &Region) { + let previous_hole = self.holes.range_mut(..grant.start_address()).next_back(); + + if let Some((hole_offset, hole_size)) = previous_hole { + let prev_hole_end = hole_offset.data() + *hole_size; + + // Note that prev_hole_end cannot exactly equal grant.start_address, since that would + // imply there is another grant at that position already, as it would otherwise have + // been larger. + + if prev_hole_end > grant.start_address().data() { + // hole_offset must be below (but never equal to) the start address due to the + // `..grant.start_address()` limit; hence, all we have to do is to shrink the + // previous offset. + *hole_size = grant.start_address().data() - hole_offset.data(); + } + if prev_hole_end > grant.end_address().data() { + // The grant is splitting this hole in two, so insert the new one at the end. + self.holes.insert(grant.end_address(), prev_hole_end - grant.end_address().data()); + } + } + + // Next hole + if let Some(hole_size) = self.holes.remove(&grant.start_address()) { + let remainder = hole_size - grant.size(); + if remainder > 0 { + self.holes.insert(grant.end_address(), remainder); + } + } } -} -impl DerefMut for UserGrants { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner + fn unreserve(holes: &mut BTreeMap, grant: &Region) { + // The size of any possible hole directly after the to-be-freed region. + let exactly_after_size = holes.remove(&grant.end_address()); + + // There was a range that began exactly prior to the to-be-freed region, so simply + // increment the size such that it occupies the grant too. If in addition there was a grant + // directly after the grant, include it too in the size. 
+ if let Some((hole_offset, hole_size)) = holes.range_mut(..grant.start_address()).next_back().filter(|(offset, size)| offset.data() + **size == grant.start_address().data()) { + *hole_size = grant.end_address().data() - hole_offset.data() + exactly_after_size.unwrap_or(0); + } else { + // There was no free region directly before the to-be-freed region, however will + // now unconditionally insert a new free region where the grant was, and add that extra + // size if there was something after it. + holes.insert(grant.start_address(), grant.size() + exactly_after_size.unwrap_or(0)); + } + } + pub fn insert(&mut self, mut grant: Grant) { + assert!(self.conflicts(*grant).next().is_none()); + self.reserve(&grant); + + // FIXME: This currently causes issues, mostly caused by old code that unmaps only based on + // offsets. For instance, the scheme code does not specify any length, and would thus unmap + // memory outside of what it intended to. + + /* + let before_region = self.inner + .range(..grant.region).next_back() + .filter(|b| b.end_address() == grant.start_address() && b.can_be_merged_if_adjacent(&grant)).map(|g| g.region); + + let after_region = self.inner + .range(Region::new(grant.end_address(), 1)..).next() + .filter(|a| a.start_address() == grant.end_address() && a.can_be_merged_if_adjacent(&grant)).map(|g| g.region); + + if let Some(before) = before_region { + grant.region.start = before.start; + grant.region.size += before.size; + + core::mem::forget(self.inner.take(&before)); + } + if let Some(after) = after_region { + grant.region.size += after.size; + + core::mem::forget(self.inner.take(&after)); + } + */ + + self.inner.insert(grant); + } + pub fn remove(&mut self, region: &Region) -> bool { + self.take(region).is_some() + } + pub fn take(&mut self, region: &Region) -> Option { + let grant = self.inner.take(region)?; + Self::unreserve(&mut self.holes, grant.region()); + Some(grant) + } + pub fn iter(&self) -> impl Iterator + '_ { + self.inner.iter() + } + pub fn is_empty(&self) -> bool { self.inner.is_empty() } + pub fn into_iter(self) -> impl Iterator { + self.inner.into_iter() } } @@ -222,7 +517,7 @@ impl Region { /// Return all pages containing a chunk of the region pub fn pages(&self) -> PageIter { - Page::range_inclusive( + Page::range_exclusive( Page::containing_address(self.start_address()), Page::containing_address(self.end_address()) ) @@ -327,228 +622,160 @@ impl Grant { &mut self.region } - pub fn physmap(from: PhysicalAddress, to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - to.data() + from.data())); - let result = active_table.map_to(page, frame, flags); - flush_all.consume(result); + pub fn physmap(phys: Frame, dst: Page, page_count: usize, flags: PageFlags, mapper: &mut PageMapper, mut flusher: impl Flusher) -> Result { + for index in 0..page_count { + let result = unsafe { + mapper + .map_phys(dst.next_by(index).start_address(), phys.next_by(index).start_address(), flags) + .expect("TODO: handle OOM from paging structures in physmap") + }; + flusher.consume(result); } - flush_all.flush(); - - Grant { + Ok(Grant { region: Region { - start: to, - size, + 
start: dst.start_address(), + size: page_count * PAGE_SIZE, }, flags, mapped: true, owned: false, desc_opt: None, - } + }) } - - pub fn map(to: VirtualAddress, size: usize, flags: PageFlags) -> Grant { - let mut active_table = unsafe { ActivePageTable::new(to.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(to); - let end_page = Page::containing_address(VirtualAddress::new(to.data() + size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - let result = active_table - .map(page, flags) - .expect("TODO: handle ENOMEM in Grant::map"); - flush_all.consume(result); - } - - flush_all.flush(); - - Grant { - region: Region { - start: to, - size, - }, - flags, - mapped: true, - owned: true, - desc_opt: None, + pub fn zeroed(dst: Page, page_count: usize, flags: PageFlags, mapper: &mut PageMapper, mut flusher: impl Flusher) -> Result { + // TODO: Unmap partially in case of ENOMEM + for page in Page::range_exclusive(dst, dst.next_by(page_count)) { + let flush = unsafe { mapper.map(page.start_address(), flags) }.ok_or(Enomem)?; + flusher.consume(flush); } - } - - pub fn map_inactive(src: VirtualAddress, dst: VirtualAddress, size: usize, flags: PageFlags, desc_opt: Option, inactive_table: &mut InactivePageTable) -> Grant { - let active_table = unsafe { ActivePageTable::new(src.kind()) }; - let mut inactive_mapper = inactive_table.mapper(); - - let src_start_page = Page::containing_address(src); - let src_end_page = Page::containing_address(VirtualAddress::new(src.data() + size - 1)); - let src_range = Page::range_inclusive(src_start_page, src_end_page); - - let dst_start_page = Page::containing_address(dst); - let dst_end_page = Page::containing_address(VirtualAddress::new(dst.data() + size - 1)); - let dst_range = Page::range_inclusive(dst_start_page, dst_end_page); - - for (src_page, dst_page) in src_range.zip(dst_range) { - let frame = active_table.translate_page(src_page).expect("grant references unmapped memory"); - - let inactive_flush = inactive_mapper.map_to(dst_page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { inactive_flush.ignore(); } - } - - ipi(IpiKind::Tlb, IpiTarget::Other); - - Grant { - region: Region { - start: dst, - size, - }, - flags, - mapped: true, - owned: false, - desc_opt, - } - } - - /// This function should only be used in clone! - pub fn secret_clone(&self, new_start: VirtualAddress) -> Grant { - assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(self.region.start); - let end_page = Page::containing_address(VirtualAddress::new(self.region.start.data() + self.region.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? 
- let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let frame = active_table.translate_page(page).expect("grant references unmapped memory"); - - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - if self.owned { - let result = active_table.map(new_page, PageFlags::new().write(true)) - .expect("TODO: handle ENOMEM in Grant::secret_clone"); - flush_all.consume(result); + Ok(Grant { region: Region { start: dst.start_address(), size: page_count * PAGE_SIZE }, flags, mapped: true, owned: true, desc_opt: None }) + } + pub fn borrow(src_base: Page, dst_base: Page, page_count: usize, flags: PageFlags, desc_opt: Option, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher) -> Result { + Self::copy_inner(src_base, dst_base, page_count, flags, desc_opt, src_mapper, dst_mapper, (), dst_flusher, false, false) + } + pub fn reborrow(src_grant: &Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, dst_flusher: impl Flusher) -> Result { + Self::borrow(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), src_grant.desc_opt.clone(), src_mapper, dst_mapper, dst_flusher).map_err(Into::into) + } + pub fn transfer(mut src_grant: Grant, dst_base: Page, src_mapper: &mut PageMapper, dst_mapper: &mut PageMapper, src_flusher: impl Flusher, dst_flusher: impl Flusher) -> Result { + assert!(core::mem::replace(&mut src_grant.mapped, false)); + let desc_opt = src_grant.desc_opt.take(); + + Self::copy_inner(Page::containing_address(src_grant.start_address()), dst_base, src_grant.size() / PAGE_SIZE, src_grant.flags(), desc_opt, src_mapper, dst_mapper, src_flusher, dst_flusher, src_grant.owned, true).map_err(Into::into) + } + + fn copy_inner( + src_base: Page, + dst_base: Page, + page_count: usize, + flags: PageFlags, + desc_opt: Option, + src_mapper: &mut PageMapper, + dst_mapper: &mut PageMapper, + mut src_flusher: impl Flusher, + mut dst_flusher: impl Flusher, + owned: bool, + unmap: bool, + ) -> Result { + let mut successful_count = 0; + + for index in 0..page_count { + let src_page = src_base.next_by(index); + let (address, entry_flags) = if unmap { + let (entry, entry_flags, flush) = unsafe { src_mapper.unmap_phys(src_page.start_address(), true).expect("grant references unmapped memory") }; + src_flusher.consume(flush); + + (entry, entry_flags) } else { - let result = active_table.map_to(new_page, frame, flags); - flush_all.consume(result); - } - } - - flush_all.flush(); + src_mapper.translate(src_page.start_address()).expect("grant references unmapped memory") + }; - if self.owned { - unsafe { - intrinsics::copy(self.region.start.data() as *const u8, new_start.data() as *mut u8, self.region.size); - } + let flush = match unsafe { dst_mapper.map_phys(dst_base.next_by(index).start_address(), address, flags) } { + Some(f) => f, + // ENOMEM + None => break, + }; - let flush_all = PageFlushAll::new(); + dst_flusher.consume(flush); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? 
- let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); + successful_count = index + 1; + } - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - let result = active_table.remap(new_page, flags); - flush_all.consume(result); + if successful_count != page_count { + // TODO: The grant will be lost in case of ENOMEM. Allow putting it back in source? + for index in 0..successful_count { + let (frame, _, flush) = match unsafe { dst_mapper.unmap_phys(dst_base.next_by(index).start_address(), true) } { + Some(f) => f, + None => unreachable!("grant unmapped by someone else in the meantime despite having a &mut PageMapper"), + }; + dst_flusher.consume(flush); + + if owned { + crate::memory::deallocate_frames(Frame::containing_address(frame), 1); + } } - - flush_all.flush(); + return Err(Enomem); } - Grant { + Ok(Grant { region: Region { - start: new_start, - size: self.region.size, + start: dst_base.start_address(), + size: page_count * PAGE_SIZE, }, - flags: self.flags, + flags, mapped: true, - owned: self.owned, - desc_opt: self.desc_opt.clone() - } - } - - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable) { - assert!(self.mapped); - - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(self.region.start); - let end_page = Page::containing_address(VirtualAddress::new(self.region.start.data() + self.region.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - //TODO: One function to do both? - let flags = active_table.translate_page_flags(page).expect("grant references unmapped memory"); - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); - - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.region.start.data() + new_start.data())); - let result = new_table.mapper().map_to(new_page, frame, flags); - // Ignore result due to mapping on inactive table - unsafe { result.ignore(); } - } - - flush_all.flush(); - - self.region.start = new_start; + owned, + desc_opt, + }) } pub fn flags(&self) -> PageFlags { self.flags } - pub fn unmap(mut self) -> UnmapResult { + pub fn remap(&mut self, mapper: &mut PageMapper, mut flusher: impl Flusher, flags: PageFlags) { assert!(self.mapped); - let mut active_table = unsafe { ActivePageTable::new(self.start_address().kind()) }; - - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(self.start_address()); - let end_page = Page::containing_address(self.final_address()); - for page in Page::range_inclusive(start_page, end_page) { - let (result, frame) = active_table.unmap_return(page, false); - if self.owned { - //TODO: make sure this frame can be safely freed, physical use counter - crate::memory::deallocate_frames(frame, 1); + for page in self.pages() { + unsafe { + let result = mapper.remap(page.start_address(), flags).expect("grant contained unmap address"); + flusher.consume(result); } - flush_all.consume(result); } - flush_all.flush(); - - self.mapped = false; - - // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap - UnmapResult { file_desc: self.desc_opt.take() } + self.flags = flags; + } + pub fn can_have_flags(&self, flags: MapFlags) -> bool { + self.owned || ((self.flags.has_write() || 
!flags.contains(MapFlags::PROT_WRITE)) && (self.flags.has_execute() || !flags.contains(MapFlags::PROT_EXEC))) } - pub fn unmap_inactive(mut self, new_table: &mut InactivePageTable) -> UnmapResult { + pub fn unmap(mut self, mapper: &mut PageMapper, mut flusher: impl Flusher) -> UnmapResult { assert!(self.mapped); - let start_page = Page::containing_address(self.start_address()); - let end_page = Page::containing_address(self.final_address()); - for page in Page::range_inclusive(start_page, end_page) { - let (result, frame) = new_table.mapper().unmap_return(page, false); + for page in self.pages() { + let (entry, _, flush) = unsafe { mapper.unmap_phys(page.start_address(), true) } + .unwrap_or_else(|| panic!("missing page at {:#0x} for grant {:?}", page.start_address().data(), self)); + if self.owned { - //TODO: make sure this frame can be safely freed, physical use counter - crate::memory::deallocate_frames(frame, 1); + // TODO: make sure this frame can be safely freed, physical use counter. + // + // Namely, we can either have MAP_PRIVATE or MAP_SHARED-style mappings. The former + // maps the source memory read-only and then (not yet) implements CoW on top (as of + // now the kernel does not yet support this distinction), while the latter simply + // means the memory is shared. We can in addition to the desc_opt also include an + // address space and region within, indicating borrowed memory. The source grant + // will have a refcount, and if it is unmapped, it will be transferred to a + // borrower. Only if this refcount becomes zero when decremented, will it be + // possible to unmap. + // + // So currently, it is technically possible to get double frees if the scheme + // "hosting" the memory of an fmap call, decides to funmap its memory before the + // fmapper does. 
+ crate::memory::deallocate_frames(Frame::containing_address(entry), 1); } - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } + flusher.consume(flush); } - ipi(IpiKind::Tlb, IpiTarget::Other); - self.mapped = false; // TODO: This imposes a large cost on unmapping, but that cost cannot be avoided without modifying fmap and funmap @@ -593,6 +820,18 @@ impl Grant { Some((before_grant, self, after_grant)) } + // FIXME + /* + pub fn can_be_merged_if_adjacent(&self, with: &Self) -> bool { + match (&self.desc_opt, &with.desc_opt) { + (None, None) => (), + (Some(ref a), Some(ref b)) if Arc::ptr_eq(&a.desc.description, &b.desc.description) => (), + + _ => return false, + } + self.owned == with.owned && self.mapped == with.mapped && self.flags.data() == with.flags.data() + } + */ } impl Deref for Grant { @@ -631,202 +870,64 @@ impl Drop for Grant { } } -#[derive(Clone, Debug)] -pub enum SharedMemory { - Owned(Arc>), - Borrowed(Weak>) -} - -impl SharedMemory { - pub fn with(&self, f: F) -> T where F: FnOnce(&mut Memory) -> T { - match *self { - SharedMemory::Owned(ref memory_lock) => { - let mut memory = memory_lock.lock(); - f(&mut *memory) - }, - SharedMemory::Borrowed(ref memory_weak) => { - let memory_lock = memory_weak.upgrade().expect("SharedMemory::Borrowed no longer valid"); - let mut memory = memory_lock.lock(); - f(&mut *memory) - } - } - } - - pub fn borrow(&self) -> SharedMemory { - match *self { - SharedMemory::Owned(ref memory_lock) => SharedMemory::Borrowed(Arc::downgrade(memory_lock)), - SharedMemory::Borrowed(ref memory_lock) => SharedMemory::Borrowed(memory_lock.clone()) - } - } -} +pub const DANGLING: usize = 1 << (usize::BITS - 2); #[derive(Debug)] -pub struct Memory { - start: VirtualAddress, - size: usize, - flags: PageFlags, +pub struct Table { + pub utable: PageMapper, } -impl Memory { - pub fn new(start: VirtualAddress, size: usize, flags: PageFlags, clear: bool) -> Self { - let mut memory = Memory { - start, - size, - flags, - }; - - memory.map(clear); - - memory - } - - pub fn to_shared(self) -> SharedMemory { - SharedMemory::Owned(Arc::new(Mutex::new(self))) - } - - pub fn start_address(&self) -> VirtualAddress { - self.start - } - - pub fn size(&self) -> usize { - self.size - } - - pub fn flags(&self) -> PageFlags { - self.flags - } - - pub fn pages(&self) -> PageIter { - let start_page = Page::containing_address(self.start); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size - 1)); - Page::range_inclusive(start_page, end_page) - } - - fn map(&mut self, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let result = active_table - .map(page, self.flags) - .expect("TODO: handle ENOMEM in Memory::map"); - flush_all.consume(result); - } - - flush_all.flush(); - - if clear { - assert!(self.flags.has_write()); +impl Drop for Table { + fn drop(&mut self) { + if self.utable.is_current() { + // TODO: Do not flush (we immediately context switch after exit(), what else is there + // to do?). Instead, we can garbage-collect such page tables in the idle kernel context + // before it waits for interrupts. Or maybe not, depends on what future benchmarks will + // indicate. 
unsafe { - intrinsics::write_bytes(self.start_address().data() as *mut u8, 0, self.size); + RmmA::set_table(super::empty_cr3()); } } + crate::memory::deallocate_frames(Frame::containing_address(self.utable.table().phys()), 1); } +} - fn unmap(&mut self) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let result = active_table.unmap(page); - flush_all.consume(result); - } - - flush_all.flush(); - } - - /// A complicated operation to move a piece of memory to a new page table - /// It also allows for changing the address at the same time - pub fn move_to(&mut self, new_start: VirtualAddress, new_table: &mut InactivePageTable) { - let mut inactive_mapper = new_table.mapper(); - - let mut active_table = unsafe { ActivePageTable::new(new_start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let (result, frame) = active_table.unmap_return(page, false); - flush_all.consume(result); - - let new_page = Page::containing_address(VirtualAddress::new(page.start_address().data() - self.start.data() + new_start.data())); - let result = inactive_mapper.map_to(new_page, frame, self.flags); - // This is not the active table, so the flush can be ignored - unsafe { result.ignore(); } - } - - flush_all.flush(); - - self.start = new_start; - } - - pub fn remap(&mut self, new_flags: PageFlags) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; - - let flush_all = PageFlushAll::new(); - - for page in self.pages() { - let result = active_table.remap(page, new_flags); - flush_all.consume(result); - } - - flush_all.flush(); +/// Allocates a new identically mapped ktable and empty utable (same memory on x86_64). +pub fn setup_new_utable() -> Result { + let mut utable = unsafe { PageMapper::create(crate::rmm::FRAME_ALLOCATOR).ok_or(Error::new(ENOMEM))? }; - self.flags = new_flags; - } + #[cfg(target_arch = "x86_64")] + { + let active_ktable = KernelMapper::lock(); - pub fn resize(&mut self, new_size: usize, clear: bool) { - let mut active_table = unsafe { ActivePageTable::new(self.start.kind()) }; + let mut copy_mapping = |p4_no| unsafe { + let entry = active_ktable.table().entry(p4_no) + .unwrap_or_else(|| panic!("expected kernel PML {} to be mapped", p4_no)); - //TODO: Calculate page changes to minimize operations - if new_size > self.size { - let flush_all = PageFlushAll::new(); + utable.table().set_entry(p4_no, entry) + }; + // TODO: Just copy all 256 mappings? Or copy KERNEL_PML4+KERNEL_PERCPU_PML4 (needed for + // paranoid ISRs which can occur anywhere; we don't want interrupts to triple fault!) and + // map lazily via page faults in the kernel. 
- let start_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + new_size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { - let result = active_table - .map(page, self.flags) - .expect("TODO: Handle OOM in Memory::resize"); - flush_all.consume(result); - } - } + // Copy kernel image mapping + copy_mapping(crate::KERNEL_PML4); - flush_all.flush(); + // Copy kernel heap mapping + copy_mapping(crate::KERNEL_HEAP_PML4); - if clear { - unsafe { - intrinsics::write_bytes((self.start.data() + self.size) as *mut u8, 0, new_size - self.size); - } - } - } else if new_size < self.size { - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(self.start.data() + new_size)); - let end_page = Page::containing_address(VirtualAddress::new(self.start.data() + self.size - 1)); - for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_some() { - let result = active_table.unmap(page); - flush_all.consume(result); - } - } - - flush_all.flush(); - } + // Copy physmap mapping + copy_mapping(crate::PHYS_PML4); - self.size = new_size; + // Copy kernel percpu (similar to TLS) mapping. + copy_mapping(crate::KERNEL_PERCPU_PML4); } -} -impl Drop for Memory { - fn drop(&mut self) { - self.unmap(); - } + Ok(Table { + utable, + }) } -pub const DANGLING: usize = 1 << (usize::BITS - 2); #[cfg(tests)] mod tests { diff --git a/src/context/mod.rs b/src/context/mod.rs index 9268b142a9c3414cc7e8a1f46c905dd4ec586211..dea6e2357294d9e65b461d551244a49dc781b730 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -1,10 +1,14 @@ //! # Context management //! //! 
For resources on contexts, please consult [wikipedia](https://en.wikipedia.org/wiki/Context_switch) and [osdev](https://wiki.osdev.org/Context_Switching) -use alloc::boxed::Box; -use core::alloc::{GlobalAlloc, Layout}; use core::sync::atomic::Ordering; -use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +use alloc::sync::Arc; + +use spin::{RwLock, RwLockReadGuard, RwLockWriteGuard}; + +use crate::paging::{RmmA, RmmArch}; +use crate::syscall::error::{Error, ESRCH, Result}; pub use self::context::{Context, ContextId, ContextSnapshot, Status, WaitpidKey}; pub use self::list::ContextList; @@ -53,28 +57,21 @@ static CONTEXTS: RwLock = RwLock::new(ContextList::new()); #[thread_local] static CONTEXT_ID: context::AtomicContextId = context::AtomicContextId::default(); +pub use self::arch::empty_cr3; + pub fn init() { let mut contexts = contexts_mut(); let context_lock = contexts.new_context().expect("could not initialize first context"); let mut context = context_lock.write(); - let mut fx = unsafe { Box::from_raw(crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)) as *mut [u8; 1024]) }; - for b in fx.iter_mut() { - *b = 0; - } - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); + self::arch::EMPTY_CR3.call_once(|| unsafe { RmmA::table() }); + context.status = Status::Runnable; context.running = true; context.cpu_id = Some(crate::cpu_id()); CONTEXT_ID.store(context.id, Ordering::SeqCst); } -/// Initialize contexts, called if needed -fn init_contexts() -> RwLock { - RwLock::new(ContextList::new()) -} - /// Get the global schemes list, const pub fn contexts() -> RwLockReadGuard<'static, ContextList> { CONTEXTS.read() @@ -94,3 +91,7 @@ pub fn context_id() -> ContextId { core::sync::atomic::compiler_fence(Ordering::Acquire); id } + +pub fn current() -> Result>> { + contexts().current().ok_or(Error::new(ESRCH)).map(Arc::clone) +} diff --git a/src/context/signal.rs b/src/context/signal.rs index ae6b2529e07277ad79182d682bbe1008666e624e..7b19831e734982be4741fce76f6a642ebbecc316 100644 --- a/src/context/signal.rs +++ b/src/context/signal.rs @@ -13,12 +13,12 @@ pub fn is_user_handled(handler: Option) -> bool { } pub extern "C" fn signal_handler(sig: usize) { - let (action, restorer) = { + let ((action, restorer), sigstack) = { let contexts = contexts(); let context_lock = contexts.current().expect("context::signal_handler not inside of context"); let context = context_lock.read(); let actions = context.actions.read(); - actions[sig] + (actions[sig], context.sigstack) }; let handler = action.sa_handler.map(|ptr| ptr as usize).unwrap_or(0); @@ -115,7 +115,7 @@ pub extern "C" fn signal_handler(sig: usize) { }; unsafe { - let mut sp = crate::USER_SIGSTACK_OFFSET + crate::USER_SIGSTACK_SIZE - 256; + let mut sp = sigstack.expect("sigaction was set while sigstack was not") - 256; sp = (sp / 16) * 16; diff --git a/src/context/switch.rs b/src/context/switch.rs index 9d7c38c1a96c07ab5d8e901d09a8a5001fb76ca0..8a565e5692ba18671f81bce00a5cbc546f1a2e4d 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -29,14 +29,10 @@ unsafe fn update(context: &mut Context, cpu_id: usize) { let ksig = context.ksig.take().expect("context::switch: ksig not set with ksig_restore"); context.arch = ksig.0; - if let Some(ref mut kfx) = context.kfx { - kfx.clone_from_slice(&ksig.1.expect("context::switch: ksig kfx not set with ksig_restore")); - } else { - panic!("context::switch: kfx not set with ksig_restore"); - } + context.kfx.copy_from_slice(&*ksig.1); if let Some(ref mut 
kstack) = context.kstack { - kstack.clone_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); + kstack.copy_from_slice(&ksig.2.expect("context::switch: ksig kstack not set with ksig_restore")); } else { panic!("context::switch: kstack not set with ksig_restore"); } @@ -194,11 +190,11 @@ pub unsafe fn switch() -> bool { to_context.arch.signal_stack(signal_handler, sig); } - let from_arch_ptr: *mut arch::Context = &mut from_context_guard.arch; + let from_ptr: *mut Context = &mut *from_context_guard; core::mem::forget(from_context_guard); - let prev_arch: &mut arch::Context = &mut *from_arch_ptr; - let next_arch: &mut arch::Context = &mut to_context.arch; + let prev: &mut Context = &mut *from_ptr; + let next: &mut Context = &mut *to_context; // to_context_guard only exists as a raw pointer, but is still locked @@ -207,7 +203,7 @@ pub unsafe fn switch() -> bool { next_lock: to_context_lock, })); - arch::switch_to(prev_arch, next_arch); + arch::switch_to(prev, next); // NOTE: After switch_to is called, the return address can even be different from the // current return address, meaning that we cannot use local variables here, and that we diff --git a/src/debugger.rs b/src/debugger.rs index b0c2735dc30b5c313c088a67eb50e80c596f73f8..23dfc3b838cf2fbe226d16884fcdf9f0608b331b 100644 --- a/src/debugger.rs +++ b/src/debugger.rs @@ -1,16 +1,23 @@ +use crate::paging::{RmmA, RmmArch}; + // Super unsafe due to page table switching and raw pointers! -pub unsafe fn debugger() { +pub unsafe fn debugger(target_id: Option) { println!("DEBUGGER START"); println!(); - let mut active_table = crate::paging::ActivePageTable::new(crate::paging::TableKind::User); + let old_table = RmmA::table(); + for (id, context_lock) in crate::context::contexts().iter() { + if target_id.map_or(false, |target_id| *id != target_id) { continue; } let context = context_lock.read(); println!("{}: {}", (*id).into(), context.name.read()); // Switch to context page table to ensure syscall debug and stack dump will work - let new_table = crate::paging::InactivePageTable::from_address(context.arch.get_page_utable()); - let old_table = active_table.switch(new_table); + if let Some(ref space) = context.addr_space { + RmmA::set_table(space.read().table.utable.table().phys()); + } + + check_consistency(&mut context.addr_space.as_ref().unwrap().write()); println!("status: {:?}", context.status); if ! context.status_reason.is_empty() { @@ -19,26 +26,11 @@ pub unsafe fn debugger() { if let Some((a, b, c, d, e, f)) = context.syscall { println!("syscall: {}", crate::syscall::debug::format_call(a, b, c, d, e, f)); } - if ! context.image.is_empty() { - println!("image:"); - for shared_memory in context.image.iter() { - shared_memory.with(|memory| { - let region = crate::context::memory::Region::new( - memory.start_address(), - memory.size() - ); - println!( - " virt 0x{:016x}:0x{:016x} size 0x{:08x}", - region.start_address().data(), region.final_address().data(), region.size() - ); - }); - } - } - { - let grants = context.grants.read(); - if ! grants.is_empty() { + if let Some(ref addr_space) = context.addr_space { + let addr_space = addr_space.read(); + if ! 
addr_space.grants.is_empty() { println!("grants:"); - for grant in grants.iter() { + for grant in addr_space.grants.iter() { let region = grant.region(); println!( " virt 0x{:016x}:0x{:016x} size 0x{:08x} {}", @@ -56,7 +48,7 @@ pub unsafe fn debugger() { println!("stack: {:>016x}", rsp); //Maximum 64 qwords for i in 0..64 { - if active_table.translate(crate::paging::VirtualAddress::new(rsp)).is_some() { + if context.addr_space.as_ref().map_or(false, |space| space.read().table.utable.translate(crate::paging::VirtualAddress::new(rsp)).is_some()) { let value = *(rsp as *const usize); println!(" {:>016x}: {:>016x}", rsp, value); if let Some(next_rsp) = rsp.checked_add(core::mem::size_of::()) { @@ -73,10 +65,73 @@ pub unsafe fn debugger() { } // Switch to original page table - active_table.switch(old_table); + RmmA::set_table(old_table); println!(); } println!("DEBUGGER END"); } + +pub unsafe fn check_consistency(addr_space: &mut crate::context::memory::AddrSpace) { + use crate::paging::*; + + let p4 = addr_space.table.utable.table(); + + for p4i in 0..256 { + let p3 = match p4.next(p4i) { + Some(p3) => p3, + None => continue, + }; + + for p3i in 0..512 { + let p2 = match p3.next(p3i) { + Some(p2) => p2, + None => continue, + }; + + for p2i in 0..512 { + let p1 = match p2.next(p2i) { + Some(p1) => p1, + None => continue, + }; + + for p1i in 0..512 { + let (physaddr, flags) = match p1.entry(p1i) { + Some(e) => if let Ok(address) = e.address() { + (address, e.flags()) + } else { + continue; + } + _ => continue, + }; + let address = VirtualAddress::new((p1i << 12) | (p2i << 21) | (p3i << 30) | (p4i << 39)); + + let grant = match addr_space.grants.contains(address) { + Some(g) => g, + None => { + log::error!("ADDRESS {:p} LACKING GRANT BUT MAPPED TO {:#0x} FLAGS {:?}!", address.data() as *const u8, physaddr.data(), flags); + continue; + } + }; + const STICKY: usize = (1 << 5) | (1 << 6); // accessed+dirty + if grant.flags().data() & !STICKY != flags.data() & !STICKY { + log::error!("FLAG MISMATCH: {:?} != {:?}, address {:p} in grant at {:?}", grant.flags(), flags, address.data() as *const u8, grant.region()); + } + } + } + } + } + + for grant in addr_space.grants.iter() { + for page in grant.pages() { + let entry = match addr_space.table.utable.translate(page.start_address()) { + Some(e) => e, + None => { + log::error!("GRANT AT {:?} LACKING MAPPING AT PAGE {:p}", grant.region(), page.start_address().data() as *const u8); + continue; + } + }; + } + } +} diff --git a/src/lib.rs b/src/lib.rs index b1743892dbc50aa7a5b41308a89832e650431656..18646e04ee8e5b4ad539e8e66c1293571871469b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,8 +43,11 @@ #![deny(unused_must_use)] #![feature(allocator_api)] +#![feature(arbitrary_self_types)] +#![feature(array_chunks)] #![feature(asm_const, asm_sym)] // TODO: Relax requirements of most asm invocations #![cfg_attr(target_arch = "aarch64", feature(llvm_asm))] // TODO: Rewrite using asm! 
+#![feature(bool_to_option)] #![feature(concat_idents)] #![feature(const_btree_new)] #![feature(const_ptr_offset_from)] @@ -53,6 +56,7 @@ #![feature(lang_items)] #![feature(naked_functions)] #![feature(ptr_internals)] +#![feature(slice_ptr_get, slice_ptr_len)] #![feature(thread_local)] #![no_std] @@ -72,7 +76,6 @@ extern crate spin; #[cfg(feature = "slab")] extern crate slab_allocator; -use alloc::vec::Vec; use core::sync::atomic::{AtomicUsize, Ordering}; use crate::scheme::{FileHandle, SchemeNamespace}; @@ -167,52 +170,36 @@ pub fn cpu_count() -> usize { CPU_COUNT.load(Ordering::Relaxed) } -static mut INIT_ENV: &[u8] = &[]; - -/// Initialize userspace by running the initfs:bin/init process -/// This function will also set the CWD to initfs:bin and open debug: as stdio -pub extern fn userspace_init() { - let path = "initfs:/bin/init"; - let env = unsafe { INIT_ENV }; - - if let Err(err) = syscall::chdir("initfs:") { - info!("Failed to enter initfs ({}).", err); - panic!("Unexpected error while trying to enter initfs:."); - } - - assert_eq!(syscall::open("debug:", syscall::flag::O_RDONLY).map(FileHandle::into), Ok(0)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(1)); - assert_eq!(syscall::open("debug:", syscall::flag::O_WRONLY).map(FileHandle::into), Ok(2)); - - let fd = syscall::open(path, syscall::flag::O_RDONLY).expect("failed to open init"); - - let mut args = Vec::new(); - args.push(path.as_bytes().to_vec().into_boxed_slice()); - - let mut vars = Vec::new(); - for var in env.split(|b| *b == b'\n') { - if ! var.is_empty() { - vars.push(var.to_vec().into_boxed_slice()); - } - } +pub fn init_env() -> &'static [u8] { + crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set").env +} - syscall::fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None).expect("failed to execute init"); +pub extern "C" fn userspace_init() { + let bootstrap = crate::BOOTSTRAP.get().expect("BOOTSTRAP was not set"); + unsafe { crate::syscall::process::usermode_bootstrap(bootstrap) } +} - panic!("init returned"); +pub struct Bootstrap { + pub base: crate::memory::Frame, + pub page_count: usize, + pub entry: usize, + pub env: &'static [u8], } +static BOOTSTRAP: spin::Once = spin::Once::new(); /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this -pub fn kmain(cpus: usize, env: &'static [u8]) -> ! { +pub fn kmain(cpus: usize, bootstrap: Bootstrap) -> ! 
{ CPU_ID.store(0, Ordering::SeqCst); CPU_COUNT.store(cpus, Ordering::SeqCst); - unsafe { INIT_ENV = env }; //Initialize the first context, stored in kernel/src/context/mod.rs context::init(); let pid = syscall::getpid(); info!("BSP: {:?} {}", pid, cpus); - info!("Env: {:?}", ::core::str::from_utf8(unsafe { INIT_ENV })); + info!("Env: {:?}", ::core::str::from_utf8(bootstrap.env)); + + BOOTSTRAP.call_once(|| bootstrap); match context::contexts_mut().spawn(userspace_init) { Ok(context_lock) => { diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 42dc53bf04bbeb4bb30ba946fce8e5f6d9371299..f8bd8b1f6af4c618ba44967cb7d9946e89522bf8 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -3,14 +3,15 @@ use core::cmp; -use crate::arch::rmm::FRAME_ALLOCATOR; +use crate::arch::rmm::LockedAllocator; pub use crate::paging::{PAGE_SIZE, PhysicalAddress}; use rmm::{ FrameAllocator, FrameCount, }; -use syscall::{PartialAllocStrategy, PhysallocFlags}; +use crate::syscall::flag::{PartialAllocStrategy, PhysallocFlags}; +use crate::syscall::error::{ENOMEM, Error}; /// A memory map area #[derive(Copy, Clone, Debug, Default)] @@ -25,21 +26,21 @@ pub struct MemoryArea { /// Get the number of frames available pub fn free_frames() -> usize { unsafe { - FRAME_ALLOCATOR.usage().free().data() + LockedAllocator.usage().free().data() } } /// Get the number of frames used pub fn used_frames() -> usize { unsafe { - FRAME_ALLOCATOR.usage().used().data() + LockedAllocator.usage().used().data() } } /// Allocate a range of frames pub fn allocate_frames(count: usize) -> Option { unsafe { - FRAME_ALLOCATOR.allocate(FrameCount::new(count)).map(|phys| { + LockedAllocator.allocate(FrameCount::new(count)).map(|phys| { Frame::containing_address(PhysicalAddress::new(phys.data())) }) } @@ -64,7 +65,7 @@ pub fn allocate_frames_complex(count: usize, flags: PhysallocFlags, strategy: Op /// Deallocate a range of frames frame pub fn deallocate_frames(frame: Frame, count: usize) { unsafe { - FRAME_ALLOCATOR.free( + LockedAllocator.free( rmm::PhysicalAddress::new(frame.start_address().data()), FrameCount::new(count) ); @@ -102,6 +103,11 @@ impl Frame { pub fn range_inclusive(start: Frame, end: Frame) -> FrameIter { FrameIter { start, end } } + pub fn next_by(&self, n: usize) -> Self { + Self { + number: self.number + n, + } + } } pub struct FrameIter { @@ -125,3 +131,9 @@ impl Iterator for FrameIter { #[derive(Debug)] pub struct Enomem; + +impl From for Error { + fn from(_: Enomem) -> Self { + Self::new(ENOMEM) + } +} diff --git a/src/ptrace.rs b/src/ptrace.rs index 7f7711a1995696d0fc16fefc0af848aec0613905..81d06115b859a3022ec630f39767dc83103b222e 100644 --- a/src/ptrace.rs +++ b/src/ptrace.rs @@ -2,16 +2,15 @@ //! handling should go here, unless they closely depend on the design //! of the scheme. 
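(Editorial aside, not part of the patch.) Higher up, `kmain` now stores the boot payload in a `spin::Once<Bootstrap>` instead of the old mutable `INIT_ENV` static, and readers such as `init_env()` borrow it afterwards. A reduced sketch of that write-once handoff, assuming the `spin` crate's `Once` API and a trimmed-down `Bootstrap`:

```rust
// Sketch only: write-once handoff of boot data, mirroring the BOOTSTRAP pattern.
use spin::Once;

struct Bootstrap {
    env: &'static [u8],
}

static BOOTSTRAP: Once<Bootstrap> = Once::new();

fn init_env() -> &'static [u8] {
    BOOTSTRAP.get().expect("BOOTSTRAP was not set").env
}

fn main() {
    // kmain would do this exactly once, before spawning userspace_init.
    BOOTSTRAP.call_once(|| Bootstrap { env: b"KEY=value\n" });
    assert_eq!(init_env(), b"KEY=value\n");
}
```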
+use rmm::Arch; + use crate::{ arch::{ interrupt::InterruptStack, - paging::{ - mapper::PageFlushAll, - ActivePageTable, InactivePageTable, Page, PAGE_SIZE, TableKind, VirtualAddress - } + paging::{PAGE_SIZE, VirtualAddress}, }, common::unique::Unique, - context::{self, signal, Context, ContextId}, + context::{self, signal, Context, ContextId, memory::AddrSpace}, event, scheme::proc, sync::WaitCondition, @@ -21,6 +20,7 @@ use crate::{ flag::*, ptrace_event }, + CurrentRmmArch as RmmA, }; use alloc::{ @@ -31,12 +31,8 @@ use alloc::{ btree_map::Entry }, sync::Arc, - vec::Vec -}; -use core::{ - cmp, - sync::atomic::Ordering }; +use core::cmp; use spin::{Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; // ____ _ @@ -187,7 +183,11 @@ pub fn is_traced(pid: ContextId) -> bool { /// Trigger a notification to the event: scheme fn proc_trigger_event(file_id: usize, flags: EventFlags) { - event::trigger(proc::PROC_SCHEME_ID.load(Ordering::SeqCst), file_id, flags); + if let Some(scheme_id) = proc::PROC_SCHEME_ID.get() { + event::trigger(*scheme_id, file_id, flags); + } else { + log::warn!("Failed to trigger proc event: scheme never initialized"); + } } /// Dispatch an event to any tracer tracing `self`. This will cause @@ -445,66 +445,41 @@ pub unsafe fn regs_for_mut(context: &mut Context) -> Option<&mut InterruptStack> // |_| |_|\___|_| |_| |_|\___/|_| \__, | // |___/ -pub fn with_context_memory(context: &mut Context, offset: VirtualAddress, len: usize, f: F) -> Result<()> -where F: FnOnce(*mut u8) -> Result<()> -{ - // As far as I understand, mapping any regions following - // USER_TMP_MISC_OFFSET is safe because no other memory location - // is used after it. In the future it might be necessary to define - // a maximum amount of pages that can be mapped in one batch, - // which could be used to either internally retry `read`/`write` - // in `proc:/mem`, or return a partial read/write. - let start = Page::containing_address(VirtualAddress::new(crate::USER_TMP_MISC_OFFSET)); - - let mut active_page_table = unsafe { ActivePageTable::new(TableKind::User) }; - let mut target_page_table = unsafe { - InactivePageTable::from_address(context.arch.get_page_utable()) - }; - - // Find the physical frames for all pages - let mut frames = Vec::new(); - - { - let mapper = target_page_table.mapper(); - - let mut inner = || -> Result<()> { - let start = Page::containing_address(offset); - let end = Page::containing_address(VirtualAddress::new(offset.data() + len - 1)); - for page in Page::range_inclusive(start, end) { - frames.push(( - mapper.translate_page(page).ok_or(Error::new(EFAULT))?, - mapper.translate_page_flags(page).ok_or(Error::new(EFAULT))? - )); - } - Ok(()) - }; - inner()?; +// Returns an iterator which splits [start, start + len) into an iterator of possibly trimmed +// pages. +fn page_aligned_chunks(mut start: usize, mut len: usize) -> impl Iterator { + // Ensure no pages can overlap with kernel memory. 
+ if start.saturating_add(len) > crate::USER_END_OFFSET { + len = crate::USER_END_OFFSET.saturating_sub(start); } - // Map all the physical frames into linear pages - let pages = frames.len(); - let mut page = start; - let flush_all = PageFlushAll::new(); - for (frame, mut flags) in frames { - flags = flags.execute(false).write(true); - flush_all.consume(active_page_table.map_to(page, frame, flags)); + let first_len = core::cmp::min(len, PAGE_SIZE - start % PAGE_SIZE); + let first = Some((start, first_len)).filter(|(_, len)| *len > 0); + start += first_len; + len -= first_len; - page = page.next(); - } + let last_len = len % PAGE_SIZE; + len -= last_len; + let last = Some((start + len, last_len)).filter(|(_, len)| *len > 0); - flush_all.flush(); + first.into_iter().chain((start..start + len).step_by(PAGE_SIZE).map(|off| (off, PAGE_SIZE))).chain(last) +} - let res = f((start.start_address().data() + offset.data() % PAGE_SIZE) as *mut u8); +pub fn context_memory(addrspace: &mut AddrSpace, offset: VirtualAddress, len: usize) -> impl Iterator> + '_ { + let end = core::cmp::min(offset.data().saturating_add(len), crate::USER_END_OFFSET); + let len = end - offset.data(); - // Unmap all the pages (but allow no deallocation!) - let mut page = start; - let flush_all = PageFlushAll::new(); - for _ in 0..pages { - flush_all.consume(active_page_table.unmap_return(page, true).0); - page = page.next(); - } + // TODO: Iterate over grants instead to avoid yielding None too many times. What if + // context_memory is used for an entire process's address space, where the stack is at the very + // end? Alternatively we can skip pages recursively, i.e. first skip unpopulated PML4s and then + // onwards. + page_aligned_chunks(offset.data(), len).map(move |(addr, len)| unsafe { + // [addr,addr+len) is a continuous page starting and/or ending at page boundaries, with the + // possible exception of an unaligned head/tail. 
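(Editorial aside, not part of the patch.) To make `page_aligned_chunks` above concrete: it yields a possibly short head up to the first page boundary, whole pages in the middle, and a possibly short tail. A stand-alone re-derivation with a worked example, assuming 4 KiB pages (the real helper additionally clamps the range against `USER_END_OFFSET`):

```rust
// Illustrative re-implementation; returns (address, length) chunks.
const PAGE_SIZE: usize = 4096;

fn page_aligned_chunks(mut start: usize, mut len: usize) -> Vec<(usize, usize)> {
    let mut chunks = Vec::new();

    // Possibly short head, up to the next page boundary.
    let head = len.min(PAGE_SIZE - start % PAGE_SIZE);
    if head > 0 {
        chunks.push((start, head));
    }
    start += head;
    len -= head;

    // Whole pages in the middle.
    while len >= PAGE_SIZE {
        chunks.push((start, PAGE_SIZE));
        start += PAGE_SIZE;
        len -= PAGE_SIZE;
    }

    // Possibly short tail.
    if len > 0 {
        chunks.push((start, len));
    }
    chunks
}

fn main() {
    // An unaligned 0x1020-byte range starting at 0x1ff0 splits into a
    // 0x10-byte head, one full page, and a 0x10-byte tail.
    assert_eq!(
        page_aligned_chunks(0x1ff0, 0x1020),
        vec![(0x1ff0, 0x10), (0x2000, 0x1000), (0x3000, 0x10)]
    );
}
```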
- flush_all.flush(); + let (address, flags) = addrspace.table.utable.translate(VirtualAddress::new(addr))?; - res + let start = RmmA::phys_to_virt(address).data() + addr % crate::memory::PAGE_SIZE; + Some((core::ptr::slice_from_raw_parts_mut(start as *mut u8, len), flags.has_write())) + }) } diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs index 41e731b6ddb96d85535bdef71c66287098392b31..fd0929eecaab76bd1e1267b4d93b4cf30a410335 100644 --- a/src/scheme/acpi.rs +++ b/src/scheme/acpi.rs @@ -288,3 +288,4 @@ impl Scheme for AcpiScheme { Ok(0) } } +impl crate::scheme::KernelScheme for AcpiScheme {} diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs index 0aa6ee744b6075e5cfc4ebd29cc389472917797a..9fd1739ce51f74a692a872755bb87c7200976493 100644 --- a/src/scheme/debug.rs +++ b/src/scheme/debug.rs @@ -165,3 +165,4 @@ impl Scheme for DebugScheme { Ok(0) } } +impl crate::scheme::KernelScheme for DebugScheme {} diff --git a/src/scheme/event.rs b/src/scheme/event.rs index 3ae5015c341d4ecf4191ca240b3a5b54e78b1870..c9cf66321e289ecd874ff37841a9ce711b3fc7c9 100644 --- a/src/scheme/event.rs +++ b/src/scheme/event.rs @@ -71,3 +71,4 @@ impl Scheme for EventScheme { queues_mut().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for EventScheme {} diff --git a/src/scheme/initfs.rs b/src/scheme/initfs.rs deleted file mode 100644 index 4c7f3a59a5ad9ccee3d0c52f0f418fe38e665598..0000000000000000000000000000000000000000 --- a/src/scheme/initfs.rs +++ /dev/null @@ -1,275 +0,0 @@ -use core::convert::TryFrom; -use core::str; -use core::sync::atomic::{AtomicUsize, Ordering}; - -use alloc::collections::BTreeMap; -use alloc::string::String; -use alloc::vec::Vec; - -use spin::{Once, RwLock}; - -use redox_initfs::{InitFs, InodeStruct, Inode, InodeDir, InodeKind, types::Timespec}; - -use crate::syscall::data::Stat; -use crate::syscall::error::*; -use crate::syscall::flag::{MODE_DIR, MODE_FILE}; -use crate::syscall::scheme::{calc_seek_offset_usize, Scheme}; - -struct Handle { - inode: Inode, - seek: usize, - // TODO: Any better way to implement fpath? Or maybe work around it, e.g. by giving paths such - // as `initfs:__inodes__/`? 
- filename: String, -} - -static NEXT_ID: AtomicUsize = AtomicUsize::new(0); -static HANDLES: RwLock> = RwLock::new(BTreeMap::new()); - -static FS: Once> = Once::new(); - -fn fs() -> Result> { - FS.get().copied().ok_or(Error::new(ENODEV)) -} -fn get_inode(inode: Inode) -> Result> { - fs()?.get_inode(inode).ok_or_else(|| Error::new(EIO)) -} - -pub fn init(bytes: &'static [u8]) { - let mut called = false; - - FS.call_once(|| { - called = true; - - InitFs::new(bytes) - .expect("failed to parse initfs header") - }); - - assert!(called, "called initfs::init more than once"); -} - -fn next_id() -> usize { - let old = NEXT_ID.fetch_add(1, Ordering::Relaxed); - assert_ne!(old, usize::MAX, "usize overflow in initfs scheme"); - old -} - -pub struct InitFsScheme; - -struct Iter { - dir: InodeDir<'static>, - idx: u32, -} -impl Iterator for Iter { - type Item = Result>; - - fn next(&mut self) -> Option { - let entry = self.dir.get_entry(self.idx).map_err(|_| Error::new(EIO)); - self.idx += 1; - entry.transpose() - } - fn size_hint(&self) -> (usize, Option) { - match self.dir.entry_count().ok() { - Some(size) => { - let size = usize::try_from(size).expect("expected u32 to be convertible into usize"); - (size, Some(size)) - } - None => (0, None), - } - } -} - -fn entries_iter(dir: InodeDir<'static>) -> impl IntoIterator>> + 'static { - let mut index = 0_u32; - - core::iter::from_fn(move || { - let idx = index; - index += 1; - - dir.get_entry(idx).map_err(|_| Error::new(EIO)).transpose() - }) -} -fn inode_len(inode: InodeStruct<'static>) -> Result { - Ok(match inode.kind() { - InodeKind::File(file) => file.data().map_err(|_| Error::new(EIO))?.len(), - InodeKind::Dir(dir) => (Iter { dir, idx: 0 }) - .fold(0, |len, entry| len + entry.and_then(|entry| entry.name().map_err(|_| Error::new(EIO))).map_or(0, |name| name.len() + 1)), - InodeKind::Unknown => return Err(Error::new(EIO)), - }) -} - -impl Scheme for InitFsScheme { - fn open(&self, path: &str, _flags: usize, _uid: u32, _gid: u32) -> Result { - let mut components = path - // trim leading and trailing slash - .trim_matches('/') - // divide into components - .split('/') - // filter out double slashes (e.g. /usr//bin/...) - .filter(|c| !c.is_empty()); - - let mut current_inode = InitFs::ROOT_INODE; - - while let Some(component) = components.next() { - match component { - "." => continue, - ".." => { - let _ = components.next_back(); - continue - } - - _ => (), - } - - let current_inode_struct = get_inode(current_inode)?; - - let dir = match current_inode_struct.kind() { - InodeKind::Dir(dir) => dir, - - // If we still have more components in the path, and the file tree for that - // particular branch is not all directories except the last, then that file cannot - // exist. 
- InodeKind::File(_) | InodeKind::Unknown => return Err(Error::new(ENOENT)), - }; - - let mut entries = Iter { - dir, - idx: 0, - }; - - current_inode = loop { - let entry_res = match entries.next() { - Some(e) => e, - None => return Err(Error::new(ENOENT)), - }; - let entry = entry_res?; - let name = entry.name().map_err(|_| Error::new(EIO))?; - if name == component.as_bytes() { - break entry.inode(); - } - }; - } - - let id = next_id(); - let old = HANDLES.write().insert(id, Handle { - inode: current_inode, - seek: 0_usize, - filename: path.into(), - }); - assert!(old.is_none()); - - Ok(id) - } - - fn read(&self, id: usize, buffer: &mut [u8]) -> Result { - let mut handles = HANDLES.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - match get_inode(handle.inode)?.kind() { - InodeKind::Dir(dir) => { - let mut bytes_read = 0; - let mut bytes_skipped = 0; - - for entry_res in (Iter { dir, idx: 0 }) { - let entry = entry_res?; - let name = entry.name().map_err(|_| Error::new(EIO))?; - let entry_len = name.len() + 1; - - let to_skip = core::cmp::min(handle.seek - bytes_skipped, entry_len); - let max_to_read = core::cmp::min(entry_len - to_skip, buffer.len()); - - let to_copy = entry_len.saturating_sub(to_skip).saturating_sub(1); - buffer[bytes_read..bytes_read + to_copy].copy_from_slice(&name[..to_copy]); - - if to_copy.saturating_sub(to_skip) == 1 { - buffer[bytes_read + to_copy] = b'\n'; - bytes_read += 1; - } - - bytes_read += to_copy; - bytes_skipped += to_skip; - } - - handle.seek = handle.seek.checked_add(bytes_read).ok_or(Error::new(EOVERFLOW))?; - - Ok(bytes_read) - } - InodeKind::File(file) => { - let data = file.data().map_err(|_| Error::new(EIO))?; - let src_buf = &data[core::cmp::min(handle.seek, data.len())..]; - - let to_copy = core::cmp::min(src_buf.len(), buffer.len()); - buffer[..to_copy].copy_from_slice(&src_buf[..to_copy]); - - handle.seek = handle.seek.checked_add(to_copy).ok_or(Error::new(EOVERFLOW))?; - - Ok(to_copy) - } - InodeKind::Unknown => return Err(Error::new(EIO)), - } - } - - fn seek(&self, id: usize, pos: isize, whence: usize) -> Result { - let mut handles = HANDLES.write(); - let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; - - let new_offset = calc_seek_offset_usize(handle.seek, pos, whence, inode_len(get_inode(handle.inode)?)?)?; - handle.seek = new_offset as usize; - Ok(new_offset) - } - - fn fcntl(&self, id: usize, _cmd: usize, _arg: usize) -> Result { - let handles = HANDLES.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - Ok(0) - } - - fn fpath(&self, id: usize, buf: &mut [u8]) -> Result { - let handles = HANDLES.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - // TODO: Copy scheme part in kernel - let scheme_path = b"initfs:"; - let scheme_bytes = core::cmp::min(scheme_path.len(), buf.len()); - buf[..scheme_bytes].copy_from_slice(&scheme_path[..scheme_bytes]); - - let source = handle.filename.as_bytes(); - let path_bytes = core::cmp::min(buf.len() - scheme_bytes, source.len()); - buf[scheme_bytes..scheme_bytes + path_bytes].copy_from_slice(&source[..path_bytes]); - - Ok(scheme_bytes + path_bytes) - } - - fn fstat(&self, id: usize, stat: &mut Stat) -> Result { - let handles = HANDLES.read(); - let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - - let Timespec { sec, nsec } = fs()?.image_creation_time(); - - let inode = get_inode(handle.inode)?; - - stat.st_mode = inode.mode() | match inode.kind() { InodeKind::Dir(_) => MODE_DIR, InodeKind::File(_) => MODE_FILE, _ => 0 
}; - stat.st_uid = inode.uid(); - stat.st_gid = inode.gid(); - stat.st_size = u64::try_from(inode_len(inode)?).unwrap_or(u64::MAX); - - stat.st_ctime = sec.get(); - stat.st_ctime_nsec = nsec.get(); - stat.st_mtime = sec.get(); - stat.st_mtime_nsec = nsec.get(); - - Ok(0) - } - - fn fsync(&self, id: usize) -> Result { - let handles = HANDLES.read(); - let _handle = handles.get(&id).ok_or(Error::new(EBADF))?; - Ok(0) - } - - fn close(&self, id: usize) -> Result { - let _ = HANDLES.write().remove(&id).ok_or(Error::new(EBADF))?; - Ok(0) - } -} diff --git a/src/scheme/irq.rs b/src/scheme/irq.rs index 09d4e5cf24a3d3836527c979692400bce0bedf06..dcf0172637e228254d65ed2e5082835d0a8c77ad 100644 --- a/src/scheme/irq.rs +++ b/src/scheme/irq.rs @@ -371,3 +371,4 @@ impl Scheme for IrqScheme { Ok(0) } } +impl crate::scheme::KernelScheme for IrqScheme {} diff --git a/src/scheme/itimer.rs b/src/scheme/itimer.rs index 11bc559343fc3cefa1a75f725c89201e479b7086..1c8f27d246ab8a2a99a8f7370836084a668d1403 100644 --- a/src/scheme/itimer.rs +++ b/src/scheme/itimer.rs @@ -106,3 +106,4 @@ impl Scheme for ITimerScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for ITimerScheme {} diff --git a/src/scheme/live.rs b/src/scheme/live.rs index 6cc6963838e7e9fbd7e8c9745530da2b5e55e17b..acf14a03fef808698dd7abdfb6e9a9e6f8ca934c 100644 --- a/src/scheme/live.rs +++ b/src/scheme/live.rs @@ -5,6 +5,7 @@ use alloc::collections::BTreeMap; use core::{slice, str}; use core::sync::atomic::{AtomicUsize, Ordering}; use spin::RwLock; +use rmm::Flusher; use syscall::data::Stat; use syscall::error::*; @@ -12,7 +13,7 @@ use syscall::flag::{MODE_DIR, MODE_FILE}; use syscall::scheme::{calc_seek_offset_usize, Scheme}; use crate::memory::Frame; -use crate::paging::{ActivePageTable, Page, PageFlags, PhysicalAddress, TableKind, VirtualAddress}; +use crate::paging::{KernelMapper, Page, PageFlags, PhysicalAddress, VirtualAddress}; use crate::paging::mapper::PageFlushAll; static mut LIST: [u8; 2] = [b'0', b'\n']; @@ -36,7 +37,7 @@ impl DiskScheme { let mut phys = 0; let mut size = 0; - for line in str::from_utf8(unsafe { crate::INIT_ENV }).unwrap_or("").lines() { + for line in str::from_utf8(crate::init_env()).unwrap_or("").lines() { let mut parts = line.splitn(2, '='); let name = parts.next().unwrap_or(""); let value = parts.next().unwrap_or(""); @@ -54,15 +55,16 @@ impl DiskScheme { // Ensure live disk pages are mapped let virt = phys + crate::PHYS_OFFSET; unsafe { - let mut active_table = ActivePageTable::new(TableKind::Kernel); - let flush_all = PageFlushAll::new(); + let mut mapper = KernelMapper::lock(); + + let mut flush_all = PageFlushAll::new(); let start_page = Page::containing_address(VirtualAddress::new(virt)); let end_page = Page::containing_address(VirtualAddress::new(virt + size - 1)); for page in Page::range_inclusive(start_page, end_page) { - if active_table.translate_page(page).is_none() { + if mapper.translate(page.start_address()).is_none() { let frame = Frame::containing_address(PhysicalAddress::new(page.start_address().data() - crate::PHYS_OFFSET)); let flags = PageFlags::new().write(true); - let result = active_table.map_to(page, frame, flags); + let result = mapper.get_mut().expect("expected KernelMapper not to be in use while initializing live scheme").map_phys(page.start_address(), frame.start_address(), flags).expect("failed to map live page"); flush_all.consume(result); } } @@ -202,3 +204,4 @@ impl Scheme for DiskScheme { 
self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for DiskScheme {} diff --git a/src/scheme/memory.rs b/src/scheme/memory.rs index 2636ebf84ef2bb8ddf94f4aed533262e5288fcb1..603ece511bcaa0e15e2591c41079415b5748961b 100644 --- a/src/scheme/memory.rs +++ b/src/scheme/memory.rs @@ -1,8 +1,12 @@ +use alloc::sync::Arc; +use spin::RwLock; + use crate::context; -use crate::context::memory::{page_flags, Grant}; +use crate::context::memory::{AddrSpace, page_flags, Grant}; use crate::memory::{free_frames, used_frames, PAGE_SIZE}; -use crate::paging::{ActivePageTable, VirtualAddress}; -use crate::syscall::data::{Map, OldMap, StatVfs}; +use crate::paging::{mapper::PageFlushAll, Page, VirtualAddress}; + +use crate::syscall::data::{Map, StatVfs}; use crate::syscall::error::*; use crate::syscall::flag::MapFlags; use crate::syscall::scheme::Scheme; @@ -14,37 +18,16 @@ impl MemoryScheme { MemoryScheme } - pub fn fmap_anonymous(map: &Map) -> Result { - //TODO: Abstract with other grant creation - if map.size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.write(); - - let region = grants.find_free_at(VirtualAddress::new(map.address), map.size, map.flags)?.round(); - - { - // Make sure it's *absolutely* not mapped already - // TODO: Keep track of all allocated memory so this isn't necessary - - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(map.address).kind()) }; + pub fn fmap_anonymous(addr_space: &Arc>, map: &Map) -> Result { + let (requested_page, page_count) = crate::syscall::validate::validate_region(map.address, map.size)?; - for page in region.pages() { - if active_table.translate_page(page).is_some() { - println!("page at {:#x} was already mapped", page.start_address().data()); - return Err(Error::new(EEXIST)) - } - } - } + let page = addr_space + .write() + .mmap((map.address != 0).then_some(requested_page), page_count, map.flags, |page, flags, mapper, flusher| { + Ok(Grant::zeroed(page, page_count, flags, mapper, flusher)?) + })?; - grants.insert(Grant::map(region.start_address(), region.size(), page_flags(map.flags))); - - Ok(region.start_address().data()) - } + Ok(page.start_address().data()) } } impl Scheme for MemoryScheme { @@ -65,19 +48,7 @@ impl Scheme for MemoryScheme { } fn fmap(&self, _id: usize, map: &Map) -> Result { - Self::fmap_anonymous(map) - } - fn fmap_old(&self, id: usize, map: &OldMap) -> Result { - if map.flags.contains(MapFlags::MAP_FIXED) { - // not supported for fmap, which lacks the address argument. 
- return Err(Error::new(EINVAL)); - } - self.fmap(id, &Map { - offset: map.offset, - size: map.size, - flags: map.flags, - address: 0, - }) + Self::fmap_anonymous(&Arc::clone(context::current()?.read().addr_space()?), map) } fn fcntl(&self, _id: usize, _cmd: usize, _arg: usize) -> Result { @@ -98,3 +69,8 @@ impl Scheme for MemoryScheme { Ok(0) } } +impl crate::scheme::KernelScheme for MemoryScheme { + fn kfmap(&self, _number: usize, addr_space: &Arc>, map: &Map, _consume: bool) -> Result { + Self::fmap_anonymous(addr_space, map) + } +} diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs index f8f1a1bdeee024273becaa1c922d87ff01119870..2e7df3949ef2b7c4ba4568759921dc2080f5268f 100644 --- a/src/scheme/mod.rs +++ b/src/scheme/mod.rs @@ -16,6 +16,7 @@ use alloc::{ use core::sync::atomic::AtomicUsize; use spin::{Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use crate::context::{memory::AddrSpace, file::FileDescriptor}; use crate::syscall::error::*; use crate::syscall::scheme::Scheme; @@ -24,7 +25,6 @@ use self::acpi::AcpiScheme; use self::debug::DebugScheme; use self::event::EventScheme; -use self::initfs::InitFsScheme; use self::irq::IrqScheme; use self::itimer::ITimerScheme; use self::memory::MemoryScheme; @@ -45,9 +45,6 @@ pub mod debug; /// `event:` - allows reading of `Event`s which are registered using `fevent` pub mod event; -/// `initfs:` - a readonly filesystem used for initializing the system -pub mod initfs; - /// `irq:` - allows userspace handling of IRQs pub mod irq; @@ -107,7 +104,7 @@ impl<'a> Iterator for SchemeIter<'a> { /// Scheme list type pub struct SchemeList { - map: BTreeMap>, + map: BTreeMap>, names: BTreeMap, SchemeId>>, next_ns: usize, next_id: usize @@ -165,7 +162,6 @@ impl SchemeList { self.insert(ns, "kernel/acpi", |scheme_id| Arc::new(AcpiScheme::new(scheme_id))).unwrap(); } self.insert(ns, "debug", |scheme_id| Arc::new(DebugScheme::new(scheme_id))).unwrap(); - self.insert(ns, "initfs", |_| Arc::new(InitFsScheme)).unwrap(); self.insert(ns, "irq", |scheme_id| Arc::new(IrqScheme::new(scheme_id))).unwrap(); self.insert(ns, "proc", |scheme_id| Arc::new(ProcScheme::new(scheme_id))).unwrap(); self.insert(ns, "thisproc", |_| Arc::new(ProcScheme::restricted())).unwrap(); @@ -201,7 +197,7 @@ impl SchemeList { Ok(to) } - pub fn iter(&self) -> ::alloc::collections::btree_map::Iter> { + pub fn iter(&self) -> ::alloc::collections::btree_map::Iter> { self.map.iter() } @@ -212,11 +208,11 @@ impl SchemeList { } /// Get the nth scheme. - pub fn get(&self, id: SchemeId) -> Option<&Arc> { + pub fn get(&self, id: SchemeId) -> Option<&Arc> { self.map.get(&id) } - pub fn get_name(&self, ns: SchemeNamespace, name: &str) -> Option<(SchemeId, &Arc)> { + pub fn get_name(&self, ns: SchemeNamespace, name: &str) -> Option<(SchemeId, &Arc)> { if let Some(names) = self.names.get(&ns) { if let Some(&id) = names.get(name) { return self.get(id).map(|scheme| (id, scheme)); @@ -227,7 +223,7 @@ impl SchemeList { /// Create a new scheme. 
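(Editorial aside, not part of the patch.) With `SchemeList` now storing trait objects of the new `KernelScheme` trait (defined further down in scheme/mod.rs) instead of plain `Scheme`, the kernel-internal hooks get default implementations, which is why most schemes in this patch only gain a one-line empty `impl crate::scheme::KernelScheme for ... {}`. A reduced sketch of the pattern, with the error type simplified:

```rust
// Illustrative pattern only; the real trait returns the kernel's Result type
// and hands out Arc<RwLock<...>> handles from its default methods.
use std::sync::Arc;

#[derive(Debug)]
enum Error { Ebadf, Eopnotsupp }
type Result<T> = core::result::Result<T, Error>;

trait Scheme {
    fn fsync(&self, _id: usize) -> Result<usize> { Ok(0) }
}

trait KernelScheme: Scheme + Send + Sync + 'static {
    // Kernel-internal hooks; schemes that do not support them inherit the defaults.
    fn as_filetable(&self, _number: usize) -> Result<usize> { Err(Error::Ebadf) }
    fn kfmap(&self, _number: usize) -> Result<usize> { Err(Error::Eopnotsupp) }
}

struct DebugScheme;
impl Scheme for DebugScheme {}
// A single empty impl opts an existing scheme into the kernel-facing trait.
impl KernelScheme for DebugScheme {}

fn main() {
    // The scheme list can then store trait objects of the kernel-facing trait.
    let _scheme: Arc<dyn KernelScheme> = Arc::new(DebugScheme);
}
```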
pub fn insert(&mut self, ns: SchemeNamespace, name: &str, scheme_fn: F) -> Result - where F: Fn(SchemeId) -> Arc + where F: Fn(SchemeId) -> Arc { if let Some(names) = self.names.get(&ns) { if names.contains_key(name) { @@ -298,3 +294,20 @@ pub fn schemes() -> RwLockReadGuard<'static, SchemeList> { pub fn schemes_mut() -> RwLockWriteGuard<'static, SchemeList> { SCHEMES.call_once(init_schemes).write() } + +#[allow(unused_variables)] +pub trait KernelScheme: Scheme + Send + Sync + 'static { + fn as_filetable(&self, number: usize) -> Result>>>> { + Err(Error::new(EBADF)) + } + fn as_addrspace(&self, number: usize) -> Result>> { + Err(Error::new(EBADF)) + } + fn as_sigactions(&self, number: usize) -> Result>>> { + Err(Error::new(EBADF)) + } + + fn kfmap(&self, number: usize, addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + Err(Error::new(EOPNOTSUPP)) + } +} diff --git a/src/scheme/pipe.rs b/src/scheme/pipe.rs index d9e3fdca08466dd973ebeb87d6d1de58820e9fca..4090e7e9027065ab31446be4d0ee2363305c0bf1 100644 --- a/src/scheme/pipe.rs +++ b/src/scheme/pipe.rs @@ -1,7 +1,7 @@ use alloc::sync::{Arc, Weak}; use alloc::collections::{BTreeMap, VecDeque}; use core::sync::atomic::{AtomicUsize, Ordering}; -use spin::{Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use spin::{Mutex, Once, RwLock}; use crate::event; use crate::scheme::SchemeId; @@ -264,3 +264,4 @@ impl Drop for PipeWrite { self.condition.notify(); } } +impl crate::scheme::KernelScheme for PipeScheme {} diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index 3445451388c473a2b1867e3ff5b6b399207a1775..23d7f9c6dfc52dbc4e9ac91a926b39a6598b15eb 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -1,34 +1,45 @@ use crate::{ - arch::paging::VirtualAddress, - context::{self, Context, ContextId, Status}, + arch::paging::{Flusher, mapper::{InactiveFlusher, PageFlushAll}, Page, RmmA, VirtualAddress}, + context::{self, Context, ContextId, Status, file::{FileDescription, FileDescriptor}, memory::{AddrSpace, Grant, new_addrspace, map_flags, page_flags, Region}}, + memory::PAGE_SIZE, ptrace, - scheme::{AtomicSchemeId, SchemeId}, + scheme::{self, FileHandle, KernelScheme, SchemeId}, syscall::{ FloatRegisters, IntRegisters, EnvRegisters, - data::{PtraceEvent, Stat}, + data::{Map, PtraceEvent, SigAction, Stat}, error::*, flag::*, scheme::{calc_seek_offset_usize, Scheme}, self, - validate, }, }; use alloc::{ boxed::Box, collections::BTreeMap, + string::{String, ToString}, + sync::Arc, vec::Vec, }; use core::{ cmp, + convert::TryFrom, mem, slice, str, sync::atomic::{AtomicUsize, Ordering}, }; -use spin::RwLock; +use spin::{Once, RwLock}; + +fn read_from(dst: &mut [u8], src: &[u8], offset: &mut usize) -> Result { + let byte_count = cmp::min(dst.len(), src.len().saturating_sub(*offset)); + let next_offset = offset.saturating_add(byte_count); + dst[..byte_count].copy_from_slice(&src[*offset..next_offset]); + *offset = next_offset; + Ok(byte_count) +} fn with_context(pid: ContextId, callback: F) -> Result where @@ -56,7 +67,7 @@ where } fn try_stop_context(pid: ContextId, mut callback: F) -> Result where - F: FnMut(&mut Context) -> Result, + F: FnOnce(&mut Context) -> Result, { if pid == context::context_id() { return Err(Error::new(EBADF)); @@ -95,21 +106,60 @@ enum RegsKind { Int, Env, } -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone)] enum Operation { - Memory, + Memory { addrspace: Arc> }, Regs(RegsKind), Trace, Static(&'static str), + Name, + Cwd, + Sigstack, + Attr(Attr), + Filetable { filetable: Arc>>> 
}, + AddrSpace { addrspace: Arc> }, + CurrentAddrSpace, + + // "operations CAN change". The reason we split changing the address space into two handle + // types, is that we would rather want the actual switch to occur when closing, as opposed to + // when writing. This is so that we can actually guarantee that no file descriptors are leaked. + AwaitingAddrSpaceChange { + new: Arc>, + new_sp: usize, + new_ip: usize, + }, + + CurrentFiletable, + + AwaitingFiletableChange(Arc>>>), + + // TODO: Remove this once openat is implemented, or allow openat-via-dup via e.g. the top-level + // directory. + OpenViaDup, + // Allows calling fmap directly on a FileDescriptor (as opposed to a FileDescriptor). + // + // TODO: Remove this once cross-scheme links are merged. That would allow acquiring a new + // FD to access the file descriptor behind grants. + GrantHandle { description: Arc> }, + + Sigactions(Arc>>), + CurrentSigactions, + AwaitingSigactionsChange(Arc>>), + + MmapMinAddr(Arc>), +} +#[derive(Clone, Copy, PartialEq, Eq)] +enum Attr { + Uid, + Gid, + // TODO: namespace, tid, etc. } impl Operation { - fn needs_child_process(self) -> bool { - match self { - Self::Memory => true, - Self::Regs(_) => true, - Self::Trace => true, - Self::Static(_) => false, - } + fn needs_child_process(&self) -> bool { + matches!(self, Self::Memory { .. } | Self::Regs(_) | Self::Trace | Self::Filetable { .. } | Self::AddrSpace { .. } | Self::CurrentAddrSpace | Self::CurrentFiletable | Self::Sigactions(_) | Self::CurrentSigactions | Self::AwaitingSigactionsChange(_)) + } + fn needs_root(&self) -> bool { + matches!(self, Self::Attr(_)) } } struct MemData { @@ -140,6 +190,7 @@ enum OperationData { Memory(MemData), Trace(TraceData), Static(StaticData), + Offset(usize), Other, } impl OperationData { @@ -163,7 +214,7 @@ impl OperationData { } } -#[derive(Clone, Copy)] +#[derive(Clone)] struct Info { pid: ContextId, flags: usize, @@ -195,7 +246,7 @@ impl Handle { } } -pub static PROC_SCHEME_ID: AtomicSchemeId = AtomicSchemeId::default(); +pub static PROC_SCHEME_ID: Once = Once::new(); pub struct ProcScheme { next_id: AtomicUsize, @@ -210,7 +261,7 @@ pub enum Access { impl ProcScheme { pub fn new(scheme_id: SchemeId) -> Self { - PROC_SCHEME_ID.store(scheme_id, Ordering::SeqCst); + PROC_SCHEME_ID.call_once(|| scheme_id); Self { next_id: AtomicUsize::new(0), @@ -225,46 +276,57 @@ impl ProcScheme { access: Access::Restricted, } } + fn new_handle(&self, handle: Handle) -> Result { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + let _ = self.handles.write().insert(id, handle); + Ok(id) + } } -impl Scheme for ProcScheme { - fn open(&self, path: &str, flags: usize, uid: u32, gid: u32) -> Result { - let mut parts = path.splitn(2, '/'); - let pid_str = parts.next() - .ok_or(Error::new(ENOENT))?; - - let pid = if pid_str == "current" { - context::context_id() - } else if self.access == Access::Restricted { - return Err(Error::new(EACCES)); - } else { - ContextId::from(pid_str.parse().map_err(|_| Error::new(ENOENT))?) - }; +fn get_context(id: ContextId) -> Result>> { + context::contexts().get(id).ok_or(Error::new(ENOENT)).map(Arc::clone) +} - let operation = match parts.next() { - Some("mem") => Operation::Memory, +impl ProcScheme { + fn open_inner(&self, pid: ContextId, operation_str: Option<&str>, flags: usize, uid: u32, gid: u32) -> Result { + let operation = match operation_str { + Some("mem") => Operation::Memory { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) 
}, + Some("addrspace") => Operation::AddrSpace { addrspace: Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?) }, + Some("filetable") => Operation::Filetable { filetable: Arc::clone(&get_context(pid)?.read().files) }, + Some("current-addrspace") => Operation::CurrentAddrSpace, + Some("current-filetable") => Operation::CurrentFiletable, Some("regs/float") => Operation::Regs(RegsKind::Float), Some("regs/int") => Operation::Regs(RegsKind::Int), Some("regs/env") => Operation::Regs(RegsKind::Env), Some("trace") => Operation::Trace, Some("exe") => Operation::Static("exe"), + Some("name") => Operation::Name, + Some("cwd") => Operation::Cwd, + Some("sigstack") => Operation::Sigstack, + Some("uid") => Operation::Attr(Attr::Uid), + Some("gid") => Operation::Attr(Attr::Gid), + Some("open_via_dup") => Operation::OpenViaDup, + Some("sigactions") => Operation::Sigactions(Arc::clone(&get_context(pid)?.read().actions)), + Some("current-sigactions") => Operation::CurrentSigactions, + Some("mmap-min-addr") => Operation::MmapMinAddr(Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?)), _ => return Err(Error::new(EINVAL)) }; let contexts = context::contexts(); let target = contexts.get(pid).ok_or(Error::new(ESRCH))?; - let data; + let mut data; { let target = target.read(); data = match operation { - Operation::Memory => OperationData::Memory(MemData::default()), + Operation::Memory { .. } => OperationData::Memory(MemData::default()), Operation::Trace => OperationData::Trace(TraceData::default()), Operation::Static(_) => OperationData::Static(StaticData::new( target.name.read().clone().into() )), + Operation::AddrSpace { .. } => OperationData::Offset(0), _ => OperationData::Other, }; @@ -296,12 +358,33 @@ impl Scheme for ProcScheme { None => return Err(Error::new(EPERM)), } } + } else if operation.needs_root() && (uid != 0 || gid != 0) { + return Err(Error::new(EPERM)); + } + + if matches!(operation, Operation::Filetable { .. }) { + data = OperationData::Static(StaticData::new({ + use core::fmt::Write; + + let mut data = String::new(); + for index in target.files.read().iter().enumerate().filter_map(|(idx, val)| val.as_ref().map(|_| idx)) { + write!(data, "{}\n", index).unwrap(); + } + data.into_bytes().into_boxed_slice() + })); } }; - let id = self.next_id.fetch_add(1, Ordering::SeqCst); + let id = self.new_handle(Handle { + info: Info { + flags, + pid, + operation: operation.clone(), + }, + data, + })?; - if let Operation::Trace { .. } = operation { + if let Operation::Trace = operation { if !ptrace::try_new_session(pid, id) { // There is no good way to handle id being occupied for nothing // here, is there? @@ -314,44 +397,96 @@ impl Scheme for ProcScheme { } } - self.handles.write().insert(id, Handle { - info: Info { - flags, - pid, - operation, - }, - data, - }); Ok(id) } +} + +impl Scheme for ProcScheme { + fn open(&self, path: &str, flags: usize, uid: u32, gid: u32) -> Result { + let mut parts = path.splitn(2, '/'); + let pid_str = parts.next() + .ok_or(Error::new(ENOENT))?; + + let pid = if pid_str == "current" { + context::context_id() + } else if pid_str == "new" { + inherit_context()? + } else if self.access == Access::Restricted { + return Err(Error::new(EACCES)); + } else { + ContextId::from(pid_str.parse().map_err(|_| Error::new(ENOENT))?) 
+ }; + + self.open_inner(pid, parts.next(), flags, uid, gid) + } - /// Using dup for `proc:` simply opens another operation on the same PID - /// ```rust,ignore - /// let trace = syscall::open("proc:1234/trace")?; - /// - /// // let regs = syscall::open("proc:1234/regs/int")?; - /// let regs = syscall::dup(trace, "regs/int")?; - /// ``` + /// Dup is currently used to implement clone() and execve(). fn dup(&self, old_id: usize, buf: &[u8]) -> Result { let info = { let handles = self.handles.read(); let handle = handles.get(&old_id).ok_or(Error::new(EBADF))?; - handle.info + + handle.info.clone() + }; + + let handle = |operation, data| Handle { + info: Info { + flags: 0, + pid: info.pid, + operation, + }, + data, }; - let buf_str = str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?; + self.new_handle(match info.operation { + Operation::OpenViaDup => { + let (uid, gid) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read() { + context => (context.euid, context.egid), + }; + return self.open_inner(info.pid, Some(core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?).filter(|s| !s.is_empty()), O_RDWR | O_CLOEXEC, uid, gid); + }, + + Operation::Filetable { ref filetable } => { + // TODO: Maybe allow userspace to either copy or transfer recently dupped file + // descriptors between file tables. + if buf != b"copy" { + return Err(Error::new(EINVAL)); + } + let new_filetable = Arc::try_new(RwLock::new(filetable.read().clone())).map_err(|_| Error::new(ENOMEM))?; - let mut path = format!("{}/", info.pid.into()); - path.push_str(buf_str); + handle(Operation::Filetable { filetable: new_filetable }, OperationData::Other) + } + Operation::AddrSpace { ref addrspace } => { + let (operation, is_mem) = match buf { + // TODO: Better way to obtain new empty address spaces, perhaps using SYS_OPEN. But + // in that case, what scheme? + b"empty" => (Operation::AddrSpace { addrspace: new_addrspace()? }, false), + b"exclusive" => (Operation::AddrSpace { addrspace: addrspace.write().try_clone()? 
}, false), + b"mem" => (Operation::Memory { addrspace: Arc::clone(&addrspace) }, true), + b"mmap-min-addr" => (Operation::MmapMinAddr(Arc::clone(&addrspace)), false), + + grant_handle if grant_handle.starts_with(b"grant-") => { + let start_addr = usize::from_str_radix(core::str::from_utf8(&grant_handle[6..]).map_err(|_| Error::new(EINVAL))?, 16).map_err(|_| Error::new(EINVAL))?; + (Operation::GrantHandle { + description: Arc::clone(&addrspace.read().grants.contains(VirtualAddress::new(start_addr)).ok_or(Error::new(EINVAL))?.desc_opt.as_ref().ok_or(Error::new(EINVAL))?.desc.description) + }, false) + } - let (uid, gid) = { - let contexts = context::contexts(); - let context = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context.read(); - (context.euid, context.egid) - }; + _ => return Err(Error::new(EINVAL)), + }; - self.open(&path, info.flags, uid, gid) + handle(operation, if is_mem { OperationData::Memory(MemData { offset: VirtualAddress::new(0) }) } else { OperationData::Offset(0) }) + } + Operation::Sigactions(ref sigactions) => { + let new = match buf { + b"empty" => Context::empty_actions(), + b"copy" => Arc::new(RwLock::new(sigactions.read().clone())), + _ => return Err(Error::new(EINVAL)), + }; + handle(Operation::Sigactions(new), OperationData::Other) + } + _ => return Err(Error::new(EINVAL)), + }) } fn seek(&self, id: usize, pos: isize, whence: usize) -> Result { @@ -376,7 +511,7 @@ impl Scheme for ProcScheme { let info = { let handles = self.handles.read(); let handle = handles.get(&id).ok_or(Error::new(EBADF))?; - handle.info + handle.info.clone() }; match info.operation { @@ -390,24 +525,55 @@ impl Scheme for ProcScheme { data.offset += len; Ok(len) }, - Operation::Memory => { + Operation::Memory { addrspace } => { // Won't context switch, don't worry about the locks let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; let data = handle.data.mem_data().expect("operations can't change"); - let contexts = context::contexts(); - let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); + let mut bytes_read = 0; - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - buf.copy_from_slice(validate::validate_slice(ptr, buf.len())?); - Ok(()) - })?; + for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { + let (chunk, _writable) = chunk_opt.ok_or(Error::new(EFAULT))?; + let dst_slice = &mut buf[bytes_read..bytes_read + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_to_nonoverlapping(dst_slice.as_mut_ptr(), dst_slice.len()); + } + bytes_read += chunk.len(); + } - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + data.offset = VirtualAddress::new(data.offset.data() + bytes_read); + Ok(bytes_read) }, + // TODO: Support reading only a specific address range. Maybe using seek? + Operation::AddrSpace { addrspace } => { + let mut handles = self.handles.write(); + let offset = if let OperationData::Offset(ref mut offset) = handles.get_mut(&id).ok_or(Error::new(EBADF))?.data { + offset + } else { + return Err(Error::new(EBADFD)); + }; + + // TODO: Define a struct somewhere? 
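(Editorial aside, not part of the patch.) The `Operation::AddrSpace` read path that follows emits one fixed-size record per grant: four native-endian `usize`s holding the start address, the size, the map flags (with a high bit marking file-backed grants), and the file offset. The TODO above asks for a struct; a hypothetical userspace-side view of the record, with made-up field names, could look like this:

```rust
// Hypothetical consumer of the grant records; not kernel code.
#[derive(Debug, PartialEq)]
struct GrantRecord {
    start: usize,
    size: usize,
    flags: usize,
    offset: usize,
}

fn parse_records(buf: &[u8]) -> Vec<GrantRecord> {
    const WORD: usize = core::mem::size_of::<usize>();
    buf.chunks_exact(WORD * 4)
        .map(|record| {
            let word = |i: usize| {
                usize::from_ne_bytes(record[i * WORD..(i + 1) * WORD].try_into().unwrap())
            };
            GrantRecord { start: word(0), size: word(1), flags: word(2), offset: word(3) }
        })
        .collect()
}

fn main() {
    // One record: a 0x2000-byte grant at 0x1000 with illustrative flag bits.
    let mut buf = Vec::new();
    for word in [0x1000usize, 0x2000, 0x7, 0] {
        buf.extend_from_slice(&word.to_ne_bytes());
    }
    assert_eq!(
        parse_records(&buf),
        vec![GrantRecord { start: 0x1000, size: 0x2000, flags: 0x7, offset: 0 }]
    );
}
```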
+ const RECORD_SIZE: usize = mem::size_of::() * 4; + let records = buf.array_chunks_mut::(); + + let addrspace = addrspace.read(); + let mut bytes_read = 0; + + for (record_bytes, grant) in records.zip(addrspace.grants.iter()).skip(*offset / RECORD_SIZE) { + let mut qwords = record_bytes.array_chunks_mut::<{mem::size_of::()}>(); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.start_address().data())); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.size())); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(map_flags(grant.flags()).bits() | if grant.desc_opt.is_some() { 0x8000_0000 } else { 0 })); + qwords.next().unwrap().copy_from_slice(&usize::to_ne_bytes(grant.desc_opt.as_ref().map_or(0, |d| d.offset))); + bytes_read += RECORD_SIZE; + } + + *offset += bytes_read; + Ok(bytes_read) + } + Operation::Regs(kind) => { union Output { float: FloatRegisters, @@ -419,11 +585,7 @@ impl Scheme for ProcScheme { RegsKind::Float => with_context(info.pid, |context| { // NOTE: The kernel will never touch floats - // In the rare case of not having floating - // point registers uninitiated, return - // empty everything. - let fx = context.arch.get_fx_regs().unwrap_or_default(); - Ok((Output { float: fx }, mem::size_of::())) + Ok((Output { float: context.get_fx_regs() }, mem::size_of::())) })?, RegsKind::Int => try_stop_context(info.pid, |context| match unsafe { ptrace::regs_for(&context) } { None => { @@ -519,6 +681,34 @@ impl Scheme for ProcScheme { // Return read events Ok(read * mem::size_of::()) } + Operation::Name => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.read().as_bytes(), &mut 0), + Operation::Cwd => read_from(buf, context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().cwd.read().as_bytes(), &mut 0), + Operation::Sigstack => read_from(buf, &context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().sigstack.unwrap_or(!0).to_ne_bytes(), &mut 0), + Operation::Attr(attr) => { + let src_buf = match (attr, &*Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?).read()) { + (Attr::Uid, context) => context.euid.to_string(), + (Attr::Gid, context) => context.egid.to_string(), + }.into_bytes(); + + read_from(buf, &src_buf, &mut 0) + } + Operation::Filetable { .. } => { + let mut handles = self.handles.write(); + let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; + let data = handle.data.static_data().expect("operations can't change"); + + read_from(buf, &data.buf, &mut data.offset) + } + Operation::MmapMinAddr(ref addrspace) => { + let val = addrspace.read().mmap_min; + *buf.array_chunks_mut::<{mem::size_of::()}>().next().unwrap() = usize::to_ne_bytes(val); + Ok(mem::size_of::()) + } + // TODO: Replace write() with SYS_DUP_FORWARD. + // TODO: Find a better way to switch address spaces, since they also require switching + // the instruction and stack pointer. 
Maybe remove `/regs` altogether and replace it + // with `/ctx` + _ => return Err(Error::new(EBADF)), } } @@ -535,29 +725,68 @@ impl Scheme for ProcScheme { let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; handle.continue_ignored_children(); - handle.info + handle.info.clone() }; match info.operation { Operation::Static(_) => Err(Error::new(EBADF)), - Operation::Memory => { + Operation::Memory { addrspace } => { // Won't context switch, don't worry about the locks let mut handles = self.handles.write(); let handle = handles.get_mut(&id).ok_or(Error::new(EBADF))?; let data = handle.data.mem_data().expect("operations can't change"); - let contexts = context::contexts(); - let context = contexts.get(info.pid).ok_or(Error::new(ESRCH))?; - let mut context = context.write(); + let mut bytes_written = 0; - ptrace::with_context_memory(&mut context, data.offset, buf.len(), |ptr| { - validate::validate_slice_mut(ptr, buf.len())?.copy_from_slice(buf); - Ok(()) - })?; + for chunk_opt in ptrace::context_memory(&mut *addrspace.write(), data.offset, buf.len()) { + let (chunk, writable) = chunk_opt.ok_or(Error::new(EFAULT))?; - data.offset = VirtualAddress::new(data.offset.data() + buf.len()); - Ok(buf.len()) + if !writable { return Err(Error::new(EACCES)); } + + let src_slice = &buf[bytes_written..bytes_written + chunk.len()]; + unsafe { + chunk.as_mut_ptr().copy_from_nonoverlapping(src_slice.as_ptr(), src_slice.len()); + } + bytes_written += chunk.len(); + } + + data.offset = data.offset.add(bytes_written); + Ok(bytes_written) }, + Operation::AddrSpace { addrspace } => { + let mut chunks = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + let mut next = || chunks.next().ok_or(Error::new(EINVAL)); + + match next()? 
{ + op @ ADDRSPACE_OP_MMAP | op @ ADDRSPACE_OP_TRANSFER => { + let fd = next()?; + let offset = next()?; + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; + + if !flags.contains(MapFlags::MAP_FIXED) { + return Err(Error::new(EOPNOTSUPP)); + } + + let (scheme, number) = extract_scheme_number(fd)?; + + return scheme.kfmap(number, &addrspace, &Map { offset, size: page_count * PAGE_SIZE, address: page.start_address().data(), flags }, op == ADDRSPACE_OP_TRANSFER); + } + ADDRSPACE_OP_MUNMAP => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + + addrspace.write().munmap(page, page_count); + } + ADDRSPACE_OP_MPROTECT => { + let (page, page_count) = crate::syscall::validate_region(next()?, next()?)?; + let flags = MapFlags::from_bits(next()?).ok_or(Error::new(EINVAL))?; + + addrspace.write().mprotect(page, page_count, flags)?; + } + _ => return Err(Error::new(EINVAL)), + } + Ok(0) + } Operation::Regs(kind) => match kind { RegsKind::Float => { if buf.len() < mem::size_of::() { @@ -575,7 +804,7 @@ impl Scheme for ProcScheme { // Ignore the rare case of floating point // registers being uninitiated - let _ = context.arch.set_fx_regs(regs); + let _ = context.set_fx_regs(regs); Ok(mem::size_of::()) }) @@ -704,6 +933,72 @@ impl Scheme for ProcScheme { Ok(mem::size_of::()) }, + // TODO: Deduplicate name and cwd + Operation::Name => { + let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?.into_boxed_str(); + *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().name.write() = utf8; + Ok(buf.len()) + } + Operation::Cwd => { + let utf8 = alloc::string::String::from_utf8(buf.to_vec()).map_err(|_| Error::new(EINVAL))?; + *context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.read().cwd.write() = utf8; + Ok(buf.len()) + } + Operation::Sigstack => { + let bytes = <[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?; + let sigstack = usize::from_ne_bytes(bytes); + context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?.write().sigstack = (sigstack != !0).then(|| sigstack); + Ok(buf.len()) + } + Operation::Attr(attr) => { + let context_lock = Arc::clone(context::contexts().get(info.pid).ok_or(Error::new(ESRCH))?); + let id = core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?.parse::().map_err(|_| Error::new(EINVAL))?; + + match attr { + Attr::Uid => context_lock.write().euid = id, + Attr::Gid => context_lock.write().egid = id, + } + Ok(buf.len()) + } + Operation::Filetable { .. } => return Err(Error::new(EBADF)), + + Operation::CurrentFiletable => { + let filetable_fd = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + let (hopefully_this_scheme, number) = extract_scheme_number(filetable_fd)?; + + let mut filetable = hopefully_this_scheme.as_filetable(number)?; + + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingFiletableChange(filetable); + + Ok(mem::size_of::()) + } + Operation::CurrentAddrSpace { .. 
} => { + let mut iter = buf.array_chunks::<{mem::size_of::()}>().copied().map(usize::from_ne_bytes); + let addrspace_fd = iter.next().ok_or(Error::new(EINVAL))?; + let sp = iter.next().ok_or(Error::new(EINVAL))?; + let ip = iter.next().ok_or(Error::new(EINVAL))?; + + let (hopefully_this_scheme, number) = extract_scheme_number(addrspace_fd)?; + let space = hopefully_this_scheme.as_addrspace(number)?; + + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingAddrSpaceChange { new: space, new_sp: sp, new_ip: ip }; + + Ok(3 * mem::size_of::()) + } + Operation::CurrentSigactions => { + let sigactions_fd = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + let (hopefully_this_scheme, number) = extract_scheme_number(sigactions_fd)?; + let sigactions = hopefully_this_scheme.as_sigactions(number)?; + self.handles.write().get_mut(&id).ok_or(Error::new(EBADF))?.info.operation = Operation::AwaitingSigactionsChange(sigactions); + Ok(mem::size_of::()) + } + Operation::MmapMinAddr(ref addrspace) => { + let val = usize::from_ne_bytes(<[u8; mem::size_of::()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + if val % PAGE_SIZE != 0 || val > crate::USER_END_OFFSET { return Err(Error::new(EINVAL)); } + addrspace.write().mmap_min = val; + Ok(mem::size_of::()) + } + _ => return Err(Error::new(EBADF)), } } @@ -735,18 +1030,30 @@ impl Scheme for ProcScheme { let handle = handles.get(&id).ok_or(Error::new(EBADF))?; let path = format!("proc:{}/{}", handle.info.pid.into(), match handle.info.operation { - Operation::Memory => "mem", + Operation::Memory { .. } => "mem", Operation::Regs(RegsKind::Float) => "regs/float", Operation::Regs(RegsKind::Int) => "regs/int", Operation::Regs(RegsKind::Env) => "regs/env", Operation::Trace => "trace", Operation::Static(path) => path, + Operation::Name => "name", + Operation::Cwd => "cwd", + Operation::Sigstack => "sigstack", + Operation::Attr(Attr::Uid) => "uid", + Operation::Attr(Attr::Gid) => "gid", + Operation::Filetable { .. } => "filetable", + Operation::AddrSpace { .. 
} => "addrspace", + Operation::Sigactions(_) => "sigactions", + Operation::CurrentAddrSpace => "current-addrspace", + Operation::CurrentFiletable => "current-filetable", + Operation::CurrentSigactions => "current-sigactions", + Operation::OpenViaDup => "open-via-dup", + Operation::MmapMinAddr(_) => "mmap-min-addr", + + _ => return Err(Error::new(EOPNOTSUPP)), }); - let len = cmp::min(path.len(), buf.len()); - buf[..len].copy_from_slice(&path.as_bytes()[..len]); - - Ok(len) + read_from(buf, &path.as_bytes(), &mut 0) } fn fstat(&self, id: usize, stat: &mut Stat) -> Result { @@ -774,19 +1081,214 @@ impl Scheme for ProcScheme { let mut handle = self.handles.write().remove(&id).ok_or(Error::new(EBADF))?; handle.continue_ignored_children(); - if let Operation::Trace = handle.info.operation { - ptrace::close_session(handle.info.pid); + let stop_context = if handle.info.pid == context::context_id() { with_context_mut } else { try_stop_context }; + + match handle.info.operation { + Operation::AwaitingAddrSpaceChange { new, new_sp, new_ip } => { + stop_context(handle.info.pid, |context: &mut Context| unsafe { + if let Some(saved_regs) = ptrace::regs_for_mut(context) { + saved_regs.iret.rip = new_ip; + saved_regs.iret.rsp = new_sp; + } else { + context.clone_entry = Some([new_ip, new_sp]); + } - if handle.info.flags & O_EXCL == O_EXCL { - syscall::kill(handle.info.pid, SIGKILL)?; + let prev_addr_space = context.set_addr_space(new); + + if let Some(prev_addr_space) = prev_addr_space { + maybe_cleanup_addr_space(prev_addr_space); + } + + Ok(()) + })?; + let _ = ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_ADDRSPACE_SWITCH, 0)); } + Operation::AddrSpace { addrspace } | Operation::Memory { addrspace } | Operation::MmapMinAddr(addrspace) => maybe_cleanup_addr_space(addrspace), + + Operation::AwaitingFiletableChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { + context.files = new; + Ok(()) + })?, + Operation::AwaitingSigactionsChange(new) => with_context_mut(handle.info.pid, |context: &mut Context| { + context.actions = new; + Ok(()) + })?, + Operation::Trace => { + ptrace::close_session(handle.info.pid); - let contexts = context::contexts(); - if let Some(context) = contexts.get(handle.info.pid) { - let mut context = context.write(); - context.ptrace_stop = false; + if handle.info.flags & O_EXCL == O_EXCL { + syscall::kill(handle.info.pid, SIGKILL)?; + } + + let contexts = context::contexts(); + if let Some(context) = contexts.get(handle.info.pid) { + let mut context = context.write(); + context.ptrace_stop = false; + } } + _ => (), } Ok(0) } + fn fmap(&self, id: usize, map: &Map) -> Result { + self.kfmap(id, &AddrSpace::current()?, map, false) + } +} +impl KernelScheme for ProcScheme { + fn as_addrspace(&self, number: usize) -> Result>> { + if let Operation::AddrSpace { ref addrspace } | Operation::Memory { ref addrspace } = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(addrspace)) + } else { + Err(Error::new(EBADF)) + } + } + fn as_filetable(&self, number: usize) -> Result>>>> { + if let Operation::Filetable { ref filetable } = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(filetable)) + } else { + Err(Error::new(EBADF)) + } + } + fn as_sigactions(&self, number: usize) -> Result>>> { + if let Operation::Sigactions(ref sigactions) = self.handles.read().get(&number).ok_or(Error::new(EBADF))?.info.operation { + Ok(Arc::clone(sigactions)) + } else { + Err(Error::new(EBADF)) + 
} + } + fn kfmap(&self, id: usize, dst_addr_space: &Arc>, map: &crate::syscall::data::Map, consume: bool) -> Result { + let info = self.handles.read().get(&id).ok_or(Error::new(EBADF))?.info.clone(); + + match info.operation { + Operation::GrantHandle { ref description } => { + let (scheme_id, number) = { + let description = description.read(); + + (description.scheme, description.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(EBADFD))?); + scheme.fmap(number, map) + } + Operation::AddrSpace { ref addrspace } => { + if Arc::ptr_eq(addrspace, dst_addr_space) { + return Err(Error::new(EBUSY)); + } + // Limit to transferring/borrowing at most one grant, or part of a grant (splitting + // will be mandatory if grants are coalesced). + + let (requested_dst_page, page_count) = crate::syscall::validate_region(map.address, map.size)?; + let (src_page, _) = crate::syscall::validate_region(map.offset, map.size)?; + + let requested_dst_page = (map.address != 0).then_some(requested_dst_page); + + let mut src_addr_space = addrspace.write(); + let src_addr_space = &mut *src_addr_space; + let mut dst_addr_space = dst_addr_space.write(); + + let src_grant_region = { + let src_region = Region::new(src_page.start_address(), page_count * PAGE_SIZE); + let mut conflicts = src_addr_space.grants.conflicts(src_region); + let first = conflicts.next().ok_or(Error::new(EINVAL))?; + if conflicts.next().is_some() { + return Err(Error::new(EINVAL)); + } + + if !first.can_have_flags(map.flags) { + return Err(Error::new(EACCES)); + } + + first.region().intersect(src_region) + }; + + let grant_page_count = src_grant_region.size() / PAGE_SIZE; + + let src_mapper = &mut src_addr_space.table.utable; + + let result_page = if consume { + let grant = src_addr_space.grants.take(&src_grant_region).expect("grant cannot disappear"); + let (before, middle, after) = grant.extract(src_grant_region).expect("called intersect(), must succeed"); + + if let Some(before) = before { src_addr_space.grants.insert(before); } + if let Some(after) = after { src_addr_space.grants.insert(after); } + + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, dst_flusher| Ok(Grant::transfer(middle, dst_page, src_mapper, dst_mapper, InactiveFlusher::new(), dst_flusher)?))? + } else { + dst_addr_space.mmap(requested_dst_page, grant_page_count, map.flags, |dst_page, flags, dst_mapper, flusher| Ok(Grant::borrow(Page::containing_address(src_grant_region.start_address()), dst_page, grant_page_count, flags, None, src_mapper, dst_mapper, flusher)?))? 
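// Userspace view of the non-consuming branch above, as a sketch: calling fmap()
// on an "addrspace" handle borrows the single grant containing `offset` from the
// target address space into the caller's. The crate paths and exact signatures
// below are assumptions about the redox_syscall userspace crate, illustrative
// only and not part of this patch:

fn borrow_remote(addrspace_fd: usize, remote_addr: usize, size: usize) -> syscall::Result<usize> {
    let map = syscall::Map {
        offset: remote_addr,                  // source address in the target's space
        size,                                 // whole pages, covering at most one grant
        address: 0,                           // 0 asks the kernel to pick a free range
        flags: syscall::MapFlags::PROT_READ,  // must be permitted by the source grant
    };
    // On success the return value is the local virtual address of the borrow.
    unsafe { syscall::fmap(addrspace_fd, &map) }
}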
+ }; + + Ok(result_page.start_address().data()) + } + _ => return Err(Error::new(EBADF)), + } + } +} +extern "C" fn clone_handler() { + let context_lock = Arc::clone(context::contexts().current().expect("expected the current context to be set in a spawn closure")); + + #[cfg(target_arch = "x86_64")] + unsafe { + let [ip, sp] = context_lock.read().clone_entry.expect("clone_entry must be set"); + let [arg, is_singlestep] = [0; 2]; + + crate::start::usermode(ip, sp, arg, is_singlestep); + } +} + +fn inherit_context() -> Result { + let new_id = { + let current_context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); + let new_context_lock = Arc::clone(context::contexts_mut().spawn(clone_handler)?); + + let current_context = current_context_lock.read(); + let mut new_context = new_context_lock.write(); + + new_context.status = Status::Stopped(SIGSTOP); + new_context.euid = current_context.euid; + new_context.egid = current_context.egid; + new_context.ruid = current_context.ruid; + new_context.rgid = current_context.rgid; + new_context.ens = current_context.ens; + new_context.rns = current_context.rns; + new_context.ppid = current_context.id; + new_context.pgid = current_context.pgid; + new_context.umask = current_context.umask; + new_context.sigmask = current_context.sigmask; + new_context.cpu_id = current_context.cpu_id; + + // TODO: More to copy? + + new_context.id + }; + + if ptrace::send_event(crate::syscall::ptrace_event!(PTRACE_EVENT_CLONE, new_id.into())).is_some() { + // Freeze the clone, allow ptrace to put breakpoints + // to it before it starts + let contexts = context::contexts(); + let context = contexts.get(new_id).expect("Newly created context doesn't exist??"); + let mut context = context.write(); + context.ptrace_stop = true; + } + + Ok(new_id) +} +fn extract_scheme_number(fd: usize) -> Result<(Arc, usize)> { + let (scheme_id, number) = match &*context::contexts().current().ok_or(Error::new(ESRCH))?.read().get_file(FileHandle::from(fd)).ok_or(Error::new(EBADF))?.description.read() { + desc => (desc.scheme, desc.number) + }; + let scheme = Arc::clone(scheme::schemes().get(scheme_id).ok_or(Error::new(ENODEV))?); + + Ok((scheme, number)) +} +fn maybe_cleanup_addr_space(addr_space: Arc>) { + if let Ok(mut space) = Arc::try_unwrap(addr_space).map(RwLock::into_inner) { + // We are the last reference to the address space; therefore it must be + // unmapped. + + // TODO: Optimize away clearing of page tables? In that case, what about memory + // deallocation? 
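// The Arc::try_unwrap + RwLock::into_inner test above is the usual "last owner
// runs the teardown" pattern. A self-contained sketch of the same idea in plain
// std Rust, illustrative only and not part of this patch:

use std::sync::{Arc, RwLock};

fn release(shared: Arc<RwLock<Vec<u32>>>) {
    if let Ok(lock) = Arc::try_unwrap(shared) {
        // We held the only strong reference, so nothing else can observe the
        // data anymore and it is safe to tear it down here.
        let data = lock.into_inner().unwrap();
        drop(data);
    }
    // Otherwise another handle is still alive and teardown is its responsibility.
}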
+ for grant in space.grants.into_iter() { + grant.unmap(&mut space.table.utable, ()); + } + } + } diff --git a/src/scheme/root.rs b/src/scheme/root.rs index 248c4b5824acb21c0d52ef5b7ad6739174a28ec8..ff2a13f7da5c9067ed8b132a642fc66e2a3d9597 100644 --- a/src/scheme/root.rs +++ b/src/scheme/root.rs @@ -344,3 +344,4 @@ impl Scheme for RootScheme { Ok(0) } } +impl crate::scheme::KernelScheme for RootScheme {} diff --git a/src/scheme/serio.rs b/src/scheme/serio.rs index 0a747423fd5fd84587930a8a128aea5a1a36fb90..ba0169d11b42322bfc12669a4f770f7e2b561b31 100644 --- a/src/scheme/serio.rs +++ b/src/scheme/serio.rs @@ -162,3 +162,4 @@ impl Scheme for SerioScheme { Ok(0) } } +impl crate::scheme::KernelScheme for SerioScheme {} diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 3002f5558d2a81aa4737d497ee20ed5c091dadde..8602e9b101968e30c785a816b268575a15b633ad 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -26,11 +26,17 @@ pub fn resource() -> Result> { let context = context_lock.read(); let mut stat_string = String::new(); - if context.stack.is_some() { - stat_string.push('U'); + // TODO: All user programs must have some grant in order for executable memory to even + // exist, but is this a good indicator of whether it is user or kernel? + stat_string.push(if let Ok(addr_space) = context.addr_space() { + if addr_space.read().grants.is_empty() { + 'K' + } else { + 'U' + } } else { - stat_string.push('K'); - } + 'R' + }); match context.status { context::Status::Runnable => { stat_string.push('R'); @@ -77,22 +83,11 @@ pub fn resource() -> Result> { if let Some(ref kstack) = context.kstack { memory += kstack.len(); } - for shared_mem in context.image.iter() { - shared_mem.with(|mem| { - memory += mem.size(); - }); - } - if let Some(ref stack) = context.stack { - stack.with(|stack| { - memory += stack.size(); - }); - } - if let Some(ref sigstack) = context.sigstack { - memory += sigstack.size(); - } - for grant in context.grants.read().iter() { - if grant.is_owned() { - memory += grant.size(); + if let Ok(addr_space) = context.addr_space() { + for grant in addr_space.read().grants.iter() { + if grant.is_owned() { + memory += grant.size(); + } } } diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index da6576cf917b912dbdd0a6bc1e9c71a2997a1ea8..d8f1989c5eb74f7513525397eddc0e93ad6ad1a5 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -52,6 +52,7 @@ impl SysScheme { files.insert("scheme_num", Box::new(scheme_num::resource)); files.insert("syscall", Box::new(syscall::resource)); files.insert("uname", Box::new(uname::resource)); + files.insert("env", Box::new(|| Ok(Vec::from(crate::init_env())))); #[cfg(target_arch = "x86_64")] files.insert("spurious_irq", Box::new(irq::spurious_irq_resource)); @@ -169,3 +170,4 @@ impl Scheme for SysScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for SysScheme {} diff --git a/src/scheme/time.rs b/src/scheme/time.rs index bc0143bd269b775ba6f9c008c8c436c48dcdbc6c..a5f9286428d571bce754d17f4fe16717b1a41e03 100644 --- a/src/scheme/time.rs +++ b/src/scheme/time.rs @@ -117,3 +117,4 @@ impl Scheme for TimeScheme { self.handles.write().remove(&id).ok_or(Error::new(EBADF)).and(Ok(0)) } } +impl crate::scheme::KernelScheme for TimeScheme {} diff --git a/src/scheme/user.rs b/src/scheme/user.rs index 5d7f9c6dbba2d6fe3a4d81bd15563afb4363054e..0f7f2ccc0823211106ddebb709c72ceb0306db13 100644 --- a/src/scheme/user.rs +++ b/src/scheme/user.rs @@ -8,12 +8,12 
@@ use spin::{Mutex, RwLock}; use crate::context::{self, Context}; use crate::context::file::FileDescriptor; -use crate::context::memory::{DANGLING, page_flags, round_down_pages, Grant, Region, GrantFileRef}; +use crate::context::memory::{AddrSpace, DANGLING, page_flags, Grant, Region, GrantFileRef}; use crate::event; -use crate::paging::{PAGE_SIZE, InactivePageTable, VirtualAddress}; +use crate::paging::{PAGE_SIZE, mapper::InactiveFlusher, Page, round_down_pages, round_up_pages, VirtualAddress}; use crate::scheme::{AtomicSchemeId, SchemeId}; use crate::sync::{WaitQueue, WaitMap}; -use crate::syscall::data::{Map, OldMap, Packet, Stat, StatVfs, TimeSpec}; +use crate::syscall::data::{Map, Packet, Stat, StatVfs, TimeSpec}; use crate::syscall::error::*; use crate::syscall::flag::{EventFlags, EVENT_READ, O_NONBLOCK, MapFlags, PROT_READ, PROT_WRITE}; use crate::syscall::number::*; @@ -123,10 +123,11 @@ impl UserInner { ).map(|addr| addr.data()) } + // TODO: Use an address space Arc over a context Arc. While contexts which share address spaces + // still can access borrowed scheme pages, it would both be cleaner and would handle the case + // where the initial context is closed. fn capture_inner(context_weak: &Weak>, dst_address: usize, address: usize, size: usize, flags: MapFlags, desc_opt: Option) -> Result { - // TODO: More abstractions over grant creation! - if size == 0 { // NOTE: Rather than returning NULL, we return a dummy dangling address, that is also // non-canonical on x86. This means that scheme handlers do not need to check the @@ -140,29 +141,23 @@ impl UserInner { return Ok(VirtualAddress::new(DANGLING)); } - let context_lock = context_weak.upgrade().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; + let dst_addr_space = Arc::clone(context_weak.upgrade().ok_or(Error::new(ESRCH))?.read().addr_space()?); + let mut dst_addr_space = dst_addr_space.write(); - let mut grants = context.grants.write(); + let src_page = Page::containing_address(VirtualAddress::new(round_down_pages(address))); + let offset = address - src_page.start_address().data(); + let page_count = round_up_pages(offset + size) / PAGE_SIZE; + let requested_dst_page = (dst_address != 0).then_some(Page::containing_address(VirtualAddress::new(round_down_pages(dst_address)))); - let src_address = round_down_pages(address); - let offset = address - src_address; - let src_region = Region::new(VirtualAddress::new(src_address), offset + size).round(); - let dst_region = grants.find_free_at(VirtualAddress::new(dst_address), src_region.size(), flags)?; + let current_addrspace = AddrSpace::current()?; + let mut current_addrspace = current_addrspace.write(); //TODO: Use syscall_head and syscall_tail to avoid leaking data - grants.insert(Grant::map_inactive( - src_region.start_address(), - dst_region.start_address(), - src_region.size(), - page_flags(flags), - desc_opt, - &mut new_table, - )); + let dst_page = dst_addr_space.mmap(requested_dst_page, page_count, flags, |dst_page, page_flags, mapper, flusher| { + Ok(Grant::borrow(src_page, dst_page, page_count, page_flags, desc_opt, &mut current_addrspace.table.utable, mapper, flusher)?) 
+ })?; - Ok(VirtualAddress::new(dst_region.start_address().data() + offset)) + Ok(dst_page.start_address().add(offset)) } pub fn release(&self, address: usize) -> Result<()> { @@ -170,16 +165,15 @@ impl UserInner { return Ok(()); } let context_lock = self.context.upgrade().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); + let context = context_lock.write(); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - let mut grants = context.grants.write(); + let mut addr_space = context.addr_space()?.write(); - let region = match grants.contains(VirtualAddress::new(address)).map(Region::from) { + let region = match addr_space.grants.contains(VirtualAddress::new(address)).map(Region::from) { Some(region) => region, - None => return Err(Error::new(EFAULT)), + None => return Err(Error::new(EFAULT)), }; - grants.take(®ion).unwrap().unmap_inactive(&mut new_table); + addr_space.grants.take(®ion).unwrap().unmap(&mut addr_space.table.utable, InactiveFlusher::new()); Ok(()) } @@ -228,6 +222,9 @@ impl UserInner { _ => println!("Unknown scheme -> kernel message {}", packet.a) } } else { + // The motivation of doing this here instead of within the fmap handler, is that we + // can operate on an inactive table. This reduces the number of page table reloads + // from two (context switch + active TLB flush) to one (context switch). if let Some((context_weak, desc, map)) = self.fmap.lock().remove(&packet.id) { if let Ok(address) = Error::demux(packet.a) { if address % PAGE_SIZE > 0 { @@ -238,8 +235,8 @@ impl UserInner { if let Ok(grant_address) = res { if let Some(context_lock) = context_weak.upgrade() { let context = context_lock.read(); - let mut grants = context.grants.write(); - grants.funmap.insert( + let mut addr_space = context.addr_space()?.write(); + addr_space.grants.funmap.insert( Region::new(grant_address, map.size), VirtualAddress::new(address) ); @@ -269,6 +266,54 @@ impl UserInner { pub fn fsync(&self) -> Result { Ok(0) } + + fn fmap_inner(&self, file: usize, map: &Map) -> Result { + let (pid, uid, gid, context_weak, desc) = { + let context_lock = Arc::clone(context::contexts().current().ok_or(Error::new(ESRCH))?); + let context = context_lock.read(); + if map.size % PAGE_SIZE != 0 { + log::warn!("Unaligned map size for context {:?}", context.name.try_read().as_deref()); + } + // TODO: Faster, cleaner mechanism to get descriptor + let scheme = self.scheme_id.load(Ordering::SeqCst); + let mut desc_res = Err(Error::new(EBADF)); + for context_file_opt in context.files.read().iter() { + if let Some(context_file) = context_file_opt { + let (context_scheme, context_number) = { + let desc = context_file.description.read(); + (desc.scheme, desc.number) + }; + if context_scheme == scheme && context_number == file { + desc_res = Ok(context_file.clone()); + break; + } + } + } + let desc = desc_res?; + (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) + }; + + let address = self.capture(map)?; + + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + + self.fmap.lock().insert(id, (context_weak, desc, *map)); + + let result = self.call_inner(Packet { + id, + pid: pid.into(), + uid, + gid, + a: SYS_FMAP, + b: file, + c: address, + d: mem::size_of::() + }); + + let _ = self.release(address); + + result + } } /// `UserInner` has to be wrapped @@ -376,135 +421,10 @@ impl Scheme for UserScheme { inner.call(SYS_FEVENT, file, flags.bits(), 0).map(EventFlags::from_bits_truncate) } - fn fmap_old(&self, file: usize, map: 
&OldMap) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - - let (pid, uid, gid, context_lock, desc) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - // TODO: Faster, cleaner mechanism to get descriptor - let scheme = inner.scheme_id.load(Ordering::SeqCst); - let mut desc_res = Err(Error::new(EBADF)); - for context_file_opt in context.files.read().iter() { - if let Some(context_file) = context_file_opt { - let (context_scheme, context_number) = { - let desc = context_file.description.read(); - (desc.scheme, desc.number) - }; - if context_scheme == scheme && context_number == file { - desc_res = Ok(context_file.clone()); - break; - } - } - } - let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) - }; - - let address = inner.capture(map)?; - - let id = inner.next_id.fetch_add(1, Ordering::SeqCst); - - inner.fmap.lock().insert(id, (context_lock, desc, Map { - offset: map.offset, - size: map.size, - flags: map.flags, - address: 0, - })); - - let result = inner.call_inner(Packet { - id, - pid: pid.into(), - uid, - gid, - a: SYS_FMAP_OLD, - b: file, - c: address, - d: mem::size_of::() - }); - - let _ = inner.release(address); - - result - } - fn fmap(&self, file: usize, map: &Map) -> Result { let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let (pid, uid, gid, context_lock, desc) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - // TODO: Faster, cleaner mechanism to get descriptor - let scheme = inner.scheme_id.load(Ordering::SeqCst); - let mut desc_res = Err(Error::new(EBADF)); - for context_file_opt in context.files.read().iter() { - if let Some(context_file) = context_file_opt { - let (context_scheme, context_number) = { - let desc = context_file.description.read(); - (desc.scheme, desc.number) - }; - if context_scheme == scheme && context_number == file { - desc_res = Ok(context_file.clone()); - break; - } - } - } - let desc = desc_res?; - (context.id, context.euid, context.egid, Arc::downgrade(&context_lock), desc) - }; - - let address = inner.capture(map)?; - - let id = inner.next_id.fetch_add(1, Ordering::SeqCst); - - inner.fmap.lock().insert(id, (context_lock, desc, *map)); - - let result = inner.call_inner(Packet { - id, - pid: pid.into(), - uid, - gid, - a: SYS_FMAP, - b: file, - c: address, - d: mem::size_of::() - }); - - let _ = inner.release(address); - - result - } - - fn funmap_old(&self, grant_address: usize) -> Result { - let inner = self.inner.upgrade().ok_or(Error::new(ENODEV))?; - let address_opt = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let mut grants = context.grants.write(); - let funmap = &mut grants.funmap; - let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back(); - - let grant_address = VirtualAddress::new(grant_address); - - if let Some((&grant, &user_base)) = entry { - if grant_address >= grant.end_address() { - return Err(Error::new(EINVAL)); - } - funmap.remove(&grant); - let user = Region::new(user_base, grant.size()); - Some(grant.rebase(user, grant_address).data()) - } else { - None - } - }; - if let Some(user_address) = address_opt { - inner.call(SYS_FUNMAP_OLD, user_address, 0, 0) - } else { - Err(Error::new(EINVAL)) - } + 
inner.fmap_inner(file, map) } fn funmap(&self, grant_address: usize, size: usize) -> Result { @@ -513,8 +433,8 @@ impl Scheme for UserScheme { let contexts = context::contexts(); let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; let context = context_lock.read(); - let mut grants = context.grants.write(); - let funmap = &mut grants.funmap; + let mut addr_space = context.addr_space()?.write(); + let funmap = &mut addr_space.grants.funmap; let entry = funmap.range(..=Region::byte(VirtualAddress::new(grant_address))).next_back(); let grant_address = VirtualAddress::new(grant_address); @@ -606,3 +526,4 @@ impl Scheme for UserScheme { inner.call(SYS_CLOSE, file, 0, 0) } } +impl crate::scheme::KernelScheme for UserScheme {} diff --git a/src/syscall/debug.rs b/src/syscall/debug.rs index 4567058d1177ce627cfe989dc66f15a152faddb9..53e6ac517bf2a9eb3dce32e491c1d6105b6c3590 100644 --- a/src/syscall/debug.rs +++ b/src/syscall/debug.rs @@ -1,8 +1,7 @@ use core::{ascii, mem}; use alloc::string::String; -use alloc::vec::Vec; -use super::data::{OldMap, Map, Stat, TimeSpec}; +use super::data::{Map, Stat, TimeSpec}; use super::flag::*; use super::number::*; use super::validate::*; @@ -106,14 +105,6 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - c, d ), - SYS_FMAP_OLD => format!( - "fmap_old({}, {:?})", - b, - validate_slice( - c as *const OldMap, - d/mem::size_of::() - ), - ), SYS_FMAP => format!( "fmap({}, {:?})", b, @@ -122,10 +113,6 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - d/mem::size_of::() ), ), - SYS_FUNMAP_OLD => format!( - "funmap_old({:#X})", - b - ), SYS_FUNMAP => format!( "funmap({:#X}, {:#X})", b, @@ -183,37 +170,10 @@ pub fn format_call(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize) - b, validate_slice_mut(c as *mut TimeSpec, 1) ), - SYS_CLONE => format!( - "clone({:?})", - CloneFlags::from_bits(b) - ), SYS_EXIT => format!( "exit({})", b ), - //TODO: Cleanup, do not allocate - SYS_FEXEC => format!( - "fexec({}, {:?}, {:?})", - b, - validate_slice( - c as *const [usize; 2], - d - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() - }), - validate_slice( - e as *const [usize; 2], - f - ).map(|slice| { - slice.iter().map(|a| - validate_slice(a[0] as *const u8, a[1]).ok() - .and_then(|s| ::core::str::from_utf8(s).ok()) - ).collect::>>() - }) - ), SYS_FUTEX => format!( "futex({:#X} [{:?}], {}, {}, {}, {})", b, diff --git a/src/syscall/driver.rs b/src/syscall/driver.rs index 1e64e1bb4e58e5ed2b04267c7e32d472fcac5498..9440dd09cc2ea7d824125f8b3b45b85a57c7eb89 100644 --- a/src/syscall/driver.rs +++ b/src/syscall/driver.rs @@ -1,12 +1,14 @@ use crate::interrupt::InterruptStack; -use crate::memory::{allocate_frames_complex, deallocate_frames, Frame}; -use crate::paging::{ActivePageTable, PageFlags, PhysicalAddress, VirtualAddress}; +use crate::memory::{allocate_frames_complex, deallocate_frames, Frame, PAGE_SIZE}; +use crate::paging::{Page, PageFlags, PhysicalAddress, VirtualAddress, mapper::PageFlushAll}; use crate::paging::entry::EntryFlags; use crate::context; -use crate::context::memory::{Grant, Region}; +use crate::context::memory::{DANGLING, Grant, Region}; use crate::syscall::error::{Error, EFAULT, EINVAL, ENOMEM, EPERM, ESRCH, Result}; use crate::syscall::flag::{PhysallocFlags, PartialAllocStrategy, PhysmapFlags, PHYSMAP_WRITE, PHYSMAP_WRITE_COMBINE, PHYSMAP_NO_CACHE}; +use alloc::sync::Arc; + 
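// The funmap bookkeeping in src/scheme/user.rs above finds the mapping at or
// below an address with a BTreeMap range query (range(..=key).next_back()).
// The same idiom in plain std Rust, as a sketch, illustrative only and not
// part of this patch:

use std::collections::BTreeMap;

fn mapping_containing(regions: &BTreeMap<usize, usize>, addr: usize) -> Option<(usize, usize)> {
    // Keys are region start addresses, values are region sizes.
    let (&start, &size) = regions.range(..=addr).next_back()?;
    // The nearest region at or below `addr` only counts if it also covers it.
    (addr < start + size).then(|| (start, size))
}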
fn enforce_root() -> Result<()> { let contexts = context::contexts(); let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; @@ -71,22 +73,21 @@ pub fn physfree(physical_address: usize, size: usize) -> Result { } //TODO: verify exlusive access to physical memory +// TODO: Replace this completely with something such as `memory:physical`. Mmapping at offset +// `physaddr` to `address` (optional) will map that physical address. We would have to find out +// some way to pass flags such as WRITE_COMBINE/NO_CACHE however. pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { - //TODO: Abstract with other grant creation - if size == 0 { - Ok(0) - } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + // TODO: Check physical_address against MAXPHYADDR. - let mut grants = context.grants.write(); + let end = 1 << 52; + if physical_address.saturating_add(size) > end || physical_address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 { + return Err(Error::new(EINVAL)); + } - let from_address = (physical_address/4096) * 4096; - let offset = physical_address - from_address; - let full_size = ((offset + size + 4095)/4096) * 4096; - let mut to_address = crate::USER_GRANT_OFFSET; + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + let mut addr_space = addr_space.write(); + addr_space.mmap(None, size / PAGE_SIZE, Default::default(), |dst_page, _, dst_mapper, dst_flusher| { let mut page_flags = PageFlags::new().user(true); if flags.contains(PHYSMAP_WRITE) { page_flags = page_flags.write(true); @@ -98,30 +99,18 @@ pub fn inner_physmap(physical_address: usize, size: usize, flags: PhysmapFlags) if flags.contains(PHYSMAP_NO_CACHE) { page_flags = page_flags.custom_flag(EntryFlags::NO_CACHE.bits(), true); } + Grant::physmap( + Frame::containing_address(PhysicalAddress::new(physical_address)), + dst_page, + size / PAGE_SIZE, + page_flags, + dst_mapper, + dst_flusher, + ) + }).map(|page| page.start_address().data()) - // TODO: Make this faster than Sonic himself by using le superpowers of BTreeSet - - for grant in grants.iter() { - let start = grant.start_address().data(); - if to_address + full_size < start { - break; - } - - let pages = (grant.size() + 4095) / 4096; - let end = start + pages * 4096; - to_address = end; - } - - grants.insert(Grant::physmap( - PhysicalAddress::new(from_address), - VirtualAddress::new(to_address), - full_size, - page_flags - )); - - Ok(to_address + offset) - } } +// TODO: Remove this syscall, funmap makes it redundant. 
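// The argument checks performed by inner_physmap above, pulled out as a sketch
// so the bounds are easy to see: the physical address and size must both be
// page-aligned and the whole region must stay at or below the assumed 2^52
// physical-address limit. PAGE_SIZE is assumed to be 4096 here; illustrative
// only and not part of this patch:

const PAGE_SIZE: usize = 4096;
const MAX_PHYS: usize = 1 << 52;

fn physmap_args_ok(physical_address: usize, size: usize) -> bool {
    physical_address % PAGE_SIZE == 0
        && size % PAGE_SIZE == 0
        && physical_address.saturating_add(size) <= MAX_PHYS
}

// For example, physmap_args_ok(0xFEC0_0000, 4096) passes (a typical MMIO page),
// while physmap_args_ok(0xFEC0_0010, 4096) fails the alignment check.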
pub fn physmap(physical_address: usize, size: usize, flags: PhysmapFlags) -> Result { enforce_root()?; inner_physmap(physical_address, size, flags) @@ -131,14 +120,12 @@ pub fn inner_physunmap(virtual_address: usize) -> Result { if virtual_address == 0 { Ok(0) } else { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + let mut addr_space = addr_space.write(); - let mut grants = context.grants.write(); + if let Some(region) = addr_space.grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - grants.take(®ion).unwrap().unmap(); + addr_space.grants.take(®ion).unwrap().unmap(&mut addr_space.table.utable, PageFlushAll::new()); return Ok(0); } @@ -153,10 +140,11 @@ pub fn physunmap(virtual_address: usize) -> Result { pub fn virttophys(virtual_address: usize) -> Result { enforce_root()?; - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(virtual_address).kind()) }; + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + let addr_space = addr_space.read(); - match active_table.translate(VirtualAddress::new(virtual_address)) { - Some(physical_address) => Ok(physical_address.data()), + match addr_space.table.utable.translate(VirtualAddress::new(virtual_address)) { + Some((physical_address, _)) => Ok(physical_address.data()), None => Err(Error::new(EFAULT)) } } diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index 642a80af6e9023679267aad7371b527f30faadc2..353eab533ce37bc1915887531f179e08174c17ac 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -1,15 +1,10 @@ //! 
Filesystem syscalls use alloc::sync::Arc; -use alloc::vec::Vec; use core::str; -use core::sync::atomic::Ordering; use spin::RwLock; use crate::context::file::{FileDescriptor, FileDescription}; -use crate::context::memory::Region; use crate::context; -use crate::memory::PAGE_SIZE; -use crate::paging::VirtualAddress; use crate::scheme::{self, FileHandle}; use crate::syscall::data::{Packet, Stat}; use crate::syscall::error::*; @@ -469,103 +464,11 @@ pub fn fstat(fd: FileHandle, stat: &mut Stat) -> Result { scheme.fstat(description.number, stat) } -pub fn funmap_old(virtual_address: usize) -> Result { - if virtual_address == 0 { - Ok(0) - } else { - let mut desc_opt = None; - - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.write(); - - if let Some(region) = grants.contains(VirtualAddress::new(virtual_address)).map(Region::from) { - let mut grant = grants.take(®ion).unwrap(); - desc_opt = grant.desc_opt.take(); - grant.unmap(); - } - } - - if let Some(file_ref) = desc_opt { - let scheme_id = { file_ref.desc.description.read().scheme }; - - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - let res = scheme.funmap_old(virtual_address); - - let _ = file_ref.desc.close(); - - res - } else { - Err(Error::new(EFAULT)) - } - } -} - pub fn funmap(virtual_address: usize, length: usize) -> Result { - if virtual_address == 0 || length == 0 { - return Ok(0); - } else if virtual_address % PAGE_SIZE != 0 { - return Err(Error::new(EINVAL)); - } - - let mut notify_files = Vec::new(); + let (page, page_count) = crate::syscall::validate::validate_region(virtual_address, length)?; - let virtual_address = VirtualAddress::new(virtual_address); - let requested = Region::new(virtual_address, length); - - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - let mut grants = context.grants.write(); - - let conflicting: Vec = grants.conflicts(requested).map(Region::from).collect(); - - for conflict in conflicting { - let grant = grants.take(&conflict).expect("conflicting region didn't exist"); - let intersection = grant.intersect(requested); - let (before, mut grant, after) = grant.extract(intersection.round()).expect("conflicting region shared no common parts"); - - // Notify scheme that holds grant - if let Some(file_desc) = grant.desc_opt.take() { - notify_files.push((file_desc, intersection)); - } - - // Keep untouched regions - if let Some(before) = before { - grants.insert(before); - } - if let Some(after) = after { - grants.insert(after); - } - - // Remove irrelevant region - grant.unmap(); - } - } - - for (file_ref, intersection) in notify_files { - let scheme_id = { file_ref.desc.description.read().scheme }; - - let scheme = { - let schemes = scheme::schemes(); - let scheme = schemes.get(scheme_id).ok_or(Error::new(EBADF))?; - scheme.clone() - }; - let res = scheme.funmap(intersection.start_address().data(), intersection.size()); - - let _ = file_ref.desc.close(); - - res?; - } + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + addr_space.write().munmap(page, page_count); Ok(0) } diff --git a/src/syscall/futex.rs b/src/syscall/futex.rs index 53fd2837ff6457130e7ec1bcab7a3901f730c3e4..b3fde4bbe240cc529bb24f9bd44c943ec57cb48e 100644 --- a/src/syscall/futex.rs +++ 
b/src/syscall/futex.rs @@ -12,7 +12,7 @@ use rmm::Arch; use crate::context::{self, Context}; use crate::time; use crate::memory::PhysicalAddress; -use crate::paging::{ActivePageTable, TableKind, VirtualAddress}; +use crate::paging::VirtualAddress; use crate::syscall::data::TimeSpec; use crate::syscall::error::{Error, Result, ESRCH, EAGAIN, EFAULT, EINVAL}; use crate::syscall::flag::{FUTEX_WAIT, FUTEX_WAIT64, FUTEX_WAKE, FUTEX_REQUEUE}; @@ -44,8 +44,9 @@ pub fn futexes_mut() -> RwLockWriteGuard<'static, FutexList> { } pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> Result { - let target_physaddr = unsafe { - let active_table = ActivePageTable::new(TableKind::User); + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + + let (target_physaddr, _) = unsafe { let virtual_address = VirtualAddress::new(addr); if !crate::CurrentRmmArch::virt_is_valid(virtual_address) { @@ -58,7 +59,7 @@ pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> R return Err(Error::new(EFAULT)); } - active_table.translate(virtual_address).ok_or(Error::new(EFAULT))? + addr_space.read().table.utable.translate(virtual_address).ok_or(Error::new(EFAULT))? }; match op { @@ -162,7 +163,7 @@ pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> R Ok(woken) }, FUTEX_REQUEUE => { - let addr2_physaddr = unsafe { + let (addr2_physaddr, _) = unsafe { let addr2_virt = VirtualAddress::new(addr2); if !crate::CurrentRmmArch::virt_is_valid(addr2_virt) { @@ -175,8 +176,7 @@ pub fn futex(addr: usize, op: usize, val: usize, val2: usize, addr2: usize) -> R return Err(Error::new(EFAULT)); } - let active_table = ActivePageTable::new(TableKind::User); - active_table.translate(addr2_virt).ok_or(Error::new(EFAULT))? + addr_space.read().table.utable.translate(addr2_virt).ok_or(Error::new(EFAULT))? }; let mut woken = 0; diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs index cb64c4c18a4b5cefc076a51888501b8d11dacd83..613bb00f4738e8238c3d6de94a314765485b79c0 100644 --- a/src/syscall/mod.rs +++ b/src/syscall/mod.rs @@ -25,9 +25,11 @@ pub use self::process::*; pub use self::time::*; pub use self::validate::*; +use self::scheme::Scheme as _; + use self::data::{Map, SigAction, Stat, TimeSpec}; -use self::error::{Error, Result, ENOSYS}; -use self::flag::{CloneFlags, MapFlags, PhysmapFlags, WaitFlags}; +use self::error::{Error, Result, ENOSYS, EINVAL}; +use self::flag::{MapFlags, PhysmapFlags, WaitFlags}; use self::number::*; use crate::context::ContextId; @@ -70,7 +72,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u match a & SYS_ARG { SYS_ARG_SLICE => match a { SYS_FMAP if b == !0 => { - MemoryScheme::fmap_anonymous(unsafe { validate_ref(c as *const Map, d)? }) + MemoryScheme.fmap(!0, unsafe { validate_ref(c as *const Map, d)? 
}) }, _ => file_op_slice(a, fd, validate_slice(c as *const u8, d)?), } @@ -83,27 +85,8 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_DUP => dup(fd, validate_slice(c as *const u8, d)?).map(FileHandle::into), SYS_DUP2 => dup2(fd, FileHandle::from(c), validate_slice(d as *const u8, e)?).map(FileHandle::into), SYS_FCNTL => fcntl(fd, c, d), - SYS_FEXEC => fexec(fd, validate_slice(c as *const [usize; 2], d)?, validate_slice(e as *const [usize; 2], f)?), SYS_FRENAME => frename(fd, validate_str(c as *const u8, d)?), SYS_FUNMAP => funmap(b, c), - SYS_FMAP_OLD => { - { - let contexts = crate::context::contexts(); - let current = contexts.current().unwrap(); - let current = current.read(); - println!("{:?} using deprecated fmap(...) call", *current.name.read()); - } - file_op(a, fd, c, d) - }, - SYS_FUNMAP_OLD => { - { - let contexts = crate::context::contexts(); - let current = contexts.current().unwrap(); - let current = current.read(); - println!("{:?} using deprecated funmap(...) call", *current.name.read()); - } - funmap_old(b) - }, _ => file_op(a, fd, c, d) } } @@ -130,27 +113,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u SYS_GETPID => getpid().map(ContextId::into), SYS_GETPGID => getpgid(ContextId::from(b)).map(ContextId::into), SYS_GETPPID => getppid().map(ContextId::into), - SYS_CLONE => { - let b = CloneFlags::from_bits_truncate(b); - - #[cfg(not(target_arch = "x86_64"))] - { - //TODO: CLONE_STACK - let ret = clone(b, bp).map(ContextId::into); - ret - } - #[cfg(target_arch = "x86_64")] - { - let old_rsp = stack.iret.rsp; - if b.contains(flag::CLONE_STACK) { - stack.iret.rsp = c; - } - let ret = clone(b, bp).map(ContextId::into); - stack.iret.rsp = old_rsp; - ret - } - }, SYS_EXIT => exit((b & 0xFF) << 8), SYS_KILL => kill(ContextId::from(b), c), SYS_WAITPID => waitpid(ContextId::from(b), c, WaitFlags::from_bits_truncate(d)).map(ContextId::into), @@ -210,8 +173,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* - let debug = { + /*let debug = { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); @@ -240,8 +202,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } println!("{}", debug::format_call(a, b, c, d, e, f)); - } - */ + }*/ // The next lines set the current syscall in the context struct, then once the inner() function // completes, we set the current syscall to none. 
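// With SYS_FMAP accepting fd == !0 for anonymous memory (routed to MemoryScheme
// in the dispatcher above), an anonymous mapping request from userspace looks
// roughly like the sketch below. The syscall-crate paths and signatures are
// assumptions, illustrative only and not part of this patch:

fn map_anonymous(pages: usize) -> syscall::Result<usize> {
    let map = syscall::Map {
        offset: 0,                        // no backing file, so no file offset
        size: pages * 4096,               // whole pages (PAGE_SIZE assumed 4096)
        address: 0,                       // let the kernel choose the address
        flags: syscall::MapFlags::PROT_READ | syscall::MapFlags::PROT_WRITE,
    };
    // !0 is the "no file" handle that the dispatcher sends to MemoryScheme.
    unsafe { syscall::fmap(!0, &map) }
}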
@@ -266,8 +227,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u } } - /* - if debug { + /*if debug { let contexts = crate::context::contexts(); if let Some(context_lock) = contexts.current() { let context = context_lock.read(); @@ -284,8 +244,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, bp: u println!("Err({} ({:#X}))", err, err.errno); } } - } - */ + }*/ // errormux turns Result into -errno Error::mux(result) diff --git a/src/syscall/process.rs b/src/syscall/process.rs index c246568d76b7699d97fb49cda52cc55ffaefdf48..523d03b658113111fd54d2603064948910995f1b 100644 --- a/src/syscall/process.rs +++ b/src/syscall/process.rs @@ -1,599 +1,52 @@ use alloc::{ - boxed::Box, - collections::BTreeSet, - string::String, sync::Arc, vec::Vec, }; -use core::alloc::{GlobalAlloc, Layout}; -use core::ops::DerefMut; -use core::{intrinsics, mem, str}; +use core::mem; + use spin::{RwLock, RwLockWriteGuard}; -use crate::context::file::{FileDescription, FileDescriptor}; -use crate::context::memory::{UserGrants, Region}; -use crate::context::{Context, ContextId, WaitpidKey}; +use crate::context::{Context, ContextId, memory::AddrSpace, WaitpidKey}; + +use crate::Bootstrap; use crate::context; -#[cfg(not(feature="doc"))] -use crate::elf::{self, program_header}; use crate::interrupt; -use crate::ipi::{ipi, IpiKind, IpiTarget}; -use crate::memory::allocate_frames; -use crate::paging::mapper::PageFlushAll; -use crate::paging::{ActivePageTable, InactivePageTable, Page, PageFlags, TableKind, VirtualAddress, PAGE_SIZE}; -use crate::{ptrace, syscall}; -use crate::scheme::FileHandle; +use crate::paging::mapper::{Flusher, InactiveFlusher, PageFlushAll}; +use crate::paging::{Page, PageFlags, VirtualAddress, PAGE_SIZE}; +use crate::ptrace; use crate::start::usermode; -use crate::syscall::data::{SigAction, Stat}; +use crate::syscall::data::SigAction; use crate::syscall::error::*; -use crate::syscall::flag::{wifcontinued, wifstopped, AT_ENTRY, AT_NULL, AT_PHDR, AT_PHENT, AT_PHNUM, CloneFlags, - CLONE_FILES, CLONE_FS, CLONE_SIGHAND, CLONE_STACK, CLONE_VFORK, CLONE_VM, - MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, PTRACE_EVENT_CLONE, - PTRACE_STOP_EXIT, SigActionFlags, SIG_BLOCK, SIG_DFL, SIG_SETMASK, SIG_UNBLOCK, - SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; +use crate::syscall::flag::{wifcontinued, wifstopped, MapFlags, PROT_EXEC, PROT_READ, PROT_WRITE, + PTRACE_STOP_EXIT, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + SIGCONT, SIGTERM, WaitFlags, WCONTINUED, WNOHANG, WUNTRACED}; use crate::syscall::ptrace_event; -use crate::syscall::validate::{validate_slice, validate_slice_mut}; - -pub fn clone(flags: CloneFlags, stack_base: usize) -> Result { - let ppid; - let pid; - { - let pgid; - let ruid; - let rgid; - let rns; - let euid; - let egid; - let ens; - let umask; - let sigmask; - let mut cpu_id_opt = None; - let arch; - let vfork; - let mut kfx_opt = None; - let mut kstack_opt = None; - let mut offset = 0; - let mut image = vec![]; - let mut stack_opt = None; - let mut sigstack_opt = None; - let mut grants; - let name; - let cwd; - let files; - let actions; - - // Copy from old process - { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - - ppid = context.id; - pgid = context.pgid; - ruid = context.ruid; - rgid = context.rgid; - rns = context.rns; - euid = context.euid; - egid = context.egid; - ens = context.ens; - sigmask = context.sigmask; - umask = 
context.umask; - - // Uncomment to disable threads on different CPUs - //TODO: fix memory allocation races when this is removed - if flags.contains(CLONE_VM) { - cpu_id_opt = context.cpu_id; - } - - arch = context.arch.clone(); - - if let Some(ref fx) = context.kfx { - let new_fx = unsafe { - let new_fx_ptr = crate::ALLOCATOR.alloc(Layout::from_size_align_unchecked(1024, 16)); - if new_fx_ptr.is_null() { - // FIXME: It's mildly ironic that the only place where clone can fail with - // ENOMEM, is when copying 1024 bytes to merely store vector registers. - // Although in order to achieve full kernel-panic immunity, we'll need to - // completely phase out all usage of liballoc data structures, and use our - // own library/port liballoc, since panicking on OOM is not good for a - // kernel. - return Err(Error::new(ENOMEM)); - } - new_fx_ptr.copy_from_nonoverlapping(fx.as_ptr(), fx.len()); - Box::from_raw(new_fx_ptr as *mut [u8; 1024]) - }; - kfx_opt = Some(new_fx); - } - - #[cfg(target_arch = "x86_64")] - { - if let Some(ref stack) = context.kstack { - // Get the relative offset to the return address of the function - // obtaining `stack_base`. - // - // (base pointer - start of stack) - one - offset = stack_base - stack.as_ptr() as usize - mem::size_of::(); // Add clone ret - let mut new_stack = stack.clone(); - - unsafe { - // Set clone's return value to zero. This is done because - // the clone won't return like normal, which means the value - // would otherwise never get set. - if let Some(regs) = ptrace::rebase_regs_ptr_mut(context.regs, Some(&mut new_stack)) { - (*regs).scratch.rax = 0; - } - - // Change the return address of the child (previously - // syscall) to the arch-specific clone_ret callback - let func_ptr = new_stack.as_mut_ptr().add(offset); - *(func_ptr as *mut usize) = interrupt::syscall::clone_ret as usize; - } - - kstack_opt = Some(new_stack); - } - } - - #[cfg(not(target_arch = "x86_64"))] - { - if let Some(ref stack) = context.kstack { - offset = stack_base - stack.as_ptr() as usize; - let mut new_stack = stack.clone(); - - kstack_opt = Some(new_stack); - } - } - - if flags.contains(CLONE_VM) { - for memory_shared in context.image.iter() { - image.push(memory_shared.clone()); - } - } else { - for memory_shared in context.image.iter() { - memory_shared.with(|memory| { - let mut new_memory = context::memory::Memory::new( - VirtualAddress::new(memory.start_address().data() + crate::USER_TMP_OFFSET), - memory.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(memory.start_address().data() as *const u8, - new_memory.start_address().data() as *mut u8, - memory.size()); - } - - new_memory.remap(memory.flags()); - image.push(new_memory.to_shared()); - }); - } - } - - if let Some(ref stack_shared) = context.stack { - if flags.contains(CLONE_STACK) { - stack_opt = Some(stack_shared.clone()); - } else { - stack_shared.with(|stack| { - let mut new_stack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_STACK_OFFSET), - stack.size(), - PageFlags::new().write(true), - false - ); - - unsafe { - intrinsics::copy(stack.start_address().data() as *const u8, - new_stack.start_address().data() as *mut u8, - stack.size()); - } - - new_stack.remap(stack.flags()); - stack_opt = Some(new_stack.to_shared()); - }); - } - } - - if let Some(ref sigstack) = context.sigstack { - let mut new_sigstack = context::memory::Memory::new( - VirtualAddress::new(crate::USER_TMP_SIGSTACK_OFFSET), - sigstack.size(), - PageFlags::new().write(true), - false - ); 
- - unsafe { - intrinsics::copy(sigstack.start_address().data() as *const u8, - new_sigstack.start_address().data() as *mut u8, - sigstack.size()); - } - - new_sigstack.remap(sigstack.flags()); - sigstack_opt = Some(new_sigstack); - } - - if flags.contains(CLONE_VM) { - grants = Arc::clone(&context.grants); - } else { - let mut grants_set = UserGrants::default(); - for grant in context.grants.read().iter() { - let start = VirtualAddress::new(grant.start_address().data() + crate::USER_TMP_GRANT_OFFSET - crate::USER_GRANT_OFFSET); - grants_set.insert(grant.secret_clone(start)); - } - grants = Arc::new(RwLock::new(grants_set)); - } - - if flags.contains(CLONE_VM) { - name = Arc::clone(&context.name); - } else { - name = Arc::new(RwLock::new(context.name.read().clone())); - } - - if flags.contains(CLONE_FS) { - cwd = Arc::clone(&context.cwd); - } else { - cwd = Arc::new(RwLock::new(context.cwd.read().clone())); - } - - if flags.contains(CLONE_FILES) { - files = Arc::clone(&context.files); - } else { - files = Arc::new(RwLock::new(context.files.read().clone())); - } - - if flags.contains(CLONE_SIGHAND) { - actions = Arc::clone(&context.actions); - } else { - actions = Arc::new(RwLock::new(context.actions.read().clone())); - } - } - - // If not cloning files, dup to get a new number from scheme - // This has to be done outside the context lock to prevent deadlocks - if !flags.contains(CLONE_FILES) { - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let new_file_opt = if let Some(ref file) = *file_opt { - Some(FileDescriptor { - description: Arc::clone(&file.description), - cloexec: file.cloexec, - }) - } else { - None - }; - - *file_opt = new_file_opt; - } - } - - // If not cloning virtual memory, use fmap to re-obtain every grant where possible - if !flags.contains(CLONE_VM) { - let grants = Arc::get_mut(&mut grants).ok_or(Error::new(EBUSY))?.get_mut(); - let old_grants = mem::take(&mut grants.inner); - - // TODO: Find some way to do this without having to allocate. - - // TODO: Check that the current process is not allowed to serve any scheme this logic - // could interfere with. Deadlocks would otherwise seem inevitable. - - for mut grant in old_grants.into_iter() { - let region = *grant.region(); - let address = region.start_address().data(); - let size = region.size(); - - let new_grant = if let Some(ref mut file_ref) = grant.desc_opt.take() { - // TODO: Technically this is redundant as the grants are already secret_cloned. - // Maybe grants with fds can be excluded from that step? - grant.unmap(); - - let FileDescription { scheme, number, .. } = { *file_ref.desc.description.read() }; - let scheme_arc = match crate::scheme::schemes().get(scheme) { - Some(s) => Arc::clone(s), - None => continue, - }; - let map = crate::syscall::data::Map { - address, - size, - offset: file_ref.offset, - flags: file_ref.flags | MapFlags::MAP_FIXED_NOREPLACE, - }; - - let ptr = match scheme_arc.fmap(number, &map) { - Ok(new_range) => new_range as *mut u8, - Err(_) => continue, - }; - - // This will eventually be freed from the parent context after move_to is - // called. - context::contexts().current().ok_or(Error::new(ESRCH))? - .read().grants.write() - .take(&Region::new(VirtualAddress::new(ptr as usize), map.size)) - .ok_or(Error::new(EFAULT))? 
- } else { - grant - }; - grants.insert(new_grant); - } - } - - // If vfork, block the current process - // This has to be done after the operations that may require context switches - if flags.contains(CLONE_VFORK) { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let mut context = context_lock.write(); - context.block("vfork"); - vfork = true; - } else { - vfork = false; - } - - // Set up new process - { - let mut contexts = context::contexts_mut(); - let context_lock = contexts.new_context()?; - let mut context = context_lock.write(); - - pid = context.id; - - context.pgid = pgid; - context.ppid = ppid; - context.ruid = ruid; - context.rgid = rgid; - context.rns = rns; - context.euid = euid; - context.egid = egid; - context.ens = ens; - context.sigmask = sigmask; - context.umask = umask; - - //TODO: Better CPU balancing - if let Some(cpu_id) = cpu_id_opt { - context.cpu_id = Some(cpu_id); - } else { - context.cpu_id = Some(pid.into() % crate::cpu_count()); - } - - context.status = context::Status::Runnable; - - context.vfork = vfork; - - context.arch = arch; - - // This is needed because these registers may have changed after this context was - // switched to, but before this was called. - #[cfg(all(target_arch = "x86_64", feature = "x86_fsgsbase"))] - unsafe { - context.arch.fsbase = x86::bits64::segmentation::rdfsbase() as usize; - context.arch.gsbase = x86::bits64::segmentation::rdgsbase() as usize; - } - - let mut active_utable = unsafe { ActivePageTable::new(TableKind::User) }; - let active_ktable = unsafe { ActivePageTable::new(TableKind::Kernel) }; - - let mut new_utable = unsafe { - let frame = allocate_frames(1).ok_or(Error::new(ENOMEM))?; - // SAFETY: This is safe because the frame is exclusive, owned, and valid, as we - // have just allocated it. - InactivePageTable::new(&mut active_utable, frame) - }; - context.arch.set_page_utable(unsafe { new_utable.address() }); - - #[cfg(target_arch = "aarch64")] - let mut new_ktable = { - let mut new_ktable = { - let frame = allocate_frames(1).expect("no more frames in syscall::clone new_table"); - InactivePageTable::new(frame, &mut active_ktable) - }; - context.arch.set_page_ktable(unsafe { new_ktable.address() }); - new_ktable - }; - - #[cfg(not(target_arch = "aarch64"))] - let mut new_ktable = unsafe { - InactivePageTable::from_address(new_utable.address()) - }; - - // Copy kernel image mapping - { - let frame = active_ktable.p4()[crate::KERNEL_PML4].pointed_frame().expect("kernel image not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PML4].flags(); - - new_ktable.mapper().p4_mut()[crate::KERNEL_PML4].set(frame, flags); - } - - // Copy kernel heap mapping - { - let frame = active_ktable.p4()[crate::KERNEL_HEAP_PML4].pointed_frame().expect("kernel heap not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_HEAP_PML4].flags(); - - new_ktable.mapper().p4_mut()[crate::KERNEL_HEAP_PML4].set(frame, flags); - } - - // Copy physmap mapping - { - let frame = active_ktable.p4()[crate::PHYS_PML4].pointed_frame().expect("physmap not mapped"); - let flags = active_ktable.p4()[crate::PHYS_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::PHYS_PML4].set(frame, flags); - } - // Copy kernel percpu (similar to TLS) mapping. 
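// The top-level copies around this point (kernel image, kernel heap, physmap, and the
// percpu mapping just below) all share kernel space with the new context in the same way:
// duplicate a single PML4 entry so both address spaces reference the same lower-level
// kernel tables. A hedged, self-contained model of that idea; the 512-entry array and the
// names below are illustrative, not the kernel's types:
#[derive(Clone, Copy, Default)]
struct TopLevelEntry(u64);

fn share_kernel_slots(parent: &[TopLevelEntry; 512], child: &mut [TopLevelEntry; 512], slots: &[usize]) {
    for &slot in slots {
        // Only the entry is copied; no lower-level tables are duplicated, which is why
        // every process observes identical kernel mappings.
        child[slot] = parent[slot];
    }
}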
- { - let frame = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].pointed_frame().expect("kernel TLS not mapped"); - let flags = active_ktable.p4()[crate::KERNEL_PERCPU_PML4].flags(); - new_ktable.mapper().p4_mut()[crate::KERNEL_PERCPU_PML4].set(frame, flags); - } - - if let Some(fx) = kfx_opt.take() { - context.arch.set_fx(fx.as_ptr() as usize); - context.kfx = Some(fx); - } - - // Set kernel stack - if let Some(stack) = kstack_opt.take() { - context.arch.set_stack(stack.as_ptr() as usize + offset); - context.kstack = Some(stack); - #[cfg(target_arch = "aarch64")] - { - context.arch.set_lr(interrupt::syscall::clone_ret as usize); - } - } - - // TODO: Clone ksig? - - // Setup image, heap, and grants - if flags.contains(CLONE_VM) { - // Copy user image mapping, if found - if ! image.is_empty() { - let frame = active_utable.p4()[crate::USER_PML4].pointed_frame().expect("user image not mapped"); - let flags = active_utable.p4()[crate::USER_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_PML4].set(frame, flags); - } - context.image = image; - - // Copy grant mapping - if ! grants.read().is_empty() { - let frame = active_utable.p4()[crate::USER_GRANT_PML4].pointed_frame().expect("user grants not mapped"); - let flags = active_utable.p4()[crate::USER_GRANT_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_GRANT_PML4].set(frame, flags); - } - context.grants = grants; - } else { - // Move copy of image - for memory_shared in image.iter_mut() { - memory_shared.with(|memory| { - let start = VirtualAddress::new(memory.start_address().data() - crate::USER_TMP_OFFSET + crate::USER_OFFSET); - memory.move_to(start, &mut new_utable); - }); - } - context.image = image; - - // Move grants - { - let mut grants = grants.write(); - let old_grants = mem::replace(&mut *grants, UserGrants::default()); - - for mut grant in old_grants.inner.into_iter() { - let start = VirtualAddress::new(grant.start_address().data() + crate::USER_GRANT_OFFSET - crate::USER_TMP_GRANT_OFFSET); - grant.move_to(start, &mut new_utable); - grants.insert(grant); - } - } - context.grants = grants; - } - - // Setup user stack - if let Some(stack_shared) = stack_opt { - if flags.contains(CLONE_STACK) { - let frame = active_utable.p4()[crate::USER_STACK_PML4].pointed_frame().expect("user stack not mapped"); - let flags = active_utable.p4()[crate::USER_STACK_PML4].flags(); - - new_utable.mapper().p4_mut()[crate::USER_STACK_PML4].set(frame, flags); - } else { - stack_shared.with(|stack| { - stack.move_to(VirtualAddress::new(crate::USER_STACK_OFFSET), &mut new_utable); - }); - } - context.stack = Some(stack_shared); - } - - // Setup user sigstack - if let Some(mut sigstack) = sigstack_opt { - sigstack.move_to(VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), &mut new_utable); - context.sigstack = Some(sigstack); - } - - #[cfg(target_arch = "aarch64")] - { - if let Some(stack) = &mut context.kstack { - unsafe { - // stack_base contains a pointer to InterruptStack. Get its offset from - // stack_base itself - let istack_offset = *(stack_base as *const u64) - stack_base as u64; - - // Get the top of the new process' stack - let new_sp = stack.as_mut_ptr().add(offset); - - // Update the pointer to the InterruptStack to reflect the new process' - // stack. (Without this the pointer would be InterruptStack on the parent - // process' stack). 
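// The non-CLONE_VM path above first materialises copies at a temporary window
// (USER_TMP_*) in the parent's address space, then relocates each region to its real
// address in the child's table via `move_to`. The relocation itself is plain address
// arithmetic; a hypothetical helper, with placeholder constants rather than the kernel's
// actual offsets:
const USER_TMP_BASE: usize = 0x0000_7000_0000_0000; // assumption: illustrative value only
const USER_BASE: usize = 0x0000_0000_0040_0000;     // assumption: illustrative value only

fn relocated_address(tmp_addr: usize) -> usize {
    // Mirrors `memory.start_address().data() - crate::USER_TMP_OFFSET + crate::USER_OFFSET`.
    tmp_addr - USER_TMP_BASE + USER_BASE
}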
- *(new_sp as *mut u64) = new_sp as u64 + istack_offset; - - // Update tpidr_el0 in the new process' InterruptStack - let mut interrupt_stack = &mut *(stack.as_mut_ptr().add(offset + istack_offset as usize) as *mut crate::arch::interrupt::InterruptStack); - interrupt_stack.iret.tpidr_el0 = tcb_addr; - } - } - } - - context.name = name; - - context.cwd = cwd; - - context.files = files; - - context.actions = actions; - } - } - - if ptrace::send_event(ptrace_event!(PTRACE_EVENT_CLONE, pid.into())).is_some() { - // Freeze the clone, allow ptrace to put breakpoints - // to it before it starts - let contexts = context::contexts(); - let context = contexts.get(pid).expect("Newly created context doesn't exist??"); - let mut context = context.write(); - context.ptrace_stop = true; - } - - // Race to pick up the new process! - ipi(IpiKind::Switch, IpiTarget::Other); - - let _ = unsafe { context::switch() }; - - Ok(pid) -} +use crate::syscall::validate::validate_slice_mut; fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGuard<'lock, Context>, reaping: bool) -> RwLockWriteGuard<'lock, Context> { - if reaping { - // Memory should already be unmapped - assert!(context.image.is_empty()); - assert!(context.stack.is_none()); - assert!(context.sigstack.is_none()); - } else { - // Unmap previous image, heap, grants, stack - context.image.clear(); - drop(context.stack.take()); - drop(context.sigstack.take()); - } - // NOTE: If we do not replace the grants `Arc`, then a strange situation can appear where the // main thread and another thread exit simultaneously before either one is reaped. If that // happens, then the last context that runs exit will think that there is still are still // remaining references to the grants, where there are in fact none. However, if either one is // reaped before, then that reference will disappear, and no leak will occur. // - // By removing the reference to the grants when the context will no longer be used, this + // By removing the reference to the address space when the context will no longer be used, this // problem will never occur. + let addr_space_arc = match context.addr_space.take() { + Some(a) => a, + None => return context, + }; - // FIXME, UNOPTIMIZED: Right now, this will allocate memory in order to store the new empty - // grants, which may not even be used (only in fexec I think). We should turn grants into an - // `Option`, and only reinitialize it there. - let mut grants_arc = mem::take(&mut context.grants); - - if let Some(grants_lock_mut) = Arc::get_mut(&mut grants_arc) { - // TODO: Use get_mut to bypass the need to acquire a lock when there we already have an - // exclusive reference from `Arc::get_mut`. This will require updating `spin`. 
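// The rewritten `empty` takes the context's address-space handle and only tears the
// mappings down if it held the last reference; otherwise sibling threads still need them.
// The same ownership pattern, reduced to std types so it stands alone (these are not the
// kernel's actual signatures):
use std::sync::{Arc, RwLock};

fn teardown_if_last(addr_space: Arc<RwLock<Vec<&'static str>>>) {
    match Arc::try_unwrap(addr_space) {
        // Sole owner: consume the lock and release every mapping.
        Ok(lock) => {
            for grant in lock.into_inner().unwrap() {
                let _ = grant; // stand-in for grant.unmap(mapper, flusher)
            }
        }
        // Still shared: another thread owns a reference, so nothing is unmapped here.
        Err(_still_shared) => {}
    }
}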
- let mut grants_guard = grants_lock_mut.write(); + if let Ok(mut addr_space) = Arc::try_unwrap(addr_space_arc).map(RwLock::into_inner) { + let mapper = &mut addr_space.table.utable; - let grants = mem::replace(&mut *grants_guard, UserGrants::default()); - for grant in grants.inner.into_iter() { + for grant in addr_space.grants.into_iter() { let unmap_result = if reaping { log::error!("{}: {}: Grant should not exist: {:?}", context.id.into(), *context.name.read(), grant); - let mut new_table = unsafe { InactivePageTable::from_address(context.arch.get_page_utable()) }; - - grant.unmap_inactive(&mut new_table) + grant.unmap(mapper, &mut InactiveFlusher::new()) } else { - grant.unmap() + grant.unmap(mapper, PageFlushAll::new()) }; if unmap_result.file_desc.is_some() { @@ -608,478 +61,16 @@ fn empty<'lock>(context_lock: &'lock RwLock, mut context: RwLockWriteGu context } -struct ExecFile(FileHandle); - -impl Drop for ExecFile { - fn drop(&mut self) { - let _ = syscall::close(self.0); - } -} - -#[allow(clippy::too_many_arguments)] -fn fexec_noreturn( - setuid: Option, - setgid: Option, - name: Box, - data: Box<[u8]>, - phdr_grant: context::memory::Grant, - args: Box<[Box<[u8]>]>, - vars: Box<[Box<[u8]>]>, - auxv: Box<[usize]>, -) -> ! { - let entry; - let singlestep; - let mut sp = crate::USER_STACK_OFFSET + crate::USER_STACK_SIZE - 256; - - { - let (vfork, ppid, files) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exec_noreturn pid not found"); - let mut context = context_lock.write(); - - singlestep = unsafe { - ptrace::regs_for(&context).map(|s| s.is_singlestep()).unwrap_or(false) - }; - - context.name = Arc::new(RwLock::new(name)); - - context = empty(&context_lock, context, false); - - context.grants.write().insert(phdr_grant); - - #[cfg(all(target_arch = "x86_64"))] - { - context.arch.fsbase = 0; - context.arch.gsbase = 0; - - #[cfg(feature = "x86_fsgsbase")] - unsafe { - x86::bits64::segmentation::wrfsbase(0); - x86::bits64::segmentation::swapgs(); - x86::bits64::segmentation::wrgsbase(0); - x86::bits64::segmentation::swapgs(); - } - #[cfg(not(feature = "x86_fsgsbase"))] - unsafe { - x86::msr::wrmsr(x86::msr::IA32_FS_BASE, 0); - x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, 0); - } - } - - if let Some(uid) = setuid { - context.euid = uid; - } - - if let Some(gid) = setgid { - context.egid = gid; - } - - // Map and copy new segments - { - let elf = elf::Elf::from(&data).unwrap(); - entry = elf.entry(); - - for segment in elf.segments() { - match segment.p_type { - program_header::PT_LOAD => { - let voff = segment.p_vaddr as usize % PAGE_SIZE; - let vaddr = segment.p_vaddr as usize - voff; - - let mut memory = context::memory::Memory::new( - VirtualAddress::new(vaddr), - segment.p_memsz as usize + voff, - PageFlags::new().write(true), - true - ); - - unsafe { - // Copy file data - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - segment.p_vaddr as *mut u8, - segment.p_filesz as usize); - } - - let mut flags = PageFlags::new().user(true); - - // W ^ X. 
If it is executable, do not allow it to be writable, even if requested - if segment.p_flags & program_header::PF_X == program_header::PF_X { - flags = flags.execute(true); - } else if segment.p_flags & program_header::PF_W == program_header::PF_W { - flags = flags.write(true); - } - - memory.remap(flags); - - context.image.push(memory.to_shared()); - }, - _ => (), - } - } - } - - // Map stack - context.stack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_STACK_OFFSET), - crate::USER_STACK_SIZE, - PageFlags::new().write(true).user(true), - true - ).to_shared()); - - // Map stack - context.sigstack = Some(context::memory::Memory::new( - VirtualAddress::new(crate::USER_SIGSTACK_OFFSET), - crate::USER_SIGSTACK_SIZE, - PageFlags::new().write(true).user(true), - true - )); - - // Data no longer required, can deallocate - drop(data); - - let mut push = |arg| { - sp -= mem::size_of::(); - unsafe { *(sp as *mut usize) = arg; } - }; - - // Push auxiliary vector - push(AT_NULL); - for &arg in auxv.iter().rev() { - push(arg); - } - - drop(auxv); // no longer required - - let mut arg_size = 0; - - // Push environment variables and arguments - for iter in &[&vars, &args] { - // Push null-terminator - push(0); - - // Push pointer to content - for arg in iter.iter().rev() { - push(crate::USER_ARG_OFFSET + arg_size); - arg_size += arg.len() + 1; - } - } - - // For some reason, Linux pushes the argument count here (in - // addition to being null-terminated), but not the environment - // variable count. - // TODO: Push more counts? Less? Stop having null-termination? - push(args.len()); - - // Write environment and argument pointers to USER_ARG_OFFSET - if arg_size > 0 { - let mut memory = context::memory::Memory::new( - VirtualAddress::new(crate::USER_ARG_OFFSET), - arg_size, - PageFlags::new().write(true), - true - ); - - let mut arg_offset = 0; - for arg in vars.iter().rev().chain(args.iter().rev()) { - unsafe { - intrinsics::copy(arg.as_ptr(), - (crate::USER_ARG_OFFSET + arg_offset) as *mut u8, - arg.len()); - } - arg_offset += arg.len(); - - unsafe { - *((crate::USER_ARG_OFFSET + arg_offset) as *mut u8) = 0; - } - arg_offset += 1; - } - - memory.remap(PageFlags::new().user(true)); - - context.image.push(memory.to_shared()); - } - - // Args and vars no longer required, can deallocate - drop(args); - drop(vars); - - context.actions = Arc::new(RwLock::new(vec![( - SigAction { - sa_handler: unsafe { mem::transmute(SIG_DFL) }, - sa_mask: [0; 2], - sa_flags: SigActionFlags::empty(), - }, - 0 - ); 128])); - - let vfork = context.vfork; - context.vfork = false; - - let files = Arc::clone(&context.files); - - (vfork, context.ppid, files) - }; - - for (_fd, file_opt) in files.write().iter_mut().enumerate() { - let mut cloexec = false; - if let Some(ref file) = *file_opt { - if file.cloexec { - cloexec = true; - } - } - - if cloexec { - let _ = file_opt.take().unwrap().close(); - } - } - - if vfork { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.get(ppid) { - let mut context = context_lock.write(); - if ! 
context.unblock() { - println!("{} not blocked for exec vfork unblock", ppid.into()); - } - } else { - println!("{} not found for exec vfork unblock", ppid.into()); - } - } - } - - // Go to usermode - unsafe { usermode(entry, sp, 0, usize::from(singlestep)) } -} - -pub fn fexec_kernel(fd: FileHandle, args: Box<[Box<[u8]>]>, vars: Box<[Box<[u8]>]>, name_override_opt: Option>, auxv: Option<(Vec, context::memory::Grant)>) -> Result { - let (uid, gid) = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - (context.euid, context.egid) - }; - - let mut stat: Stat; - let name: String; - let mut data: Vec; - { - let file = ExecFile(fd); - - stat = Stat::default(); - syscall::file_op_mut_slice(syscall::number::SYS_FSTAT, file.0, &mut stat)?; - - let mut perm = stat.st_mode & 0o7; - if stat.st_uid == uid { - perm |= (stat.st_mode >> 6) & 0o7; - } - if stat.st_gid == gid { - perm |= (stat.st_mode >> 3) & 0o7; - } - if uid == 0 { - perm |= 0o7; - } - - if perm & 0o1 != 0o1 { - return Err(Error::new(EACCES)); - } - - if let Some(name_override) = name_override_opt { - name = String::from(name_override); - } else { - let mut name_bytes = vec![0; 4096]; - let len = syscall::file_op_mut_slice(syscall::number::SYS_FPATH, file.0, &mut name_bytes)?; - name_bytes.truncate(len); - name = match String::from_utf8(name_bytes) { - Ok(ok) => ok, - Err(_err) => { - //TODO: print error? - return Err(Error::new(EINVAL)); - } - }; - } - - //TODO: Only read elf header, not entire file. Then read required segments - data = vec![0; stat.st_size as usize]; - syscall::file_op_mut_slice(syscall::number::SYS_READ, file.0, &mut data)?; - drop(file); - } - - // Set UID and GID are determined after resolving any hashbangs - let setuid = if stat.st_mode & syscall::flag::MODE_SETUID == syscall::flag::MODE_SETUID { - Some(stat.st_uid) - } else { - None - }; - - let setgid = if stat.st_mode & syscall::flag::MODE_SETGID == syscall::flag::MODE_SETGID { - Some(stat.st_gid) - } else { - None - }; - - // The argument list is limited to avoid using too much userspace stack - // This check is done last to allow all hashbangs to be resolved - // - // This should be based on the size of the userspace stack, divided - // by the cost of each argument, which should be usize * 2, with - // one additional argument added to represent the total size of the - // argument pointer array and potential padding - // - // A limit of 4095 would mean a stack of (4095 + 1) * 8 * 2 = 65536, or 64KB - if (args.len() + vars.len()) > 4095 { - return Err(Error::new(E2BIG)); - } - - let elf = match elf::Elf::from(&data) { - Ok(elf) => elf, - Err(err) => { - let contexts = context::contexts(); - if let Some(context_lock) = contexts.current() { - let context = context_lock.read(); - println!( - "{}: {}: fexec failed to execute {}: {}", - context.id.into(), - *context.name.read(), - fd.into(), - err - ); - } - return Err(Error::new(ENOEXEC)); - } - }; - - // `fexec_kernel` can recurse if an interpreter is found. We get the - // auxiliary vector from the first invocation, which is passed via an - // argument, or if this is the first one we create it. 
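// The auxiliary vector assembled just below is a flat list of key/value words that later
// gets pushed onto the new user stack, terminated by AT_NULL. A hedged model of that
// layout; the AT_* numeric values here are placeholders, only the names come from the code:
const AT_NULL: usize = 0;  // assumption: terminator key
const AT_ENTRY: usize = 9; // assumption: placeholder value
const AT_PHDR: usize = 3;  // assumption: placeholder value

fn auxv_push_order(entry: usize, phdr_addr: usize) -> Vec<usize> {
    // Flat pairs, mirroring `auxv.push(AT_ENTRY); auxv.push(elf.entry());` and so on.
    let auxv = [AT_ENTRY, entry, AT_PHDR, phdr_addr];
    // The kernel pushes AT_NULL first and then the pairs in reverse; because the stack
    // grows downward, user space reads them back in forward order ending in AT_NULL.
    let mut words = vec![AT_NULL];
    words.extend(auxv.iter().rev().copied());
    words
}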
- let (auxv, phdr_grant) = if let Some((auxv, phdr_grant)) = auxv { - (auxv, phdr_grant) - } else { - let phdr_grant = match context::contexts().current().ok_or(Error::new(ESRCH))?.read().grants.write() { - grants => { - let size = elf.program_headers_size() * elf.program_header_count(); - let aligned_size = (size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE; - - if aligned_size > MAX_PHDRS_SIZE { - return Err(Error::new(ENOMEM)); - } - - let phdrs_region = grants.find_free(aligned_size); - let grant = context::memory::Grant::map(phdrs_region.start_address(), aligned_size, PageFlags::new().write(true).user(true)); - - unsafe { - let dst = core::slice::from_raw_parts_mut(grant.start_address().data() as *mut u8, aligned_size); - dst[..size].copy_from_slice(&data[elf.program_headers()..elf.program_headers() + elf.program_headers_size() * elf.program_header_count()]); - } - - grant - } - }; - let mut auxv = Vec::with_capacity(3); - - auxv.push(AT_ENTRY); - auxv.push(elf.entry()); - auxv.push(AT_PHDR); - auxv.push(phdr_grant.start_address().data()); - auxv.push(AT_PHENT); - auxv.push(elf.program_headers_size()); - auxv.push(AT_PHNUM); - auxv.push(elf.program_header_count()); - - (auxv, phdr_grant) - }; - - // We check the validity of all loadable sections here - for segment in elf.segments() { - match segment.p_type { - program_header::PT_INTERP => { - //TODO: length restraint, parse interp earlier - let mut interp = vec![0; segment.p_memsz as usize]; - unsafe { - intrinsics::copy((elf.data.as_ptr() as usize + segment.p_offset as usize) as *const u8, - interp.as_mut_ptr(), - segment.p_filesz as usize); - } - - let mut i = 0; - while i < interp.len() { - if interp[i] == 0 { - break; - } - i += 1; - } - interp.truncate(i); - - let interp_str = str::from_utf8(&interp).map_err(|_| Error::new(EINVAL))?; - - let interp_fd = super::fs::open(interp_str, super::flag::O_RDONLY | super::flag::O_CLOEXEC)?; - - let mut args_vec = Vec::from(args); - //TODO: pass file handle in auxv - let name_override = name.into_boxed_str(); - args_vec[0] = name_override.clone().into(); - - // Drop variables, since fexec_kernel probably won't return - drop(elf); - drop(interp); - - return fexec_kernel( - interp_fd, - args_vec.into_boxed_slice(), - vars, - Some(name_override), - Some((auxv, phdr_grant)), - ); - }, - _ => (), - } - } - - // This is the point of no return, quite literaly. Any checks for validity need - // to be done before, and appropriate errors returned. Otherwise, we have nothing - // to return to. - fexec_noreturn(setuid, setgid, name.into_boxed_str(), data.into_boxed_slice(), phdr_grant, args, vars, auxv.into_boxed_slice()); -} -const MAX_PHDRS_SIZE: usize = PAGE_SIZE; - -pub fn fexec(fd: FileHandle, arg_ptrs: &[[usize; 2]], var_ptrs: &[[usize; 2]]) -> Result { - let mut args = Vec::new(); - for arg_ptr in arg_ptrs { - let arg = validate_slice(arg_ptr[0] as *const u8, arg_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - args.push(arg.to_vec().into_boxed_slice()); - } - - let mut vars = Vec::new(); - for var_ptr in var_ptrs { - let var = validate_slice(var_ptr[0] as *const u8, var_ptr[1])?; - // Argument must be moved into kernel space before exec unmaps all memory - vars.push(var.to_vec().into_boxed_slice()); - } - - // Neither arg_ptrs nor var_ptrs should be used after this point, the kernel - // now has owned copies in args and vars - - fexec_kernel(fd, args.into_boxed_slice(), vars.into_boxed_slice(), None, None) -} - pub fn exit(status: usize) -> ! 
{ ptrace::breakpoint_callback(PTRACE_STOP_EXIT, Some(ptrace_event!(PTRACE_STOP_EXIT, status))); { - let context_lock = { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH)).expect("exit failed to find context"); - Arc::clone(&context_lock) - }; + let context_lock = context::current().expect("exit failed to find context"); - let mut close_files = Vec::new(); + let mut close_files; let pid = { let mut context = context_lock.write(); - { - let mut lock = context.files.write(); - if Arc::strong_count(&context.files) == 1 { - mem::swap(lock.deref_mut(), &mut close_files); - } - } - context.files = Arc::new(RwLock::new(Vec::new())); + close_files = Arc::try_unwrap(mem::take(&mut context.files)).map_or_else(|_| Vec::new(), RwLock::into_inner); context.id }; @@ -1298,54 +289,10 @@ pub fn kill(pid: ContextId, sig: usize) -> Result { pub fn mprotect(address: usize, size: usize, flags: MapFlags) -> Result { // println!("mprotect {:#X}, {}, {:#X}", address, size, flags); - let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?; - let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?; - - let mut active_table = unsafe { ActivePageTable::new(TableKind::User) }; - - let flush_all = PageFlushAll::new(); - - let start_page = Page::containing_address(VirtualAddress::new(address)); - let end_page = Page::containing_address(VirtualAddress::new(end_address)); - for page in Page::range_inclusive(start_page, end_page) { - // Check if the page is actually mapped before trying to change the flags. - // FIXME can other processes change if a page is mapped beneath our feet? - let mut page_flags = if let Some(page_flags) = active_table.translate_page_flags(page) { - page_flags - } else { - flush_all.flush(); - return Err(Error::new(EFAULT)); - }; - if !page_flags.has_present() { - flush_all.flush(); - return Err(Error::new(EFAULT)); - } - - if flags.contains(PROT_EXEC) { - page_flags = page_flags.execute(true); - } else { - page_flags = page_flags.execute(false); - } - - if flags.contains(PROT_WRITE) { - //TODO: Not allowing gain of write privileges - } else { - page_flags = page_flags.write(false); - } + if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 { return Err(Error::new(EINVAL)); } + if address.saturating_add(size) > crate::USER_END_OFFSET { return Err(Error::new(EFAULT)); } - if flags.contains(PROT_READ) { - //TODO: No flags for readable pages - } else { - //TODO: No flags for readable pages - } - - let flush = active_table.remap(page, page_flags); - flush_all.consume(flush); - } - - flush_all.flush(); - - Ok(0) + AddrSpace::current()?.write().mprotect(Page::containing_address(VirtualAddress::new(address)), size / PAGE_SIZE, flags).map(|()| 0) } pub fn setpgid(pid: ContextId, pgid: ContextId) -> Result { @@ -1377,24 +324,23 @@ pub fn setpgid(pid: ContextId, pgid: ContextId) -> Result { } pub fn sigaction(sig: usize, act_opt: Option<&SigAction>, oldact_opt: Option<&mut SigAction>, restorer: usize) -> Result { - if sig > 0 && sig <= 0x7F { - let contexts = context::contexts(); - let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; - let context = context_lock.read(); - let mut actions = context.actions.write(); - - if let Some(oldact) = oldact_opt { - *oldact = actions[sig].0; - } + if sig == 0 || sig > 0x7F { + return Err(Error::new(EINVAL)); + } + let contexts = context::contexts(); + let context_lock = contexts.current().ok_or(Error::new(ESRCH))?; + let context = context_lock.read(); + let mut actions = 
context.actions.write(); - if let Some(act) = act_opt { - actions[sig] = (*act, restorer); - } + if let Some(oldact) = oldact_opt { + *oldact = actions[sig].0; + } - Ok(0) - } else { - Err(Error::new(EINVAL)) + if let Some(act) = act_opt { + actions[sig] = (*act, restorer); } + + Ok(0) } pub fn sigprocmask(how: usize, mask_opt: Option<&[u64; 2]>, oldmask_opt: Option<&mut [u64; 2]>) -> Result { @@ -1629,3 +575,30 @@ pub fn waitpid(pid: ContextId, status_ptr: usize, flags: WaitFlags) -> Result ! { + assert_ne!(bootstrap.page_count, 0); + + { + let addr_space = Arc::clone(context::contexts().current() + .expect("expected a context to exist when executing init") + .read().addr_space() + .expect("expected bootstrap context to have an address space")); + + let mut addr_space = addr_space.write(); + let addr_space = &mut *addr_space; + + addr_space.grants.insert(context::memory::Grant::physmap( + bootstrap.base.clone(), + Page::containing_address(VirtualAddress::new(0)), + bootstrap.page_count, + PageFlags::new().user(true).write(true).execute(true), + &mut addr_space.table.utable, + PageFlushAll::new(), + ).expect("failed to physmap bootstrap memory")); + } + + #[cfg(target_arch = "x86_64")] + // Start in a minimal environment without any stack. + usermode(bootstrap.entry, 0, 0, 0); +} diff --git a/src/syscall/validate.rs b/src/syscall/validate.rs index 2aac27a466bfc46e48e5bfa340ca34c7fb55ea1d..51f7b5ef2d6987a5e7f36106ff28d6f386be4b82 100644 --- a/src/syscall/validate.rs +++ b/src/syscall/validate.rs @@ -1,24 +1,37 @@ +// TODO: Maybe stop handing out slices and instead use a wrapper type that supports copying etc. +// Invalid pages will cause page faults, which can be handled so that they are caught and EFAULT is +// returned. This will also make SMAP much, much, easier. c.f. Linux's copy_from_user, copy_to_user +// which are written in assembly and handle page faults. use core::{mem, slice, str}; -use crate::paging::{ActivePageTable, Page, VirtualAddress}; +use crate::context; +use crate::memory::PAGE_SIZE; +use crate::paging::{Page, TableKind, VirtualAddress}; use crate::syscall::error::*; +use alloc::sync::Arc; + fn validate(address: usize, size: usize, writable: bool) -> Result<()> { + if VirtualAddress::new(address.saturating_add(size)).kind() != TableKind::User { + return Err(Error::new(EFAULT)); + } + let end_offset = size.checked_sub(1).ok_or(Error::new(EFAULT))?; let end_address = address.checked_add(end_offset).ok_or(Error::new(EFAULT))?; - let active_table = unsafe { ActivePageTable::new(VirtualAddress::new(address).kind()) }; + let addr_space = Arc::clone(context::current()?.read().addr_space()?); + let addr_space = addr_space.read(); let start_page = Page::containing_address(VirtualAddress::new(address)); let end_page = Page::containing_address(VirtualAddress::new(end_address)); for page in Page::range_inclusive(start_page, end_page) { - if let Some(page_flags) = active_table.translate_page_flags(page) { - if ! page_flags.has_user() { + if let Some((_, flags)) = addr_space.table.utable.translate(page.start_address()) { + if !flags.has_user() { // println!("{:X}: Not usermode", page.start_address().data()); return Err(Error::new(EFAULT)); } - if writable && ! 
page_flags.has_write() { + if writable && !flags.has_write() { // println!("{:X}: Not writable {}", page.start_address().data(), writable); return Err(Error::new(EFAULT)); } @@ -96,3 +109,13 @@ pub fn validate_str(ptr: *const u8, len: usize) -> Result<&'static str> { let slice = validate_slice(ptr, len)?; str::from_utf8(slice).map_err(|_| Error::new(EINVAL)) } + +pub fn validate_region(address: usize, size: usize) -> Result<(Page, usize)> { + if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || size == 0 { + return Err(Error::new(EINVAL)); + } + if address.saturating_add(size) > crate::USER_END_OFFSET { + return Err(Error::new(EFAULT)); + } + Ok((Page::containing_address(VirtualAddress::new(address)), size / PAGE_SIZE)) +} diff --git a/syscall b/syscall index 0c98fbd16212282aeb3db17c991472885a9b79be..fac87ee3c74e5e504a74f2713301c1ddc7d43d17 160000 --- a/syscall +++ b/syscall @@ -1 +1 @@ -Subproject commit 0c98fbd16212282aeb3db17c991472885a9b79be +Subproject commit fac87ee3c74e5e504a74f2713301c1ddc7d43d17
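// The new `validate_region` added above rejects unaligned, empty, or kernel-range requests
// before any page table is consulted. A standalone approximation of those checks, with
// stand-in constants and error strings instead of the kernel's PAGE_SIZE, USER_END_OFFSET,
// and Error values:
const PAGE_SIZE: usize = 4096;                 // the usual x86_64 page size
const USER_END: usize = 0x0000_8000_0000_0000; // assumption: illustrative bound only

fn precheck_region(address: usize, size: usize) -> Result<(usize, usize), &'static str> {
    if address % PAGE_SIZE != 0 || size % PAGE_SIZE != 0 || size == 0 {
        return Err("EINVAL: not page-aligned or empty");
    }
    // saturating_add avoids overflow for hostile (address, size) pairs.
    if address.saturating_add(size) > USER_END {
        return Err("EFAULT: range extends past user space");
    }
    Ok((address / PAGE_SIZE, size / PAGE_SIZE)) // (first page index, page count)
}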