From 11fdb3bb4693145e99eef02e6a1159fd62284f1f Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Wed, 21 Jun 2023 19:28:39 +0200
Subject: [PATCH] Copy on writes to CoW pages.

---
 src/arch/x86_64/mod.rs        |  9 ++++
 src/arch/x86_64/paging/mod.rs |  5 +-
 src/context/memory.rs         | 95 ++++++++++++++++++++++++-----------
 src/syscall/mod.rs            |  2 +-
 src/syscall/process.rs        | 11 ++--
 5 files changed, 86 insertions(+), 36 deletions(-)

diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs
index 860513cd..5fbb64ea 100644
--- a/src/arch/x86_64/mod.rs
+++ b/src/arch/x86_64/mod.rs
@@ -1,3 +1,7 @@
+use crate::Bootstrap;
+
+use self::paging::PAGE_SIZE;
+
 #[macro_use]
 pub mod macros;
 
@@ -45,6 +49,7 @@ pub mod stop;
 
 pub mod time;
 
+use ::rmm::Arch;
 pub use ::rmm::X8664Arch as CurrentRmmArch;
 
 // Flags
@@ -78,3 +83,7 @@ pub unsafe extern "C" fn arch_copy_to_user(dst: usize, src: usize, len: usize) -
     ", options(noreturn));
 }
 pub use arch_copy_to_user as arch_copy_from_user;
+
+pub unsafe fn bootstrap_mem(bootstrap: &Bootstrap) -> &'static [u8] {
+    core::slice::from_raw_parts(CurrentRmmArch::phys_to_virt(bootstrap.base.start_address()).data() as *const u8, bootstrap.page_count * PAGE_SIZE)
+}
diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs
index a591fbb0..54d53b9b 100644
--- a/src/arch/x86_64/paging/mod.rs
+++ b/src/arch/x86_64/paging/mod.rs
@@ -159,6 +159,7 @@ pub fn page_fault_handler(stack: &mut InterruptStack, code: PageFaultError, faul
     let caused_by_kernel = !caused_by_user;
     let caused_by_write = code.contains(PageFaultError::WR);
     let caused_by_instr_fetch = code.contains(PageFaultError::ID);
+    let is_usercopy = usercopy_region.contains(&{ stack.iret.rip });
 
     let mode = match (caused_by_write, caused_by_instr_fetch) {
         (true, false) => AccessMode::Write,
@@ -172,7 +173,7 @@ pub fn page_fault_handler(stack: &mut InterruptStack, code: PageFaultError, faul
         return Err(Segv);
     }
 
-    if address_is_user {
+    if address_is_user && (caused_by_user || is_usercopy) {
         match try_correcting_page_tables(faulting_page, mode) {
             Ok(()) => return Ok(()),
             Err(PfError::Oom) => todo!("oom"),
@@ -181,7 +182,7 @@ pub fn page_fault_handler(stack: &mut InterruptStack, code: PageFaultError, faul
         }
     }
 
-    if address_is_user && caused_by_kernel && mode != AccessMode::InstrFetch && usercopy_region.contains(&{ stack.iret.rip }) {
+    if address_is_user && caused_by_kernel && mode != AccessMode::InstrFetch && is_usercopy {
         // We were inside a usercopy function that failed. This is handled by setting rax to a
         // nonzero value, and emulating the ret instruction.
         stack.scratch.rax = 1;
diff --git a/src/context/memory.rs b/src/context/memory.rs
index ef566b74..b00541fc 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -126,11 +126,11 @@ impl AddrSpace {
 
         for grant_span in regions {
             let grant = self.grants.remove(grant_span.base).expect("grant cannot magically disappear while we hold the lock!");
-            log::info!("Mprotecting {:#?} to {:#?} in {:#?}", grant, flags, grant_span);
+            //log::info!("Mprotecting {:#?} to {:#?} in {:#?}", grant, flags, grant_span);
             let intersection = grant_span.intersection(requested_span);
 
             let (before, mut grant, after) = grant.extract(intersection).expect("failed to extract grant");
-            log::info!("Sliced into\n\n{:#?}\n\n{:#?}\n\n{:#?}", before, grant, after);
+            //log::info!("Sliced into\n\n{:#?}\n\n{:#?}\n\n{:#?}", before, grant, after);
 
             if let Some(before) = before { self.grants.insert(before); }
             if let Some(after) = after { self.grants.insert(after); }
@@ -150,7 +150,7 @@ impl AddrSpace {
             // think), execute-only memory is also supported.
 
             grant.remap(mapper, &mut flusher, new_flags);
-            log::info!("Mprotect grant become {:#?}", grant);
+            //log::info!("Mprotect grant became {:#?}", grant);
             self.grants.insert(grant);
         }
         Ok(())
@@ -588,6 +588,13 @@ impl PageInfo {
         (self.cow_refcount.load(Ordering::Acquire) == 1).then_some(&*self.arc)
     }
 
+    pub fn remove_ref(self, cow: bool) {
+        if cow {
+            self.cow_refcount.fetch_sub(1, Ordering::Release);
+        }
+        drop(self.into_inner());
+    }
+
     fn into_inner(mut self) -> Arc<PageInfoInner> {
         let arc = unsafe { ManuallyDrop::take(&mut self.arc) };
         core::mem::forget(self);
@@ -617,7 +624,7 @@ pub struct PageInfoInner {
 impl Drop for PageInfoInner {
     #[track_caller]
     fn drop(&mut self) {
-        assert_eq!(*self.cow_refcount.get_mut(), 1);
+        assert_eq!(*self.cow_refcount.get_mut(), 0);
     }
 }
 
@@ -739,7 +746,7 @@ impl Grant {
             },
         })
     }
-    // TODO: This is limited to one page. Should it be (if some magic new proc: API is introduced).
+    // TODO: This is limited to one page. Should it be (if some magic new proc: API is introduced)?
     pub fn cow(
         src_address_space: Arc<RwLock<AddrSpace>>,
         src_base: Page,
@@ -753,11 +760,9 @@ impl Grant {
         src_pages: &[Option<PageInfo>],
     ) -> Result<Grant, Enomem> {
         let mut pages = try_new_vec_with_exact_size(page_count)?;
-        pages.resize_with(page_count, || None);
 
-        /*
         for page_idx in 0..page_count {
-            let src_page_info = src_pages[page_idx].as_ref().map(Arc::clone);
+            let src_page_info = src_pages[page_idx].as_ref().map(|pg| pg.ref_clone(true));
             let phys = src_page_info.as_ref().map(|pg| pg.phys.start_address());
             pages.push(src_page_info);
 
@@ -780,7 +785,6 @@ impl Grant {
 
             dst_flusher.consume(map_result);
         }
-        */
 
         Ok(Grant {
             base: dst_base,
@@ -813,7 +817,7 @@ impl Grant {
                 let Some(result) = mapper.remap(page.start_address(), flags) else {
                     continue;
                 };
-                log::info!("Remapped page {:?} (frame {:?})", page, Frame::containing_address(mapper.translate(page.start_address()).unwrap().0));
+                //log::info!("Remapped page {:?} (frame {:?})", page, Frame::containing_address(mapper.translate(page.start_address()).unwrap().0));
                 flusher.consume(result);
             }
         }
@@ -828,13 +832,11 @@ impl Grant {
         for (page_idx, page) in self.span().pages().enumerate() {
             match self.info.provider {
                 Provider::Allocated { ref mut pages } | Provider::External { pages: Some(ref mut pages), .. } => {
-                    let Some(page_info) = pages[page_idx].take().map(PageInfo::into_inner) else {
+                    let Some(page_info) = pages[page_idx].take() else {
                         continue;
                     };
-                    if is_cow {
-                        // TODO: Ordering
-                        page_info.cow_refcount.fetch_sub(1, Ordering::Release);
-                    }
+
+                    page_info.remove_ref(is_cow)
                 }
                 _ => (),
             }
@@ -1087,6 +1089,28 @@ pub enum PfError {
     NonfatalInternalError,
 }
 
+pub fn make_exclusive(page_slot: &mut Option<PageInfo>) -> Result<Frame, PfError> {
+    let old_page_opt = page_slot.take();
+    let new_page = page_slot.insert(PageInfo::try_new_exclusive().map_err(|_| PfError::Oom)?);
+
+    if let Some(old_page) = old_page_opt {
+        unsafe { copy_frame_to_frame_directly(new_page.phys.clone(), old_page.phys.clone()); }
+        old_page.remove_ref(true);
+    }
+
+    Ok(new_page.phys.clone())
+}
+
+pub unsafe fn copy_frame_to_frame_directly(dst: Frame, src: Frame) {
+    // Optimized exact-page-size copy function?
+    let dst = unsafe { RmmA::phys_to_virt(dst.start_address()).data() as *mut u8 };
+    let src = unsafe { RmmA::phys_to_virt(src.start_address()).data() as *const u8 };
+
+    unsafe {
+        dst.copy_from_nonoverlapping(src, PAGE_SIZE);
+    }
+}
+
 pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Result<(), PfError> {
     let Ok(addr_space) = AddrSpace::current() else {
         log::warn!("User page fault without address space being set.");
@@ -1122,9 +1146,7 @@ pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Re
                 match pages[pages_from_grant_start].as_ref().and_then(PageInfo::try_get_exclusively) {
                     Some(exclusively_owned) => exclusively_owned.phys.clone(),
                     // TODO: Option::get_or_try_insert?
-                    None => {
-                        pages[pages_from_grant_start].insert(PageInfo::try_new_exclusive().map_err(|_| PfError::Oom)?).phys.clone()
-                    }
+                    None => make_exclusive(&mut pages[pages_from_grant_start])?,
                 }
             }
             // TODO: the zeroed page?
@@ -1140,37 +1162,51 @@ pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Re
                 base.next_by(pages_from_grant_start)
             }
             Provider::External { cow, address_space: ref foreign_address_space, ref src_base, ref mut pages } => {
-                if let Some(page_info) = pages.as_ref().and_then(|pgs| pgs[pages_from_grant_start].as_ref()) {
-                    break 'get_frame page_info.phys.clone();
+                let pages = match pages {
+                    Some(pgs) => pgs,
+                    None => pages.insert(try_box_slice_new(|| None, grant_info.page_count).map_err(|_| PfError::Oom)?),
+                };
+
+                if let Some(page_info) = &pages[pages_from_grant_start] {
+                    // TODO: Deduplicate (kernel source) code?
+                    if !cow || access != AccessMode::Write {
+                        break 'get_frame page_info.phys.clone();
+                    } else {
+
+                        break 'get_frame make_exclusive(&mut pages[pages_from_grant_start])?;
+                    }
                 }
 
                 let guard = foreign_address_space.upgradeable_read();
                 let src_page = src_base.next_by(pages_from_grant_start);
 
-                let Some((_src_base, foreign_grant)) = guard.grants.contains(src_page) else {
+                let Some((owner_base, owner_grant)) = guard.grants.contains(src_page) else {
                     log::error!("Foreign grant did not exist at specified offset.");
                     return Err(PfError::NonfatalInternalError);
                 };
 
                 // TODO: Would nested grants provide any benefit?
                 // TODO: Use recursion?
-                let Provider::Allocated { pages: ref owner_pages } = foreign_grant.provider else {
+                let Provider::Allocated { pages: ref owner_pages } = owner_grant.provider else {
                     log::error!("Chained grants!");
                     return Err(PfError::NonfatalInternalError);
                 };
+                let owner_pages_from_grant_start = src_page.offset_from(owner_base);
 
-                let page_info = match &owner_pages[pages_from_grant_start] {
+                let page_info = match &owner_pages[owner_pages_from_grant_start] {
                     Some(page) => page.ref_clone(cow),
                     None => {
                         let mut guard = RwLockUpgradableGuard::upgrade(guard);
 
-                        let (owner_base, owner_grant) = guard.grants.contains_mut(src_page)
+                        let (owner_base_again, owner_grant) = guard.grants.contains_mut(src_page)
                             .expect("grant cannot disappear without write lock having existed");
 
+                        debug_assert_eq!(owner_base, owner_base_again);
+
                         let Provider::Allocated { pages: ref mut owner_pages } = owner_grant.provider else {
                             unreachable!("cannot have changed in the meantime");
                         };
-                        let owner_pages_from_grant_start = src_page.offset_from(owner_base);
+                        debug_assert!(owner_pages[owner_pages_from_grant_start].is_none());
 
                         owner_pages[owner_pages_from_grant_start].insert(PageInfo::try_new_exclusive().map_err(|_| PfError::Oom)?).ref_clone(cow)
                     }
@@ -1178,10 +1214,7 @@ pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Re
 
                 let frame = page_info.phys.clone();
 
-                (match pages {
-                    Some(pgs) => pgs,
-                    None => pages.insert(try_box_slice_new(|| None, grant_info.page_count).map_err(|_| PfError::Oom)?),
-                })[pages_from_grant_start] = Some(page_info);
+                pages[pages_from_grant_start] = Some(page_info);
 
                 frame
             }
@@ -1189,7 +1222,9 @@ pub fn try_correcting_page_tables(faulting_page: Page, access: AccessMode) -> Re
         }
     };
 
-    log::info!("Correcting {:?} => {:?} (base {:?} info {:?})", faulting_page, frame, grant_base, grant_info);
+    if super::context_id().into() == 3 {
+        log::info!("Correcting {:?} => {:?} (base {:?} info {:?})", faulting_page, frame, grant_base, grant_info);
+    }
     let Some(flush) = (unsafe { addr_space.table.utable.map_phys(faulting_page.start_address(), frame.start_address(), grant_flags) }) else {
         // TODO
         return Err(PfError::Oom);
diff --git a/src/syscall/mod.rs b/src/syscall/mod.rs
index decf88bd..2d6a1e44 100644
--- a/src/syscall/mod.rs
+++ b/src/syscall/mod.rs
@@ -179,7 +179,7 @@ pub fn syscall(a: usize, b: usize, c: usize, d: usize, e: usize, f: usize, stack
         let contexts = crate::context::contexts();
         if let Some(context_lock) = contexts.current() {
             let context = context_lock.read();
-            if context.name.contains("bootstrap") {
+            if true || context.name.contains("bootstrap") {
                 if a == SYS_CLOCK_GETTIME || a == SYS_YIELD {
                     false
                 } else if (a == SYS_WRITE || a == SYS_FSYNC) && (b == 1 || b == 2) {
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index 8d08d5e9..bbd82b97 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -13,7 +13,7 @@ use crate::Bootstrap;
 use crate::context;
 use crate::interrupt;
 use crate::paging::mapper::{InactiveFlusher, PageFlushAll};
-use crate::paging::{Page, PageFlags, VirtualAddress};
+use crate::paging::{Page, PageFlags, VirtualAddress, PAGE_SIZE};
 use crate::ptrace;
 use crate::start::usermode;
 use crate::syscall::data::SigAction;
@@ -593,8 +593,7 @@ pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap) -> ! {
 
         // TODO: Mark as owned and then support reclaiming the memory to the allocator if
         // deallocated?
-        addr_space.grants.insert(context::memory::Grant::physmap(
-            bootstrap.base.clone(),
+        addr_space.grants.insert(context::memory::Grant::zeroed(
             PageSpan::new(
                 Page::containing_address(VirtualAddress::new(0)),
                 bootstrap.page_count,
@@ -603,7 +602,13 @@ pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap) -> ! {
             &mut addr_space.table.utable,
             PageFlushAll::new(),
         ).expect("failed to physmap bootstrap memory"));
+
     }
+    // TODO: Not all arches do linear mapping
+    UserSliceWo::new(0, bootstrap.page_count * PAGE_SIZE)
+        .expect("failed to create bootstrap user slice")
+        .copy_from_slice(unsafe { crate::arch::bootstrap_mem(bootstrap) })
+        .expect("failed to copy memory to bootstrap");
 
     // Start in a minimal environment without any stack.
     usermode(bootstrap.entry, 0, 0, 0);
-- 
GitLab