From 777a82b50f13cdd2d2b17481b604ff39470b3232 Mon Sep 17 00:00:00 2001
From: 4lDO2 <>
Date: Thu, 7 Jul 2022 12:00:56 +0200
Subject: [PATCH] Refactor out clone, fix some fd leaks.

 src/platform/redox/ | 278 ++++++++++++++++++++++++++++++++++++
 src/platform/redox/  |  52 ++++---
 src/platform/redox/ | 240 +------------------------------
 src/platform/redox/   |  13 +-
 4 files changed, 324 insertions(+), 259 deletions(-)
 create mode 100644 src/platform/redox/

diff --git a/src/platform/redox/ b/src/platform/redox/
new file mode 100644
index 00000000..8877ef91
--- /dev/null
+++ b/src/platform/redox/
@@ -0,0 +1,278 @@
+use core::arch::global_asm;
+use core::mem::size_of;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use syscall::data::Map;
+use syscall::flag::{MapFlags, O_CLOEXEC};
+use syscall::error::{Error, Result, EINVAL, ENAMETOOLONG};
+use syscall::SIGCONT;
+use super::extra::{create_set_addr_space_buf, FdGuard};
+/// Creates a new context and returns a guard for its file descriptor together with its pid.
+///
+/// The context is created by opening `thisproc:new/open_via_dup`. The pid is then recovered by
+/// parsing the path that `fpath` reports for that descriptor: it is the text between the first
+/// `:` and the next `/`, parsed as a `usize`.
+///
+/// # Errors
+///
+/// Propagates errors from `open` and `fpath`. Returns `ENAMETOOLONG` if the reported length
+/// exceeds the 64-byte buffer, and `EINVAL` if the path has no `:` or no following `/`, or if
+/// the text between them is not valid UTF-8 or does not parse as a `usize`.
+fn new_context() -> Result<(FdGuard, usize)> {
+    // Create a new context (fields such as uid/gid will be inherited from the current context).
+    let fd = FdGuard::new(syscall::open("thisproc:new/open_via_dup", O_CLOEXEC)?);
+    // Extract pid.
+    let mut buffer = [0_u8; 64];
+    let len = syscall::fpath(*fd, &mut buffer)?;
+    // Shadow the array with the slice that was actually filled in.
+    let buffer = buffer.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
+    let colon_idx = buffer.iter().position(|c| *c == b':').ok_or(Error::new(EINVAL))?;
+    // `position` counts from `colon_idx` because of the `skip`, so `colon_idx` is added back to
+    // obtain an index into `buffer`.
+    let slash_idx = buffer.iter().skip(colon_idx).position(|c| *c == b'/').ok_or(Error::new(EINVAL))? + colon_idx;
+    let pid_bytes = buffer.get(colon_idx + 1..slash_idx).ok_or(Error::new(EINVAL))?;
+    let pid_str = core::str::from_utf8(pid_bytes).map_err(|_| Error::new(EINVAL))?;
+    let pid = pid_str.parse::<usize>().map_err(|_| Error::new(EINVAL))?;
+    Ok((fd, pid))
+/// Copies the value stored under `key` from the current context to the new context.
+///
+/// `key` is used as the `dup` path on both `cur_pid_fd` and `new_pid_fd`; the value is read from
+/// the first resulting descriptor and written to the second. Callers in this file pass `"name"`
+/// and `"cwd"`.
+///
+/// # Errors
+///
+/// Propagates errors from `dup`, `read` and `write`.
+fn copy_str(cur_pid_fd: usize, new_pid_fd: usize, key: &str) -> Result<()> {
+    // Both descriptors are wrapped in `FdGuard`; presumably its `Drop` impl closes them on every
+    // exit path, including the early returns taken by `?`.
+    let cur_name_fd = FdGuard::new(syscall::dup(cur_pid_fd, key.as_bytes())?);
+    let new_name_fd = FdGuard::new(syscall::dup(new_pid_fd, key.as_bytes())?);
+    // NOTE(review): a single `read` into a 256-byte buffer; a longer value would presumably not
+    // be copied in full.
+    let mut buf = [0_u8; 256];
+    let len = syscall::read(*cur_name_fd, &mut buf)?;
+    let buf = buf.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
+    syscall::write(*new_name_fd, &buf)?;
+    Ok(())
+/// Copies the environment registers and the float registers from the current context to the
+/// new context.
+///
+/// For each register set, a descriptor is obtained with `dup` on `cur_pid_fd` and on
+/// `new_pid_fd` (paths `regs/env` and `regs/float`); the registers are read into a
+/// default-initialized struct from the former and written to the latter.
+///
+/// Only compiled when targeting x86_64.
+///
+/// # Errors
+///
+/// Propagates errors from `dup`, `read` and `write`.
+#[cfg(target_arch = "x86_64")]
+fn copy_float_env_regs(cur_pid_fd: usize, new_pid_fd: usize) -> Result<()> {
+    // Copy environment registers.
+    // The inner scope ends the lifetime of the two guards before the float registers are handled.
+    {
+        let cur_env_regs_fd = FdGuard::new(syscall::dup(cur_pid_fd, b"regs/env")?);
+        let new_env_regs_fd = FdGuard::new(syscall::dup(new_pid_fd, b"regs/env")?);
+        let mut env_regs = syscall::EnvRegisters::default();
+        let _ = syscall::read(*cur_env_regs_fd, &mut env_regs)?;
+        let _ = syscall::write(*new_env_regs_fd, &env_regs)?;
+    }
+    // Copy float registers.
+    {
+        let cur_float_regs_fd = FdGuard::new(syscall::dup(cur_pid_fd, b"regs/float")?);
+        let new_float_regs_fd = FdGuard::new(syscall::dup(new_pid_fd, b"regs/float")?);
+        let mut float_regs = syscall::FloatRegisters::default();
+        let _ = syscall::read(*cur_float_regs_fd, &mut float_regs)?;
+        let _ = syscall::write(*new_float_regs_fd, &float_regs)?;
+    }
+    Ok(())
+/// Spawns a new context sharing the same address space as the current one (i.e. a new thread).
+///
+/// The new context gets a freshly mapped signal stack, copies of the current `name` and `cwd`,
+/// the current address space and file table, and copies of the current float/environment
+/// registers. It is started at `pte_clone_ret` with `stack` as its stack pointer and is then
+/// sent `SIGCONT`.
+///
+/// Returns `Ok(0)` on success; the new pid is not returned to the caller.
+///
+/// # Errors
+///
+/// Propagates the first error from any of the `open`/`dup`/`fmap`/`read`/`write`/`kill` calls.
+///
+/// # Safety
+///
+/// NOTE(review): the contract is not stated in this file. `stack` becomes the new context's
+/// stack pointer, and `pte_clone_ret` pops seven words from it and calls the first one, so it
+/// presumably must point to a valid stack prepared in that layout -- confirm against callers.
+pub unsafe fn pte_clone_impl(stack: *mut usize) -> Result<usize> {
+    let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
+    let (new_pid_fd, new_pid) = new_context()?;
+    // Allocate a new signal stack.
+    {
+        let sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
+        const SIGSTACK_SIZE: usize = 1024 * 256;
+        // TODO: Put sigstack at high addresses?
+        // `!0` is the same value as `FD_ANONYMOUS` in exec.rs. The value written below is the
+        // address one past the end of the mapping (base + size).
+        let target_sigstack = syscall::fmap(!0, &Map { address: 0, flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE, offset: 0, size: SIGSTACK_SIZE })? + SIGSTACK_SIZE;
+        let _ = syscall::write(*sigstack_fd, &usize::to_ne_bytes(target_sigstack))?;
+    }
+    copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
+    copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
+    // Reuse existing address space
+    {
+        let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
+        let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
+        // Arguments are (space, ip, sp): the new context starts at `pte_clone_ret` on `stack`.
+        let buf = create_set_addr_space_buf(*cur_addr_space_fd, pte_clone_ret as usize, stack as usize);
+        let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
+    }
+    // Reuse file table
+    {
+        let cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
+        let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
+        // The file table is selected by writing the descriptor number of the current file table.
+        let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*cur_filetable_fd))?;
+    }
+    copy_float_env_regs(*cur_pid_fd, *new_pid_fd)?;
+    // Unblock context.
+    syscall::kill(new_pid, SIGCONT)?;
+    Ok(0)
+/// Spawns a new context which will not share the same address space as the current one. File
+/// descriptors from other schemes are reobtained with `dup`, and grants referencing such file
+/// descriptors are reobtained through `fmap`. Other mappings are kept but duplicated using CoW.
+///
+/// This is a thin wrapper: the work is done by `fork_inner`, which is reached through the
+/// `fork_wrapper` assembly routine and `__relibc_internal_fork_impl`.
+pub fn fork_impl() -> Result<usize> {
+    // `fork_wrapper` returns a raw `usize`; `Error::demux` turns it back into a `Result`
+    // (the counterpart of the `Error::mux` call in `__relibc_internal_fork_impl`).
+    unsafe {
+        Error::demux(fork_wrapper())
+    }
+/// Does the actual work of `fork_impl`: sets up a new context and returns its pid.
+///
+/// `initial_rsp` is the stack pointer value passed in by the `fork_wrapper` assembly; it is
+/// given to the new context as its stack pointer, with `fork_ret` as its instruction pointer.
+///
+/// The new context receives the current signal stack address, `name`, `cwd`, a new address
+/// space obtained via the `exclusive` path on the current one, a copy of the current file
+/// table, and the current float/environment registers. It is then sent `SIGCONT`.
+///
+/// # Errors
+///
+/// Propagates errors from the `?`-checked syscalls and helpers. NOTE(review): the signal stack
+/// read/write and the file table selection write discard their results, so failures there are
+/// not reported.
+fn fork_inner(initial_rsp: *mut usize) -> Result<usize> {
+    // The block scope ensures every `FdGuard` created during setup is dropped before the new
+    // context is unblocked below.
+    let new_pid = {
+        let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
+        let (new_pid_fd, new_pid) = new_context()?;
+        // Do not allocate new signal stack, but copy existing address (all memory will be re-mapped
+        // CoW later).
+        {
+            let cur_sigstack_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"sigstack")?);
+            let new_sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
+            let mut sigstack_buf = usize::to_ne_bytes(0);
+            // NOTE(review): both results are discarded, unlike most other steps here. If the read
+            // fails, the zero-initialized buffer is what gets written.
+            let _ = syscall::read(*cur_sigstack_fd, &mut sigstack_buf);
+            let _ = syscall::write(*new_sigstack_fd, &sigstack_buf);
+        }
+        copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
+        copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
+        // CoW-duplicate address space.
+        {
+            let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
+            // FIXME: Find mappings which use external file descriptors
+            let new_addr_space_fd = FdGuard::new(syscall::dup(*cur_addr_space_fd, b"exclusive")?);
+            // Read the current address space descriptor until `read` returns 0.
+            // NOTE(review): the buffer is fixed at 4096 bytes; once it is full the slice handed to
+            // `read` is empty, so anything beyond that would presumably be left unread.
+            let mut buf = vec! [0_u8; 4096];
+            let mut bytes_read = 0;
+            loop {
+                let new_bytes_read = syscall::read(*cur_addr_space_fd, &mut buf[bytes_read..])?;
+                if new_bytes_read == 0 { break }
+                bytes_read += new_bytes_read;
+            }
+            let bytes = &buf[..bytes_read];
+            // Each record is four native-endian `usize` words.
+            for struct_bytes in bytes.array_chunks::<{size_of::<usize>() * 4}>() {
+                let mut words = struct_bytes.array_chunks::<{size_of::<usize>()}>().copied().map(usize::from_ne_bytes);
+                // NOTE(review): the right-hand sides of the next four bindings are missing in this
+                // copy of the patch; presumably each takes the next value from `words`. Restore
+                // them from the upstream commit.
+                let addr =;
+                let size =;
+                let flags =;
+                let offset =;
+                // Only records with bit 0x8000_0000 set in `flags` are processed further.
+                // NOTE(review): the meaning of that bit is not shown here; presumably it marks
+                // grants backed by an external file descriptor (see the FIXME above) -- confirm.
+                if flags & 0x8000_0000 == 0 {
+                    continue;
+                }
+                let map_flags = MapFlags::from_bits_truncate(flags);
+                // Re-obtain the mapping: `dup` the `grant-<addr in hex>` path and `fmap` the
+                // resulting descriptor; the guard is dropped at the end of this block.
+                let mapped_address = unsafe {
+                    let fd = FdGuard::new(syscall::dup(*cur_addr_space_fd, format!("grant-{:x}", addr).as_bytes())?);
+                    syscall::fmap(*fd, &syscall::Map { address: 0, size, flags: map_flags, offset })?
+                };
+                // Build a four-word record (addr, size, map flags, mapped address) and write it to
+                // the new address space descriptor.
+                let mut buf = [0_u8; size_of::<usize>() * 4];
+                let mut chunks = buf.array_chunks_mut::<{size_of::<usize>()}>();
+                // NOTE(review): the assignment targets on the next four lines are missing in this
+                // copy of the patch; presumably each is the next chunk from `chunks`.
+                * = usize::to_ne_bytes(addr);
+                * = usize::to_ne_bytes(size);
+                * = usize::to_ne_bytes(map_flags.bits());
+                * = usize::to_ne_bytes(mapped_address);
+                let _ = syscall::write(*new_addr_space_fd, &buf)?;
+            }
+            let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
+            // Arguments are (space, ip, sp): the new context starts at `fork_ret` on `initial_rsp`.
+            let buf = create_set_addr_space_buf(*new_addr_space_fd, fork_ret as usize, initial_rsp as usize);
+            let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
+        }
+        // Copy existing files into new file table, but do not reuse the same file table (i.e. new
+        // parent FDs will not show up for the child).
+        {
+            let cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
+            // TODO: Use cross_scheme_links or something similar to avoid copying the file table in the
+            // kernel.
+            let new_filetable_fd = FdGuard::new(syscall::dup(*cur_filetable_fd, b"copy")?);
+            let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
+            // NOTE(review): the result of this write is discarded; `pte_clone_impl` checks the
+            // corresponding write with `?`.
+            let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*new_filetable_fd));
+        }
+        copy_float_env_regs(*cur_pid_fd, *new_pid_fd)?;
+        new_pid
+    };
+    // Unblock context.
+    syscall::kill(new_pid, SIGCONT)?;
+    Ok(new_pid)
+/// Entry point called from the `fork_wrapper` assembly, which passes its stack pointer in `rdi`
+/// (the first argument under the `sysv64` calling convention used here).
+///
+/// Runs `fork_inner` and packs its `Result` into a single `usize` with `Error::mux`, so that it
+/// can be returned through assembly; `fork_impl` unpacks it again with `Error::demux`.
+unsafe extern "sysv64" fn __relibc_internal_fork_impl(initial_rsp: *mut usize) -> usize {
+    Error::mux(fork_inner(initial_rsp))
+    # NOTE(review): the `global_asm!` opener and the label lines (e.g. `fork_wrapper:` and the
+    # target of `jmp 2f`) are not visible in this copy of the patch; the comments below describe
+    # only the instructions that are shown.
+    .p2align 6
+    .globl fork_wrapper
+    .type fork_wrapper, @function
+    # Set up a frame pointer, then save rbx, rbp and r12-r15 on the stack.
+    push rbp
+    mov rbp, rsp
+    push rbx
+    push rbp
+    push r12
+    push r13
+    push r14
+    push r15
+    # Pass the current stack pointer as the first argument to the Rust implementation; it
+    # becomes `initial_rsp` in `fork_inner`.
+    mov rdi, rsp
+    call __relibc_internal_fork_impl
+    # Skip the `xor` below on this path, keeping the value returned in rax (presumably label
+    # `2:` sits just before the pops -- confirm).
+    jmp 2f
+    # Zero rax. Presumably this is where `fork_ret` begins: the new context is started with
+    # `fork_ret` as its instruction pointer (see `fork_inner`), so it would return 0 -- confirm.
+    xor rax, rax
+    # Restore the registers pushed above, in reverse order, and return.
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rbp
+    pop rbx
+    pop rbp
+    ret
+    .size fork_wrapper, . - fork_wrapper
+    # `pte_clone_ret` is the instruction pointer that `pte_clone_impl` gives to the new context.
+    # NOTE(review): the `pte_clone_ret:` label line is not visible in this copy of the patch.
+    .globl pte_clone_ret
+    .type pte_clone_ret, @function
+    # Load registers
+    # The first word popped (rax) is the address to call; the next six are popped into rdi, rsi,
+    # rdx, rcx, r8 and r9.
+    pop rax
+    pop rdi
+    pop rsi
+    pop rdx
+    pop rcx
+    pop r8
+    pop r9
+    # Call entry point
+    call rax
+    ret
+    .size pte_clone_ret, . - pte_clone_ret
+// Symbols provided by the assembly above (`.globl fork_wrapper` / `.globl pte_clone_ret`).
+// NOTE(review): no `.globl fork_ret` line is visible in this copy of the patch -- confirm.
+extern "sysv64" {
+    // Called by `fork_impl`; its `usize` return value is decoded with `Error::demux`.
+    fn fork_wrapper() -> usize;
+    // Only used as an address: the initial instruction pointer of a forked context.
+    fn fork_ret();
+    // Only used as an address: the initial instruction pointer of a context created by
+    // `pte_clone_impl`.
+    fn pte_clone_ret();
diff --git a/src/platform/redox/ b/src/platform/redox/
index f2231068..77c03b7c 100644
--- a/src/platform/redox/
+++ b/src/platform/redox/
@@ -1,5 +1,5 @@
 use core::convert::TryFrom;
-use super::extra::FdGuard;
+use super::extra::{create_set_addr_space_buf, FdGuard};
 use alloc::{
     collections::{btree_map::Entry, BTreeMap},
@@ -16,7 +16,7 @@ use crate::fs::File;
 fn read_all(fd: usize, offset: Option<u64>, buf: &mut [u8]) -> Result<()> {
     if let Some(offset) = offset {
-        syscall::lseek(fd, offset as isize, syscall::SEEK_SET).unwrap();
+        syscall::lseek(fd, offset as isize, SEEK_SET)?;
     let mut total_bytes_read = 0;
@@ -29,6 +29,21 @@ fn read_all(fd: usize, offset: Option<u64>, buf: &mut [u8]) -> Result<()> {
+/// Writes all of `buf` to `fd`, first seeking to `offset` (with `SEEK_SET`) if one is given.
+///
+/// `syscall::write` is called repeatedly on the remaining part of `buf` until every byte has
+/// been written.
+///
+/// # Errors
+///
+/// Propagates errors from `lseek` and `write`, and returns `EIO` if a write reports that zero
+/// bytes were written while data is still pending.
+fn write_all(fd: usize, offset: Option<u64>, buf: &[u8]) -> Result<()> {
+    if let Some(offset) = offset {
+        // NOTE(review): `offset as isize` is an unchecked cast from `u64`.
+        syscall::lseek(fd, offset as isize, SEEK_SET)?;
+    }
+    let mut total_bytes_written = 0;
+    while total_bytes_written < buf.len() {
+        total_bytes_written += match syscall::write(fd, &buf[total_bytes_written..])? {
+            // A zero-length write would make no progress, so it is reported instead of looping.
+            0 => return Err(Error::new(EIO)),
+            bytes_written => bytes_written,
+        }
+    }
+    Ok(())
 fn find_free_target_addr(tree: &BTreeMap<usize, usize>, size: usize) -> Option<usize> {
     let mut iterator = tree.iter().peekable();
@@ -55,6 +70,14 @@ const PAGE_SIZE: usize = 4096;
 const FD_ANONYMOUS: usize = !0;
 pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_envs_size_without_nul: usize) -> Result<usize> {
+    // All fallible work happens in `fexec_impl_inner`, which returns the guard for the
+    // address-space selection descriptor on success; any error is returned to the caller here.
+    let addrspace_selection_fd = fexec_impl_inner(file, path, args, envs, args_envs_size_without_nul)?;
+    // Dropping this FD will cause the address space switch.
+    drop(addrspace_selection_fd);
+    // Not expected to be reached: per the comment above, the drop switches address spaces.
+    unreachable!();
+fn fexec_impl_inner(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_envs_size_without_nul: usize) -> Result<FdGuard> {
     use goblin::elf64::{header::Header, program_header::program_header64::{ProgramHeader, PT_LOAD, PF_W, PF_X}};
     let fd = *file as usize;
@@ -65,7 +88,6 @@ pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_
     // some misalignments, and then execute the SYS_EXEC syscall to replace the program memory
     // entirely.
-    // TODO: setuid/setgid
     // TODO: Introduce RAII guards to all owned allocations so that no leaks occur in case of
     // errors.
@@ -146,9 +168,7 @@ pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_
     let mut push = |word: usize| {
         sp -= core::mem::size_of::<usize>();
-        let _ = syscall::lseek(*memory_fd, sp as isize, SEEK_SET)?;
-        let _ = syscall::write(*memory_fd, &usize::to_ne_bytes(word))?;
-        Ok(())
+        write_all(*memory_fd, Some(sp as u64), &usize::to_ne_bytes(word))
     let pheaders_size_aligned = (pheaders_size+PAGE_SIZE-1)/PAGE_SIZE*PAGE_SIZE;
@@ -156,8 +176,7 @@ pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_
     tree.insert(pheaders, pheaders_size_aligned);
     mprotect_remote(*grants_fd, pheaders, pheaders_size_aligned, MapFlags::PROT_READ)?;
-    syscall::lseek(*memory_fd, pheaders as isize, SEEK_SET).map_err(|_| Error::new(EIO))?;
-    syscall::write(*memory_fd, &phs).map_err(|_| Error::new(EIO))?;
+    write_all(*memory_fd, Some(pheaders as u64), &phs)?;
@@ -186,8 +205,7 @@ pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_
             if is_args { argc += 1; }
             push(target_args_env_address + offset)?;
-            syscall::lseek(*memory_fd, (target_args_env_address + offset) as isize, SEEK_SET).map_err(|_| Error::new(EIO))?;
-            let _ = syscall::write(*memory_fd, source_slice).map_err(|_| Error::new(EIO))?;
+            write_all(*memory_fd, Some((target_args_env_address + offset) as u64), source_slice)?;
             offset += source_slice.len() + 1;
@@ -197,21 +215,15 @@ pub fn fexec_impl(file: File, path: &[u8], args: &[&[u8]], envs: &[&[u8]], args_
     unsafe { crate::ld_so::tcb::Tcb::deactivate(); }
     // TODO: Restore old name if exec failed?
-    if let Ok(name_fd) = syscall::open("thisproc:current/name", O_WRONLY) {
-        let _ = syscall::write(name_fd, path);
-        let _ = syscall::close(name_fd);
+    if let Ok(name_fd) = syscall::open("thisproc:current/name", O_WRONLY).map(FdGuard::new) {
+        let _ = syscall::write(*name_fd, path);
-    drop(file);
     let addrspace_selection_fd = FdGuard::new(syscall::open("thisproc:current/current-addrspace", O_WRONLY)?);
-    let mut buf = [0_u8; 24];
-    buf[..8].copy_from_slice(&usize::to_ne_bytes(*grants_fd));
-    buf[8..16].copy_from_slice(&usize::to_ne_bytes(sp));
-    buf[16..24].copy_from_slice(&usize::to_ne_bytes(header.e_entry as usize));
+    let _ = syscall::write(*addrspace_selection_fd, &create_set_addr_space_buf(*grants_fd, header.e_entry as usize, sp));
-    let _ = syscall::write(*addrspace_selection_fd, &buf);
-    unreachable!();
+    Ok(addrspace_selection_fd)
 fn mprotect_remote(socket: usize, addr: usize, len: usize, flags: MapFlags) -> Result<()> {
     let mut grants_buf = [0_u8; 24];
diff --git a/src/platform/redox/ b/src/platform/redox/
index dc7f4686..3e3ba46d 100644
--- a/src/platform/redox/
+++ b/src/platform/redox/
@@ -1,10 +1,4 @@
-use core::{mem, ptr, slice};
-use core::arch::global_asm;
-use syscall::data::Map;
-use syscall::flag::{MapFlags, O_CLOEXEC};
-use syscall::error::{Error, Result, EINVAL, ENAMETOOLONG};
-use syscall::SIGCONT;
+use core::{mem::size_of, ptr, slice};
 use crate::platform::{sys::e, types::*};
@@ -84,237 +78,11 @@ impl Drop for FdGuard {
-fn new_context() -> Result<(FdGuard, usize)> {
-    // Create a new context (fields such as uid/gid will be inherited from the current context).
-    let fd = FdGuard::new(syscall::open("thisproc:new/open_via_dup", O_CLOEXEC)?);
-    // Extract pid.
-    let mut buffer = [0_u8; 64];
-    let len = syscall::fpath(*fd, &mut buffer)?;
-    let buffer = buffer.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
-    let colon_idx = buffer.iter().position(|c| *c == b':').ok_or(Error::new(EINVAL))?;
-    let slash_idx = buffer.iter().skip(colon_idx).position(|c| *c == b'/').ok_or(Error::new(EINVAL))? + colon_idx;
-    let pid_bytes = buffer.get(colon_idx + 1..slash_idx).ok_or(Error::new(EINVAL))?;
-    let pid_str = core::str::from_utf8(pid_bytes).map_err(|_| Error::new(EINVAL))?;
-    let pid = pid_str.parse::<usize>().map_err(|_| Error::new(EINVAL))?;
-    Ok((fd, pid))
-fn copy_str(cur_pid_fd: usize, new_pid_fd: usize, key: &str) -> Result<()> {
-    let cur_name_fd = FdGuard::new(syscall::dup(cur_pid_fd, key.as_bytes())?);
-    let new_name_fd = FdGuard::new(syscall::dup(new_pid_fd, key.as_bytes())?);
-    let mut buf = [0_u8; 256];
-    let len = syscall::read(*cur_name_fd, &mut buf)?;
-    let buf = buf.get(..len).ok_or(Error::new(ENAMETOOLONG))?;
-    syscall::write(*new_name_fd, &buf)?;
-    Ok(())
-#[cfg(target_arch = "x86_64")]
-fn copy_float_env_regs(cur_pid_fd: usize, new_pid_fd: usize) -> Result<()> {
-    // Copy environment registers.
-    {
-        let cur_env_regs_fd = FdGuard::new(syscall::dup(cur_pid_fd, b"regs/env")?);
-        let new_env_regs_fd = FdGuard::new(syscall::dup(new_pid_fd, b"regs/env")?);
-        let mut env_regs = syscall::EnvRegisters::default();
-        let _ = syscall::read(*cur_env_regs_fd, &mut env_regs)?;
-        let _ = syscall::write(*new_env_regs_fd, &env_regs)?;
-    }
-    // Copy float registers.
-    {
-        let cur_float_regs_fd = FdGuard::new(syscall::dup(cur_pid_fd, b"regs/float")?);
-        let new_float_regs_fd = FdGuard::new(syscall::dup(new_pid_fd, b"regs/float")?);
-        let mut float_regs = syscall::FloatRegisters::default();
-        let _ = syscall::read(*cur_float_regs_fd, &mut float_regs)?;
-        let _ = syscall::write(*new_float_regs_fd, &float_regs)?;
-    }
-    Ok(())
-/// Spawns a new context sharing the same address space as the current one (i.e. a new thread).
-pub unsafe fn pte_clone_impl(stack: *mut usize) -> Result<usize> {
-    let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
-    let (new_pid_fd, new_pid) = new_context()?;
-    // Allocate a new signal stack.
-    {
-        let sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
-        const SIGSTACK_SIZE: usize = 1024 * 256;
-        // TODO: Put sigstack at high addresses?
-        let target_sigstack = syscall::fmap(!0, &Map { address: 0, flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE, offset: 0, size: SIGSTACK_SIZE })? + SIGSTACK_SIZE;
-        let _ = syscall::write(*sigstack_fd, &usize::to_ne_bytes(target_sigstack))?;
-    }
-    copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
-    copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
-    // Reuse existing address space
-    {
-        let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
-        let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
-        let buf = create_set_addr_space_buf(*cur_addr_space_fd, pte_clone_ret as usize, stack as usize);
-        let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
-    }
-    // Reuse file table
-    {
-        let cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
-        let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
-        let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*cur_filetable_fd))?;
-    }
-    copy_float_env_regs(*cur_pid_fd, *new_pid_fd)?;
-    // Unblock context. 
-    syscall::kill(new_pid, SIGCONT);
-    Ok(0)
-fn create_set_addr_space_buf(space: usize, ip: usize, sp: usize) -> [u8; mem::size_of::<usize>() * 3] {
-    let mut buf = [0_u8; 3 * mem::size_of::<usize>()];
-    let mut chunks = buf.array_chunks_mut::<{mem::size_of::<usize>()}>();
+pub fn create_set_addr_space_buf(space: usize, ip: usize, sp: usize) -> [u8; size_of::<usize>() * 3] {
+    let mut buf = [0_u8; 3 * size_of::<usize>()];
+    let mut chunks = buf.array_chunks_mut::<{size_of::<usize>()}>();
     * = usize::to_ne_bytes(space);
     * = usize::to_ne_bytes(sp);
     * = usize::to_ne_bytes(ip);
-/// Spawns a new context which will not share the same address space as the current one. File
-/// descriptors from other schemes are reobtained with `dup`, and grants referencing such file
-/// descriptors are reobtained through `fmap`. Other mappings are kept but duplicated using CoW.
-pub fn fork_impl() -> Result<usize> {
-    unsafe {
-        Error::demux(fork_wrapper())
-    }
-fn fork_inner(initial_rsp: *mut usize) -> Result<usize> {
-    let new_pid = {
-        let cur_pid_fd = FdGuard::new(syscall::open("thisproc:current/open_via_dup", O_CLOEXEC)?);
-        let (new_pid_fd, new_pid) = new_context()?;
-        // Do not allocate new signal stack, but copy existing address (all memory will be re-mapped
-        // CoW later).
-        {
-            let cur_sigstack_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"sigstack")?);
-            let new_sigstack_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"sigstack")?);
-            let mut sigstack_buf = usize::to_ne_bytes(0);
-            let _ = syscall::read(*cur_sigstack_fd, &mut sigstack_buf);
-            let _ = syscall::write(*new_sigstack_fd, &sigstack_buf);
-        }
-        copy_str(*cur_pid_fd, *new_pid_fd, "name")?;
-        copy_str(*cur_pid_fd, *new_pid_fd, "cwd")?;
-        // CoW-duplicate address space.
-        {
-            let cur_addr_space_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"addrspace")?);
-            // FIXME: Find mappings which use external file descriptors
-            let new_addr_space_fd = FdGuard::new(syscall::dup(*cur_addr_space_fd, b"exclusive")?);
-            let new_addr_space_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-addrspace")?);
-            let buf = create_set_addr_space_buf(*new_addr_space_fd, fork_ret as usize, initial_rsp as usize);
-            let _ = syscall::write(*new_addr_space_sel_fd, &buf)?;
-        }
-        // Copy existing files into new file table, but do not reuse the same file table (i.e. new
-        // parent FDs will not show up for the child).
-        {
-            let cur_filetable_fd = FdGuard::new(syscall::dup(*cur_pid_fd, b"filetable")?);
-            // TODO: Use cross_scheme_links or something similar to avoid copying the file table in the
-            // kernel.
-            let new_filetable_fd = FdGuard::new(syscall::dup(*cur_filetable_fd, b"copy")?);
-            let new_filetable_sel_fd = FdGuard::new(syscall::dup(*new_pid_fd, b"current-filetable")?);
-            let _ = syscall::write(*new_filetable_sel_fd, &usize::to_ne_bytes(*new_filetable_fd));
-        }
-        copy_float_env_regs(*cur_pid_fd, *new_pid_fd)?;
-        new_pid
-    };
-    // Unblock context.
-    syscall::kill(new_pid, SIGCONT);
-    Ok(new_pid)
-unsafe extern "sysv64" fn __relibc_internal_fork_impl(initial_rsp: *mut usize) -> usize {
-    Error::mux(fork_inner(initial_rsp))
-    .p2align 6
-    .globl fork_wrapper
-    .type fork_wrapper, @function
-    push rbp
-    mov rbp, rsp
-    push rbx
-    push rbp
-    push r12
-    push r13
-    push r14
-    push r15
-    mov rdi, rsp
-    call __relibc_internal_fork_impl
-    jmp 2f
-    xor rax, rax
-    pop r15
-    pop r14
-    pop r13
-    pop r12
-    pop rbp
-    pop rbx
-    pop rbp
-    ret
-    .size fork_wrapper, . - fork_wrapper
-    .globl pte_clone_ret
-    .type pte_clone_ret, @function
-    # Load registers
-    pop rax
-    pop rdi
-    pop rsi
-    pop rdx
-    pop rcx
-    pop r8
-    pop r9
-    # Call entry point
-    call rax
-    ret
-    .size pte_clone_ret, . - pte_clone_ret
-extern "sysv64" {
-    fn fork_wrapper() -> usize;
-    fn fork_ret();
-    fn pte_clone_ret();
diff --git a/src/platform/redox/ b/src/platform/redox/
index a51b56dc..ab5deedb 100644
--- a/src/platform/redox/
+++ b/src/platform/redox/
@@ -34,6 +34,7 @@ use super::{errno, types::*, Pal, Read};
 static mut BRK_CUR: *mut c_void = ptr::null_mut();
 static mut BRK_END: *mut c_void = ptr::null_mut();
+mod clone;
 mod epoll;
 mod exec;
 mod extra;
@@ -355,6 +356,10 @@ impl Pal for Sys {
         // Close all O_CLOEXEC file descriptors. TODO: close_range?
+            // NOTE: This approach of implementing O_CLOEXEC will not work in multithreaded
+            // scenarios. While execve() is undefined according to POSIX if there exist sibling
+            // threads, it could still be allowed by keeping certain file descriptors and instead
+            // setting the active file table.
             let name = CStr::from_bytes_with_nul(b"thisproc:current/filetable\0").expect("string should be valid");
             let files_fd = match File::open(name, fcntl::O_RDONLY) {
                 Ok(f) => f,
@@ -398,7 +403,9 @@ impl Pal for Sys {
             // TODO: Plus, at this point fexecve is not implemented (but specified in
             // POSIX.1-2008), and to avoid bad syscalls such as fpath, passing a file descriptor
             // would be better.
-            escalate_fd.write_all(path.to_bytes());
+            if escalate_fd.write_all(path.to_bytes()).is_err() {
+                return -1;
+            }
             // Second, we write the flattened args and envs with NUL characters separating
             // individual items.
@@ -455,7 +462,7 @@ impl Pal for Sys {
     fn fork() -> pid_t {
-        e(extra::fork_impl()) as pid_t
+        e(clone::fork_impl()) as pid_t
     fn fstat(fildes: c_int, buf: *mut stat) -> c_int {
@@ -938,7 +945,7 @@ impl Pal for Sys {
     #[cfg(target_arch = "x86_64")]
     unsafe fn pte_clone(stack: *mut usize) -> pid_t {
-        e(extra::pte_clone_impl(stack)) as pid_t
+        e(clone::pte_clone_impl(stack)) as pid_t
     fn read(fd: c_int, buf: &mut [u8]) -> ssize_t {