diff --git a/Cargo.lock b/Cargo.lock
index 6f2ce4c5c5274c5e3fc84824ba5b7f782c0325e6..34baa0b6abecafc77ec49e2247e1f1b91678d108 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -390,9 +390,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.4"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853"
+checksum = "62871f2d65009c0256aed1b9cfeeb8ac272833c404e13d53d400cd0dad7a2ac0"
 dependencies = [
  "bitflags",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index cc46f4547644f1de3c0074779e8c6daae9db2a3d..668f1ac578f9e125d2b8bfbe07a18c2369dc5f2e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -59,7 +59,7 @@ features = ["c_api"]
 sc = "0.2.3"
 
 [target.'cfg(target_os = "redox")'.dependencies]
-redox_syscall = "0.5.4"
+redox_syscall = "0.5.5"
 redox-rt = { path = "redox-rt" }
 redox-path = "0.2"
 redox_event = { git = "https://gitlab.redox-os.org/redox-os/event.git", default-features = false, features = ["redox_syscall"] }
diff --git a/include/bits/signal.h b/include/bits/signal.h
index 57832b5e3699c5cfdfa01b4d80faf64682e5a26d..56ffb0ec62fc615075a8a389c1d3f20c026dd698 100644
--- a/include/bits/signal.h
+++ b/include/bits/signal.h
@@ -5,9 +5,10 @@
 #define SIG_IGN ((void (*)(int))1)
 #define SIG_ERR ((void (*)(int))-1)
 
-struct siginfo;
 typedef struct siginfo siginfo_t;
 typedef unsigned long long sigset_t;
+typedef struct ucontext ucontext_t;
+typedef struct mcontext mcontext_t;
 
 struct sigaction {
   union {
diff --git a/redox-rt/src/arch/aarch64.rs b/redox-rt/src/arch/aarch64.rs
index 02ac5341534c374f7876e64f8bad112347b3686b..0e4dbbc76a924e4977865b51ef828d70f8a77ae5 100644
--- a/redox-rt/src/arch/aarch64.rs
+++ b/redox-rt/src/arch/aarch64.rs
@@ -1,10 +1,10 @@
-use core::mem::offset_of;
+use core::{mem::offset_of, ptr::NonNull};
 
 use syscall::{data::*, error::*};
 
 use crate::{
     proc::{fork_inner, FdGuard},
-    signal::{inner_c, RtSigarea, SigStack, PROC_CONTROL_STRUCT},
+    signal::{inner_c, PosixStackt, RtSigarea, SigStack, PROC_CONTROL_STRUCT},
     RtTcb, Tcb,
 };
 
@@ -19,11 +19,15 @@ pub struct SigArea {
     pub altstack_bottom: usize,
     pub tmp_x1_x2: [usize; 2],
     pub tmp_x3_x4: [usize; 2],
+    pub tmp_x5_x6: [usize; 2],
     pub tmp_sp: usize,
     pub onstack: u64,
     pub disable_signals_depth: u64,
     pub pctl: usize, // TODO: remove
     pub last_sig_was_restart: bool,
+    pub last_sigstack: Option<NonNull<SigStack>>,
+    pub tmp_rt_inf: RtSigInfo,
+    pub tmp_id_inf: u64,
 }
 #[repr(C)]
 #[derive(Debug, Default)]
@@ -150,75 +154,202 @@ asmfunction!(__relibc_internal_fork_ret: ["
     ret
 "] <= [child_hook = sym child_hook]);
 
+// https://devblogs.microsoft.com/oldnewthing/20220811-00/?p=106963
 asmfunction!(__relibc_internal_sigentry: ["
-    // old pc and x0 are saved in the sigcontrol struct
+    // Clear any active reservation.
+    clrex
+
+    // The old pc and x0 are saved in the sigcontrol struct.
     mrs x0, tpidr_el0 // ABI ptr
     ldr x0, [x0] // TCB ptr
 
-    // save x1-x3 and sp
+    // Save x1-x6 and sp
     stp x1, x2, [x0, #{tcb_sa_off} + {sa_tmp_x1_x2}]
     stp x3, x4, [x0, #{tcb_sa_off} + {sa_tmp_x3_x4}]
+    stp x5, x6, [x0, #{tcb_sa_off} + {sa_tmp_x5_x6}]
     mov x1, sp
     str x1, [x0, #{tcb_sa_off} + {sa_tmp_sp}]
 
-    sub x1, x1, 128
-    and x1, x1, -16
-    mov sp, x1
+    ldr x6, [x0, #{tcb_sa_off} + {sa_pctl}]
+1:
+    // Load x1 with the thread's bits
+    add x5, x0, #{tcb_sc_off} + {sc_word}
+    ldaxr x1, [x5]
+
+    // First check if there are standard thread signals,
+    and x4, x1, x1, lsr #32 // x4 := x1 & (x1 >> 32)
+    cbnz x4, 3f // jump if x4 != 0
+    clrex
+
+    // and if not, load process pending bitset.
+    add x5, x6, #{pctl_pending}
+    ldaxr x2, [x5]
+
+    // Check if there are standard proc signals:
+    lsr x3, x1, #32 // mask
+    and w3, w2, w3 // pending unblocked proc
+    cbz w3, 4f // skip 'fetch_andn' step if zero
+
+    // If there was one, find which one, and try clearing the bit (last value in x3, addr in x6)
+    // this picks the MSB rather than the LSB, unlike x86. POSIX does not require any specific
+    // ordering though.
+    clz w3, w3
+    mov w4, #31
+    sub w3, w4, w3
+    // x3 now contains the sig_idx
+
+    mov x4, #1
+    lsl x4, x4, x3 // bit to remove
+
+    sub x4, x2, x4 // bit was certainly set, so sub is allowed
+    // x4 is now the new mask to be set
+    add x5, x6, #{pctl_pending}
+
+    add x2, x5, #{pctl_sender_infos}
+    add x2, x2, w3, uxtb 3
+    ldar x2, [x2]
+
+    // Try clearing the bit, retrying on failure.
+    stxr w1, x4, [x5] // try setting pending set to x4, set w1 := 0 on success
+    cbnz w1, 1b // retry everything if this fails
+    mov x1, x3
+    b 2f
+4:
+    // Check for realtime signals, thread/proc.
+    clrex
+
+    // Load the pending set again. TODO: optimize this?
+    add x1, x6, #{pctl_pending}
+    ldaxr x2, [x1]
+    lsr x2, x2, #32
+
+    add x5, x0, #{tcb_sc_off} + {sc_word} + 8
+    ldar x1, [x5]
+
+    orr x2, x1, x2
+    and x2, x2, x2, lsr #32
+    cbz x2, 7f
+
+    rbit x3, x2
+    clz x3, x3
+    mov x4, #31
+    sub x2, x4, x3
+    // x2 now contains sig_idx - 32
+
+    // If realtime signal was directed at thread, handle it as an idempotent signal.
+    lsr x3, x1, x2
+    tbnz x3, #0, 5f
+
+    mov x5, x0
+    mov x4, x8
+    mov x8, #{SYS_SIGDEQUEUE}
+    mov x0, x1
+    add x1, x0, #{tcb_sa_off} + {sa_tmp_rt_inf}
+    svc 0
+    mov x0, x5
+    mov x8, x4
+    cbnz x0, 1b
+
+    b 2f
+5:
+    // A realtime signal was sent to this thread, try clearing its bit.
+    // x3 contains last rt signal word, x2 contains rt_idx
+    clrex
+
+    // Calculate the absolute sig_idx
+    add x1, x3, 32
+
+    // Load si_pid and si_uid
+    add x2, x0, #{tcb_sc_off} + {sc_sender_infos}
+    add x2, x2, w1, uxtb #3
+    ldar x2, [x2]
+
+    add x3, x0, #{tcb_sc_off} + {sc_word} + 8
+    ldxr x2, [x3]
+
+    // Calculate new mask
+    mov x4, #1
+    lsl x4, x4, x2
+    sub x2, x2, x4 // remove bit
+
+    stxr w5, x2, [x3]
+    cbnz w5, 1b
+    str x2, [x0, #{tcb_sa_off} + {sa_tmp_id_inf}]
+    b 2f
+3:
+    // A standard signal was sent to this thread, try clearing its bit.
+    clz x1, x1
+    mov x2, #31
+    sub x1, x2, x1
 
-    ldr x3, [x0, #{tcb_sa_off} + {sa_pctl}]
+    // Load si_pid and si_uid
+    add x2, x0, #{tcb_sc_off} + {sc_sender_infos}
+    add x2, x2, w1, uxtb #3
+    ldar x2, [x2]
 
-    // load x1 and x2 with each word (tearing between x1 and x2 can occur)
-    // acquire ordering
-    add x2, x0, #{tcb_sc_off} + {sc_word}
-    ldaxp x1, x2, [x2]
+    // Clear bit from mask
+    mov x3, #1
+    lsl x3, x3, x1
+    sub x4, x4, x3
 
-    // reduce them by ANDing the upper and lower 32 bits
-    and x1, x1, x1, lsr #32 // put result in lo half
-    and x2, x2, x2, lsl #32 // put result in hi half
-    orr x1, x1, x2 // combine them into the set of pending unblocked
+    // Try updating the mask
+    stxr w3, x1, [x5]
+    cbnz w3, 1b
 
-    // count trailing zeroes, to find signal bit
-    rbit x1, x1
-    clz x1, x1
-    mov x2, #32
-    sub x1, x2, x1
-
-    // TODO: NOT ATOMIC!
+    str x2, [x0, #{tcb_sa_off} + {sa_tmp_id_inf}]
+2:
+    ldr x3, [x0, #{tcb_sa_off} + {sa_pctl}]
+    add x2, x2, {pctl_actions}
     add x2, x3, w1, uxtb #4 // actions_base + sig_idx * sizeof Action
+    // TODO: NOT ATOMIC (tearing allowed between regs)!
     ldxp x2, x3, [x2]
+    clrex
+
+    // Calculate new sp wrt redzone and alignment
+    mov x4, sp
+    sub x4, x4, {REDZONE_SIZE}
+    and x4, x4, -{STACK_ALIGN}
+    mov sp, x4
 
     // skip sigaltstack step if SA_ONSTACK is clear
-    // tbz x2, #57, 2f
-2:
+    // tbz x2, #{SA_ONSTACK_BIT}, 2f
+
     ldr x2, [x0, #{tcb_sc_off} + {sc_saved_pc}]
     ldr x3, [x0, #{tcb_sc_off} + {sc_saved_x0}]
-    stp x2, x3, [sp], #-16
+    stp x2, x3, [sp, #-16]!
 
     ldr x2, [x0, #{tcb_sa_off} + {sa_tmp_sp}]
     mrs x3, nzcv
-    stp x2, x3, [sp], #-16
+    stp x2, x3, [sp, #-16]!
 
     ldp x2, x3, [x0, #{tcb_sa_off} + {sa_tmp_x1_x2}]
-    stp x2, x3, [sp], #-16
+    stp x2, x3, [sp, #-16]!
     ldp x3, x4, [x0, #{tcb_sa_off} + {sa_tmp_x3_x4}]
-    stp x4, x3, [sp], #-16
-    stp x6, x5, [sp], #-16
-    stp x8, x7, [sp], #-16
-    stp x10, x9, [sp], #-16
-    stp x12, x11, [sp], #-16
-    stp x14, x13, [sp], #-16
-    stp x16, x15, [sp], #-16
-    stp x18, x17, [sp], #-16
-    stp x20, x19, [sp], #-16
-    stp x22, x21, [sp], #-16
-    stp x24, x23, [sp], #-16
-    stp x26, x25, [sp], #-16
-    stp x28, x27, [sp], #-16
-    stp x30, x29, [sp], #-16
+    stp x4, x3, [sp, #-16]!
+    ldp x5, x6, [x0, #{tcb_sa_off} + {sa_tmp_x5_x6}]
+    stp x6, x5, [sp, #-16]!
+
+    stp x8, x7, [sp, #-16]!
+    stp x10, x9, [sp, #-16]!
+    stp x12, x11, [sp, #-16]!
+    stp x14, x13, [sp, #-16]!
+    stp x16, x15, [sp, #-16]!
+    stp x18, x17, [sp, #-16]!
+    stp x20, x19, [sp, #-16]!
+    stp x22, x21, [sp, #-16]!
+    stp x24, x23, [sp, #-16]!
+    stp x26, x25, [sp, #-16]!
+    stp x28, x27, [sp, #-16]!
+    stp x30, x29, [sp, #-16]!
+
+    str w1, [sp, #-4]
+    sub sp, sp, #64
 
     mov x0, sp
     bl {inner}
 
+    add sp, sp, #64
+
     ldp x30, x29, [sp], #16
     ldp x28, x27, [sp], #16
     ldp x26, x25, [sp], #16
@@ -234,28 +365,56 @@ asmfunction!(__relibc_internal_sigentry: ["
     ldp x6, x5, [sp], #16
     ldp x4, x3, [sp], #16
     ldp x2, x1, [sp], #16
+
     ldr x0, [sp, #8]
     msr nzcv, x0
 
+8:
     // x18 is reserved by ABI as 'platform register', so clobbering it should be safe.
     mov x18, sp
-    ldr x0, [sp], #16
+    ldr x0, [x18]
     mov sp, x0
-    mov x0, x18
 
-    ldp x18, x0, [x0]
+    ldp x18, x0, [x18, #16]
+    br x18
+7:
+    // Spurious signal, i.e. all bitsets were 0 at the time they were checked
+    clrex
+
+    ldr x1, [x0, #{tcb_sc_off} + {sc_flags}]
+    and x1, x1, ~1
+    str x1, [x0, #{tcb_sc_off} + {sc_flags}]
+
+    ldp x1, x2, [x0, #{tcb_sa_off} + {sa_tmp_x1_x2}]
+    ldp x3, x4, [x0, #{tcb_sa_off} + {sa_tmp_x3_x4}]
+    ldp x5, x6, [x0, #{tcb_sa_off} + {sa_tmp_x5_x6}]
+    ldr x18, [x0, #{tcb_sc_off} + {sc_saved_pc}]
+    ldr x0, [x0, #{tcb_sc_off} + {sc_saved_x0}]
     br x18
 "] <= [
+    pctl_pending = const (offset_of!(SigProcControl, pending)),
+    pctl_actions = const (offset_of!(SigProcControl, actions)),
+    pctl_sender_infos = const (offset_of!(SigProcControl, sender_infos)),
     tcb_sc_off = const (offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control)),
     tcb_sa_off = const (offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, arch)),
     sa_tmp_x1_x2 = const offset_of!(SigArea, tmp_x1_x2),
     sa_tmp_x3_x4 = const offset_of!(SigArea, tmp_x3_x4),
+    sa_tmp_x5_x6 = const offset_of!(SigArea, tmp_x5_x6),
     sa_tmp_sp = const offset_of!(SigArea, tmp_sp),
+    sa_tmp_rt_inf = const offset_of!(SigArea, tmp_rt_inf),
+    sa_tmp_id_inf = const offset_of!(SigArea, tmp_id_inf),
     sa_pctl = const offset_of!(SigArea, pctl),
     sc_saved_pc = const offset_of!(Sigcontrol, saved_ip),
     sc_saved_x0 = const offset_of!(Sigcontrol, saved_archdep_reg),
+    sc_sender_infos = const offset_of!(Sigcontrol, sender_infos),
     sc_word = const offset_of!(Sigcontrol, word),
+    sc_flags = const offset_of!(Sigcontrol, control_flags),
     inner = sym inner_c,
+
+    SA_ONSTACK_BIT = const 58, // (1 << 58) >> 32 = 0x0400_0000
+    SYS_SIGDEQUEUE = const syscall::SYS_SIGDEQUEUE,
+    STACK_ALIGN = const 16,
+    REDZONE_SIZE = const 128,
 ]);
 
 asmfunction!(__relibc_internal_rlct_clone_ret: ["
@@ -294,4 +453,10 @@ pub unsafe fn manually_enter_trampoline() {
     ", inout("x0") ip_location => _, out("lr") _);
 }
 
-pub unsafe fn arch_pre(stack: &mut SigStack, os: &mut SigArea) {}
+pub unsafe fn arch_pre(stack: &mut SigStack, os: &mut SigArea) -> PosixStackt {
+    PosixStackt {
+        sp: core::ptr::null_mut(), // TODO: placeholder; `stack` and `os` are not consulted yet
+        size: 0,                   // TODO: placeholder
+        flags: 0,                  // TODO: placeholder (x86_64 derives these via get_sigaltstack)
+    }
+}
diff --git a/redox-rt/src/arch/i686.rs b/redox-rt/src/arch/i686.rs
index 1f0cbdd9da857db164cd6106ce8f7ad58b94032a..71319f50d5f2e16f8dcfdd3a4b616214f22fdb3d 100644
--- a/redox-rt/src/arch/i686.rs
+++ b/redox-rt/src/arch/i686.rs
@@ -1,10 +1,10 @@
-use core::{mem::offset_of, sync::atomic::Ordering};
+use core::{mem::offset_of, ptr::NonNull, sync::atomic::Ordering};
 
 use syscall::*;
 
 use crate::{
     proc::{fork_inner, FdGuard},
-    signal::{inner_fastcall, RtSigarea, SigStack, PROC_CONTROL_STRUCT},
+    signal::{inner_fastcall, PosixStackt, RtSigarea, SigStack, PROC_CONTROL_STRUCT},
     RtTcb,
 };
 
@@ -22,9 +22,13 @@ pub struct SigArea {
     pub tmp_eax: usize,
     pub tmp_ecx: usize,
     pub tmp_edx: usize,
+    pub tmp_rt_inf: RtSigInfo,
+    pub tmp_id_inf: u64,
+    pub tmp_mm0: u64,
     pub pctl: usize, // TODO: reference pctl directly
     pub disable_signals_depth: u64,
     pub last_sig_was_restart: bool,
+    pub last_sigstack: Option<NonNull<SigStack>>,
 }
 #[derive(Debug, Default)]
 #[repr(C, align(16))]
@@ -81,11 +85,7 @@ unsafe extern "cdecl" fn fork_impl(initial_rsp: *mut usize) -> usize {
 
 unsafe extern "cdecl" fn child_hook(cur_filetable_fd: usize, new_pid_fd: usize) {
     let _ = syscall::close(cur_filetable_fd);
-    // TODO: Currently pidfd == threadfd, but this will not be the case later.
-    RtTcb::current()
-        .thr_fd
-        .get()
-        .write(Some(FdGuard::new(new_pid_fd)));
+    crate::child_hook_common(FdGuard::new(new_pid_fd)); // sets thr_fd and re-caches the PID
 }
 
 asmfunction!(__relibc_internal_fork_wrapper -> usize: ["
@@ -148,41 +148,67 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov ecx, gs:[{tcb_sa_off} + {sa_pctl}]
 
     // Read standard signal word - for the process
-    mov eax, [ecx + {pctl_word}]
+    mov eax, [ecx + {pctl_pending}]
     and eax, edx
-    jz 3f
     bsf eax, eax
+    jz 3f
+
+    // Read si_pid and si_uid, atomically.
+    movq gs:[{tcb_sa_off} + {sa_tmp_mm0}], mm0
+    movq mm0, [ecx + {pctl_sender_infos} + eax * 8]
+    movq gs:[{tcb_sa_off} + {sa_tmp_id_inf}], mm0
+    movq mm0, gs:[{tcb_sa_off} + {sa_tmp_mm0}]
 
     // Try clearing the pending bit, otherwise retry if another thread did that first
-    lock btr [ecx + {pctl_word}], eax
+    lock btr [ecx + {pctl_pending}], eax
     jnc 1b
     jmp 2f
 3:
     // Read realtime thread and process signal word together
-    mov edx, [ecx + {pctl_word} + 4]
+    mov edx, [ecx + {pctl_pending} + 4]
     mov eax, gs:[{tcb_sc_off} + {sc_word} + 8]
     or eax, edx
     and eax, gs:[{tcb_sc_off} + {sc_word} + 12]
     jz 7f // spurious signal
     bsf eax, eax
 
+    // If thread was specifically targeted, send the signal to it first.
     bt edx, eax
     jc 8f
 
-    lock btr [ecx + {pctl_word} + 4], eax
-    jnc 1b
-    add eax, 32
+    mov edx, ebx
+    lea ecx, [eax+32]
+    mov eax, {SYS_SIGDEQUEUE}
+    mov edx, gs:[0]
+    add edx, {tcb_sa_off} + {sa_tmp_rt_inf}
+    int 0x80
+    mov ebx, edx
+    test eax, eax
+    jnz 1b
+
+    mov eax, ecx
     jmp 2f
 8:
     add eax, 32
 9:
+    // Read si_pid and si_uid, atomically.
+    movq gs:[{tcb_sa_off} + {sa_tmp_mm0}], mm0
+    movq mm0, gs:[{tcb_sc_off} + {sc_sender_infos} + eax * 8]
+    movq gs:[{tcb_sa_off} + {sa_tmp_id_inf}], mm0
+    movq mm0, gs:[{tcb_sa_off} + {sa_tmp_mm0}]
+    mov edx, eax
+    shr edx, 5
+    mov ecx, eax
+    and ecx, 31
+    lock btr gs:[{tcb_sc_off} + {sc_word} + edx * 8], ecx
+
     add eax, 64
 2:
     and esp, -{STACK_ALIGN}
 
     mov edx, eax
     add edx, edx
-    bt dword ptr [{pctl} + {pctl_off_actions} + edx * 8 + 4], 28
+    bt dword ptr [{pctl} + {pctl_actions} + edx * 8 + 4], 28
     jnc 4f
 
     mov edx, gs:[{tcb_sa_off} + {sa_altstack_top}]
@@ -200,7 +226,7 @@ asmfunction!(__relibc_internal_sigentry: ["
     push dword ptr gs:[{tcb_sc_off} + {sc_saved_eflags}]
 
     push dword ptr gs:[{tcb_sa_off} + {sa_tmp_edx}]
-    push ecx
+    push dword ptr gs:[{tcb_sa_off} + {sa_tmp_ecx}]
     push dword ptr gs:[{tcb_sa_off} + {sa_tmp_eax}]
     push ebx
     push edi
@@ -210,14 +236,14 @@ asmfunction!(__relibc_internal_sigentry: ["
     sub esp, 2 * 4 + 29 * 16
     fxsave [esp]
 
-    push eax
-    sub esp, 3 * 4
+    mov [esp - 4], eax
+    sub esp, 48
 
     mov ecx, esp
     call {inner}
 
-    fxrstor [esp + 16]
-    add esp, 16 + 29 * 16 + 2 * 4
+    fxrstor [esp + 48]
+    add esp, 48 + 29 * 16 + 2 * 4
 
     pop ebp
     pop esi
@@ -262,6 +288,9 @@ __relibc_internal_sigentry_crit_third:
     sa_tmp_eax = const offset_of!(SigArea, tmp_eax),
     sa_tmp_ecx = const offset_of!(SigArea, tmp_ecx),
     sa_tmp_edx = const offset_of!(SigArea, tmp_edx),
+    sa_tmp_mm0 = const offset_of!(SigArea, tmp_mm0),
+    sa_tmp_rt_inf = const offset_of!(SigArea, tmp_rt_inf),
+    sa_tmp_id_inf = const offset_of!(SigArea, tmp_id_inf),
     sa_altstack_top = const offset_of!(SigArea, altstack_top),
     sa_altstack_bottom = const offset_of!(SigArea, altstack_bottom),
     sa_pctl = const offset_of!(SigArea, pctl),
@@ -269,12 +298,15 @@ __relibc_internal_sigentry_crit_third:
     sc_saved_eflags = const offset_of!(Sigcontrol, saved_archdep_reg),
     sc_saved_eip = const offset_of!(Sigcontrol, saved_ip),
     sc_word = const offset_of!(Sigcontrol, word),
+    sc_sender_infos = const offset_of!(Sigcontrol, sender_infos),
     tcb_sa_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, arch),
     tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
-    pctl_off_actions = const offset_of!(SigProcControl, actions),
-    pctl_word = const offset_of!(SigProcControl, pending),
+    pctl_actions = const offset_of!(SigProcControl, actions),
+    pctl_sender_infos = const offset_of!(SigProcControl, sender_infos),
+    pctl_pending = const offset_of!(SigProcControl, pending),
     pctl = sym PROC_CONTROL_STRUCT,
     STACK_ALIGN = const 16,
+    SYS_SIGDEQUEUE = const syscall::SYS_SIGDEQUEUE,
 ]);
 
 asmfunction!(__relibc_internal_rlct_clone_ret -> usize: ["
@@ -300,16 +332,21 @@ extern "C" {
     fn __relibc_internal_sigentry_crit_second();
     fn __relibc_internal_sigentry_crit_third();
 }
-pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
+pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) -> PosixStackt {
     if stack.regs.eip == __relibc_internal_sigentry_crit_first as usize {
         let stack_ptr = stack.regs.esp as *const usize;
         stack.regs.esp = stack_ptr.read();
         stack.regs.eip = stack_ptr.sub(1).read();
-    } else if stack.regs.eip == __relibc_internal_sigentry_crit_second as usize {
-        stack.regs.eip = area.tmp_eip;
-    } else if stack.regs.eip == __relibc_internal_sigentry_crit_third as usize {
+    } else if stack.regs.eip == __relibc_internal_sigentry_crit_second as usize
+        || stack.regs.eip == __relibc_internal_sigentry_crit_third as usize
+    {
         stack.regs.eip = area.tmp_eip;
     }
+    PosixStackt {
+        sp: stack.regs.esp as *mut (), // NOTE(review): interrupted esp, not a stack base?
+        size: 0,                       // TODO: report the real stack size
+        flags: 0,                      // TODO: report SS_ONSTACK / SS_DISABLE
+    }
 }
 #[no_mangle]
 pub unsafe fn manually_enter_trampoline() {
diff --git a/redox-rt/src/arch/x86_64.rs b/redox-rt/src/arch/x86_64.rs
index d0d0f9bf047014161ba7137b3b0b460822f7d974..f1e5827ece01663e6ab1946b4b8c2e59c20465da 100644
--- a/redox-rt/src/arch/x86_64.rs
+++ b/redox-rt/src/arch/x86_64.rs
@@ -1,16 +1,21 @@
 use core::{
     mem::offset_of,
+    ptr::NonNull,
     sync::atomic::{AtomicU8, Ordering},
 };
 
 use syscall::{
     data::{SigProcControl, Sigcontrol},
     error::*,
+    RtSigInfo,
 };
 
 use crate::{
     proc::{fork_inner, FdGuard},
-    signal::{inner_c, RtSigarea, SigStack, PROC_CONTROL_STRUCT},
+    signal::{
+        get_sigaltstack, inner_c, PosixStackt, RtSigarea, SigStack, Sigaltstack,
+        PROC_CONTROL_STRUCT,
+    },
     RtTcb, Tcb,
 };
 
@@ -25,11 +30,16 @@ pub struct SigArea {
     pub tmp_rsp: usize,
     pub tmp_rax: usize,
     pub tmp_rdx: usize,
+    pub tmp_rdi: usize,
+    pub tmp_rsi: usize,
+    pub tmp_rt_inf: RtSigInfo,
+    pub tmp_id_inf: u64,
 
     pub altstack_top: usize,
     pub altstack_bottom: usize,
     pub disable_signals_depth: u64,
     pub last_sig_was_restart: bool,
+    pub last_sigstack: Option<NonNull<SigStack>>,
 }
 
 #[repr(C, align(16))]
@@ -91,11 +101,7 @@ unsafe extern "sysv64" fn fork_impl(initial_rsp: *mut usize) -> usize {
 
 unsafe extern "sysv64" fn child_hook(cur_filetable_fd: usize, new_pid_fd: usize) {
     let _ = syscall::close(cur_filetable_fd);
-    // TODO: Currently pidfd == threadfd, but this will not be the case later.
-    RtTcb::current()
-        .thr_fd
-        .get()
-        .write(Some(FdGuard::new(new_pid_fd)));
+    crate::child_hook_common(FdGuard::new(new_pid_fd)); // sets thr_fd and re-caches the PID
 }
 
 asmfunction!(__relibc_internal_fork_wrapper -> usize: ["
@@ -170,6 +176,8 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov fs:[{tcb_sa_off} + {sa_tmp_rsp}], rsp
     mov fs:[{tcb_sa_off} + {sa_tmp_rax}], rax
     mov fs:[{tcb_sa_off} + {sa_tmp_rdx}], rdx
+    mov fs:[{tcb_sa_off} + {sa_tmp_rdi}], rdi
+    mov fs:[{tcb_sa_off} + {sa_tmp_rsi}], rsi
 
     // First, select signal, always pick first available bit
 1:
@@ -188,7 +196,10 @@ asmfunction!(__relibc_internal_sigentry: ["
     and eax, edx
     bsf eax, eax
     jz 8f
+    lea rdi, [rip + {pctl} + {pctl_off_sender_infos}]
+    mov rdi, [rdi + rax * 8]
     lock btr [rip + {pctl} + {pctl_off_pending}], eax
+    mov fs:[{tcb_sa_off} + {sa_tmp_id_inf}], rdi
     jc 9f
 8:
     // Read second signal word - both process and thread simultaneously.
@@ -201,15 +212,24 @@ asmfunction!(__relibc_internal_sigentry: ["
     jz 7f
 
     bt edx, eax // check if signal was sent to thread specifically
-    jc 2f // then continue as usual
-
-    // otherwise, try clearing pending
-    lock btr [rip + {pctl} + {pctl_off_pending}], eax
-    jnc 1b
+    jc 2f // if so, continue as usual
+
+    // otherwise, try (competitively) dequeueing realtime signal
+    mov esi, eax
+    mov eax, {SYS_SIGDEQUEUE}
+    mov rdi, fs:[0]
+    add rdi, {tcb_sa_off} + {sa_tmp_rt_inf} // out pointer of dequeued realtime sig
+    syscall
+    test eax, eax
+    jnz 1b // assumes error can only be EAGAIN
+    lea eax, [esi + 32]
+    jmp 9f
 2:
     mov edx, eax
     shr edx, 5
+    mov rdi, fs:[{tcb_sc_off} + {sc_sender_infos} + eax * 8]
     lock btr fs:[{tcb_sc_off} + {sc_word} + edx * 4], eax
+    mov fs:[{tcb_sa_off} + {sa_tmp_id_inf}], rdi
     add eax, 64 // indicate signal was targeted at thread
 9:
     sub rsp, {REDZONE_SIZE}
@@ -220,9 +240,12 @@ asmfunction!(__relibc_internal_sigentry: ["
     // skip the sigaltstack logic.
     lea rdx, [rip + {pctl} + {pctl_off_actions}]
 
-    // LEA doesn't support x16, so just do two x8s.
-    lea rdx, [rdx + 8 * rax]
-    lea rdx, [rdx + 8 * rax]
+    mov ecx, eax
+    and ecx, 63
+
+    // LEA doesn't support 16x, so just do two x8s.
+    lea rdx, [rdx + 8 * rcx]
+    lea rdx, [rdx + 8 * rcx]
 
     bt qword ptr [rdx], {SA_ONSTACK_BIT}
     jnc 4f
@@ -247,8 +270,8 @@ asmfunction!(__relibc_internal_sigentry: ["
     push fs:[{tcb_sc_off} + {sc_saved_rip}]
     push fs:[{tcb_sc_off} + {sc_saved_rflags}]
 
-    push rdi
-    push rsi
+    push fs:[{tcb_sa_off} + {sa_tmp_rdi}]
+    push fs:[{tcb_sa_off} + {sa_tmp_rsi}]
     push fs:[{tcb_sa_off} + {sa_tmp_rdx}]
     push rcx
     push fs:[{tcb_sa_off} + {sa_tmp_rax}]
@@ -287,13 +310,13 @@ asmfunction!(__relibc_internal_sigentry: ["
     vextractf128 [rsp + 16], ymm14, 1
     vextractf128 [rsp], ymm15, 1
 5:
-    push rax // selected signal
-    sub rsp, 8
+    mov [rsp - 4], eax
+    sub rsp, 64 // alloc space for ucontext fields
 
     mov rdi, rsp
     call {inner}
 
-    add rsp, 16
+    add rsp, 64
 
     fxrstor64 [rsp + 16 * 16]
 
@@ -383,21 +406,28 @@ __relibc_internal_sigentry_crit_third:
     sa_tmp_rsp = const offset_of!(SigArea, tmp_rsp),
     sa_tmp_rax = const offset_of!(SigArea, tmp_rax),
     sa_tmp_rdx = const offset_of!(SigArea, tmp_rdx),
+    sa_tmp_rdi = const offset_of!(SigArea, tmp_rdi),
+    sa_tmp_rsi = const offset_of!(SigArea, tmp_rsi),
+    sa_tmp_rt_inf = const offset_of!(SigArea, tmp_rt_inf),
+    sa_tmp_id_inf = const offset_of!(SigArea, tmp_id_inf),
     sa_altstack_top = const offset_of!(SigArea, altstack_top),
     sa_altstack_bottom = const offset_of!(SigArea, altstack_bottom),
     sc_saved_rflags = const offset_of!(Sigcontrol, saved_archdep_reg),
     sc_saved_rip = const offset_of!(Sigcontrol, saved_ip),
     sc_word = const offset_of!(Sigcontrol, word),
+    sc_sender_infos = const offset_of!(Sigcontrol, sender_infos),
     sc_control = const offset_of!(Sigcontrol, control_flags),
     tcb_sa_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, arch),
     tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
     pctl_off_actions = const offset_of!(SigProcControl, actions),
     pctl_off_pending = const offset_of!(SigProcControl, pending),
+    pctl_off_sender_infos = const offset_of!(SigProcControl, sender_infos),
     pctl = sym PROC_CONTROL_STRUCT,
     supports_avx = sym SUPPORTS_AVX,
     REDZONE_SIZE = const 128,
     STACK_ALIGN = const 16,
     SA_ONSTACK_BIT = const 58, // (1 << 58) >> 32 = 0x0400_0000
+    SYS_SIGDEQUEUE = const syscall::SYS_SIGDEQUEUE,
 ]);
 
 extern "C" {
@@ -405,7 +435,8 @@ extern "C" {
     fn __relibc_internal_sigentry_crit_second();
     fn __relibc_internal_sigentry_crit_third();
 }
-pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
+/// Fixes some edge cases, and calculates the value for uc_stack.
+pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) -> PosixStackt {
     // It is impossible to update RSP and RIP atomically on x86_64, without using IRETQ, which is
     // almost as slow as calling a SIGRETURN syscall would be. Instead, we abuse the fact that
     // signals are disabled in the prologue of the signal trampoline, which allows us to emulate
@@ -419,18 +450,20 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
         let stack_ptr = stack.regs.rsp as *const usize;
         stack.regs.rsp = stack_ptr.read();
         stack.regs.rip = stack_ptr.sub(1).read();
-    } else if stack.regs.rip == __relibc_internal_sigentry_crit_second as usize {
+    } else if stack.regs.rip == __relibc_internal_sigentry_crit_second as usize
+        || stack.regs.rip == __relibc_internal_sigentry_crit_third as usize
+    {
         // Almost finished, just reexecute the jump before tmp_rip is overwritten by this
         // deeper-level signal.
         stack.regs.rip = area.tmp_rip;
-    } else if stack.regs.rip == __relibc_internal_sigentry_crit_third as usize {
-        stack.regs.rip = area.tmp_rip;
     }
+
+    get_sigaltstack(area, stack.regs.rsp).into()
 }
 
 pub(crate) static SUPPORTS_AVX: AtomicU8 = AtomicU8::new(0);
 
-// __relibc will be prepended to the name, so mangling is fine
+// __relibc will be prepended to the name, so no_mangle is fine
 #[no_mangle]
 pub unsafe fn manually_enter_trampoline() {
     let c = &Tcb::current().unwrap().os_specific.control;
diff --git a/redox-rt/src/lib.rs b/redox-rt/src/lib.rs
index 106fadc264738dc804a7cc1276f9b8d9408e8f9a..fa25e97a934d1b456e5117f599fb32a3206d5a3f 100644
--- a/redox-rt/src/lib.rs
+++ b/redox-rt/src/lib.rs
@@ -9,7 +9,7 @@
 )]
 #![forbid(unreachable_patterns)]
 
-use core::cell::UnsafeCell;
+use core::cell::{SyncUnsafeCell, UnsafeCell};
 
 use generic_rt::{ExpectTlsFree, GenericTcb};
 use syscall::{Sigcontrol, O_CLOEXEC};
@@ -120,13 +120,13 @@ pub unsafe fn tcb_activate(tcb: &RtTcb, tls_end_and_tcb_start: usize, _tls_len:
 }
 
 /// Initialize redox-rt in situations where relibc is not used
-pub fn initialize_freestanding() {
+pub unsafe fn initialize_freestanding() {
     // TODO: This code is a hack! Integrate the ld_so TCB code into generic-rt, and then use that
     // (this function will need pointers to the ELF structs normally passed in auxvs), so the TCB
     // is initialized properly.
 
     // TODO: TLS
-    let page = unsafe {
+    let page = {
         &mut *(syscall::fmap(
             !0,
             &syscall::Map {
@@ -155,4 +155,20 @@ pub fn initialize_freestanding() {
         let abi_ptr = core::ptr::addr_of_mut!(page.tcb_ptr);
         core::arch::asm!("msr tpidr_el0, {}", in(reg) abi_ptr);
     }
+    initialize();
+}
+/// Caches the PID in `THIS_PID`; panics if `getpid` fails or the PID does not fit in `u32`.
+pub unsafe fn initialize() {
+    let pid = syscall::getpid().unwrap().try_into().unwrap();
+    THIS_PID.get().write(pid);
+}
+
+static THIS_PID: SyncUnsafeCell<u32> = SyncUnsafeCell::new(0);
+
+/// Child-side hook: stores the new fd as `thr_fd` and re-caches the PID in `THIS_PID`.
+unsafe fn child_hook_common(new_pid_fd: FdGuard) {
+    // TODO: Currently pidfd == threadfd, but this will not be the case later.
+    RtTcb::current().thr_fd.get().write(Some(new_pid_fd));
+    let pid = syscall::getpid().unwrap().try_into().unwrap();
+    THIS_PID.get().write(pid);
+}
diff --git a/redox-rt/src/signal.rs b/redox-rt/src/signal.rs
index 00bd3474c0cb4843da5ee7184dc54bd460c7f5a9..345232cad2052aacd502e0cb27816e393a415542 100644
--- a/redox-rt/src/signal.rs
+++ b/redox-rt/src/signal.rs
@@ -1,14 +1,17 @@
 use core::{
     cell::{Cell, UnsafeCell},
-    ffi::c_int,
-    sync::atomic::{AtomicUsize, Ordering},
+    ffi::{c_int, c_void},
+    mem::MaybeUninit,
+    ptr::NonNull,
+    sync::atomic::{AtomicU8, AtomicUsize, Ordering},
 };
 
 use syscall::{
-    data::AtomicU64, Error, RawAction, Result, SetSighandlerData, SigProcControl, Sigcontrol,
-    SigcontrolFlags, EINVAL, ENOMEM, EPERM, SIGABRT, SIGBUS, SIGCHLD, SIGCONT, SIGFPE, SIGILL,
-    SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG,
-    SIGWINCH, SIGXCPU, SIGXFSZ,
+    data::AtomicU64, Error, NonatomicUsize, RawAction, Result, RtSigInfo, SenderInfo,
+    SetSighandlerData, SigProcControl, Sigcontrol, SigcontrolFlags, TimeSpec, EAGAIN, EINTR,
+    EINVAL, ENOMEM, EPERM, SIGABRT, SIGBUS, SIGCHLD, SIGCONT, SIGFPE, SIGILL, SIGKILL, SIGQUIT,
+    SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGWINCH, SIGXCPU,
+    SIGXFSZ,
 };
 
 use crate::{arch::*, proc::FdGuard, sync::Mutex, RtTcb, Tcb};
@@ -22,39 +25,116 @@ pub fn sighandler_function() -> usize {
     __relibc_internal_sigentry as usize
 }
 
+/// ucontext_t representation, laid out as built on the stack by the signal trampoline.
 #[repr(C)]
 pub struct SigStack {
     #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
-    _pad: [usize; 1], // pad to 16 bytes alignment
+    _pad: [usize; 1], // pad the header fields below from 7*8 to 64 bytes
 
     #[cfg(target_arch = "x86")]
-    _pad: [usize; 3], // pad to 16 bytes alignment
+    _pad: [usize; 3], // pad the header fields below from 9*4 to 12*4 bytes
 
-    sig_num: usize,
+    pub link: *mut SigStack, // previous `last_sigstack` value, or null if there was none
+
+    pub old_stack: PosixStackt, // value returned by `arch_pre`
+    pub old_mask: u64,          // allow-set in effect before `sa_mask` was applied
+    pub(crate) sival: usize,    // `tmp_rt_inf.arg`; undefined unless the signal was realtime
+    pub(crate) sig_code: u32,   // `tmp_rt_inf.code` for process realtime signals, else 0
+    pub(crate) sig_num: u32,    // stored by the asm trampoline, made 1-based by `inner`
 
     // x86_64: 864 bytes
     // i686: 512 bytes
     // aarch64: 272 bytes (SIMD TODO)
     pub regs: ArchIntRegs,
 }
+#[repr(C)]
+pub struct PosixStackt {
+    pub sp: *mut (), // stack pointer/base; null in the `Sigaltstack::Disabled` conversion
+    pub flags: i32,  // SS_ONSTACK, SS_DISABLE, or 0
+    pub size: usize, // 0 in the `Sigaltstack::Disabled` conversion
+}
+
+pub const SS_ONSTACK: usize = 1; // `flags` value for an enabled altstack with `onstack` set
+pub const SS_DISABLE: usize = 2; // `flags` value for `Sigaltstack::Disabled`
+
+impl From<Sigaltstack> for PosixStackt {
+    /// Converts the runtime's altstack state into a `PosixStackt`.
+    fn from(value: Sigaltstack) -> Self {
+        // Resolve the three fields first; the flag stays a `usize` until the very end.
+        let (sp, size, flag_bits) = match value {
+            // Disabled: null base, zero size, SS_DISABLE.
+            Sigaltstack::Disabled => (core::ptr::null_mut(), 0, SS_DISABLE),
+            // Enabled: SS_ONSTACK only when `onstack` is set, otherwise no flags.
+            Sigaltstack::Enabled {
+                onstack,
+                base,
+                size,
+            } => {
+                let flag_bits = if onstack { SS_ONSTACK } else { 0 };
+                (base.cast(), size, flag_bits)
+            }
+        };
+        PosixStackt {
+            sp,
+            size,
+            flags: flag_bits.try_into().unwrap(),
+        }
+    }
+}
+
+#[repr(C)]
+// TODO: For practical reasons this struct is locked to Linux's ABI. Avoid redefining it
+// here, or else check at compile time that the two definitions are equivalent.
+pub struct SiginfoAbi {
+    pub si_signo: i32,
+    pub si_errno: i32,
+    pub si_code: i32,
+    pub si_pid: i32,      // pid_t
+    pub si_uid: i32,      // uid_t
+    pub si_addr: *mut (), // *mut c_void
+    pub si_status: i32,
+    pub si_value: usize, // sigval
+}
 
 #[inline(always)]
 unsafe fn inner(stack: &mut SigStack) {
     let os = &Tcb::current().unwrap().os_specific;
+
+    let stack_ptr = NonNull::from(&mut *stack);
+    stack.link = core::mem::replace(&mut (*os.arch.get()).last_sigstack, Some(stack_ptr))
+        .map_or_else(core::ptr::null_mut, |x| x.as_ptr());
+
     let signals_were_disabled = (*os.arch.get()).disable_signals_depth > 0;
 
-    let _targeted_thread_not_process = stack.sig_num >= 64;
+    let targeted_thread_not_process = stack.sig_num >= 64;
     stack.sig_num %= 64;
 
     // asm counts from 0
     stack.sig_num += 1;
-    arch_pre(stack, &mut *os.arch.get());
+
+    let (sender_pid, sender_uid) = {
+        let area = &mut *os.arch.get();
+
+        // Undefined if the signal was not realtime
+        stack.sival = area.tmp_rt_inf.arg;
+
+        stack.old_stack = arch_pre(stack, area);
+
+        if (stack.sig_num - 1) / 32 == 1 && !targeted_thread_not_process {
+            stack.sig_code = area.tmp_rt_inf.code as u32;
+            (area.tmp_rt_inf.pid, area.tmp_rt_inf.uid)
+        } else {
+            stack.sig_code = 0; // TODO: SI_USER constant?
+                                // TODO: Handle SIGCHLD. Maybe that should always be queued though?
+            let inf = SenderInfo::from_raw(area.tmp_id_inf);
+            (inf.pid, inf.ruid)
+        }
+    };
 
     let sigaction = {
         let guard = SIGACTIONS_LOCK.lock();
-        let action = convert_old(&PROC_CONTROL_STRUCT.actions[stack.sig_num - 1]);
+        let action = convert_old(&PROC_CONTROL_STRUCT.actions[stack.sig_num as usize - 1]);
         if action.flags.contains(SigactionFlags::RESETHAND) {
-            // TODO: other things that must be set
             drop(guard);
             sigaction(
                 stack.sig_num as u8,
@@ -75,34 +155,21 @@ unsafe fn inner(stack: &mut SigStack) {
             panic!("ctl {:x?} signal {}", os.control, stack.sig_num)
         }
         SigactionKind::Default => {
-            syscall::exit(stack.sig_num);
+            syscall::exit(stack.sig_num as usize);
             unreachable!();
         }
         SigactionKind::Handled { handler } => handler,
     };
 
     // Set sigmask to sa_mask and unmark the signal as pending.
-    let prev_sigallow_lo = os.control.word[0].load(Ordering::Relaxed) >> 32;
-    let prev_sigallow_hi = os.control.word[1].load(Ordering::Relaxed) >> 32;
-    let prev_sigallow = prev_sigallow_lo | (prev_sigallow_hi << 32);
+    let prev_sigallow = get_allowset_raw(&os.control.word);
 
     let mut sigallow_inside = !sigaction.mask & prev_sigallow;
     if !sigaction.flags.contains(SigactionFlags::NODEFER) {
         sigallow_inside &= !sig_bit(stack.sig_num);
     }
-    let sigallow_inside_lo = sigallow_inside & 0xffff_ffff;
-    let sigallow_inside_hi = sigallow_inside >> 32;
 
-    //let _ = syscall::write(1, &alloc::format!("WORD0 {:x?}\n", os.control.word).as_bytes());
-    let prev_w0 = os.control.word[0].fetch_add(
-        (sigallow_inside_lo << 32).wrapping_sub(prev_sigallow_lo << 32),
-        Ordering::Relaxed,
-    );
-    let prev_w1 = os.control.word[1].fetch_add(
-        (sigallow_inside_hi << 32).wrapping_sub(prev_sigallow_hi << 32),
-        Ordering::Relaxed,
-    );
-    //let _ = syscall::write(1, &alloc::format!("WORD1 {:x?}\n", os.control.word).as_bytes());
+    let _pending_when_sa_mask = set_allowset_raw(&os.control.word, prev_sigallow, sigallow_inside);
 
     // TODO: If sa_mask caused signals to be unblocked, deliver one or all of those first?
 
@@ -114,21 +181,30 @@ unsafe fn inner(stack: &mut SigStack) {
     );
     core::sync::atomic::compiler_fence(Ordering::Acquire);
 
+    stack.old_mask = prev_sigallow;
+
     // Call handler, either sa_handler or sa_siginfo depending on flag.
     if sigaction.flags.contains(SigactionFlags::SIGINFO)
         && let Some(sigaction) = handler.sigaction
     {
-        //let _ = syscall::write(1, alloc::format!("SIGACTION {:p}\n", sigaction).as_bytes());
+        let info = SiginfoAbi {
+            si_signo: stack.sig_num as c_int,
+            si_addr: core::ptr::null_mut(),
+            si_code: stack.sig_code as i32,
+            si_errno: 0,
+            si_pid: sender_pid as i32,
+            si_status: 0,
+            si_uid: sender_uid as i32,
+            si_value: stack.sival,
+        };
         sigaction(
             stack.sig_num as c_int,
-            core::ptr::null_mut(),
-            core::ptr::null_mut(),
+            core::ptr::addr_of!(info).cast(),
+            stack as *mut SigStack as *mut (),
         );
     } else if let Some(handler) = handler.handler {
-        //let _ = syscall::write(1, alloc::format!("HANDLER {:p}\n", handler).as_bytes());
         handler(stack.sig_num as c_int);
     }
-    //let _ = syscall::write(1, alloc::format!("RETURNED HANDLER\n").as_bytes());
 
     // Disable signals while we modify the sigmask again
     control_flags.store(
@@ -138,25 +214,22 @@ unsafe fn inner(stack: &mut SigStack) {
     core::sync::atomic::compiler_fence(Ordering::Acquire);
 
     // Update allowset again.
-    //let _ = syscall::write(1, &alloc::format!("WORD2 {:x?}\n", os.control.word).as_bytes());
 
-    let prev_w0 = os.control.word[0].fetch_add(
-        (prev_sigallow_lo << 32).wrapping_sub(sigallow_inside_lo << 32),
-        Ordering::Relaxed,
-    );
-    let prev_w1 = os.control.word[1].fetch_add(
-        (prev_sigallow_hi << 32).wrapping_sub(sigallow_inside_hi << 32),
-        Ordering::Relaxed,
-    );
-    //let _ = syscall::write(1, &alloc::format!("WORD3 {:x?}\n", os.control.word).as_bytes());
+    let new_mask = stack.old_mask;
+    let old_mask = get_allowset_raw(&os.control.word);
+
+    let _pending_when_restored_mask = set_allowset_raw(&os.control.word, old_mask, new_mask);
 
     // TODO: If resetting the sigmask caused signals to be unblocked, then should they be delivered
     // here? And would it be possible to tail-call-optimize that?
 
-    //let _ = syscall::write(1, alloc::format!("will return to {:x?}\n", stack.regs.eip).as_bytes());
-
     (*os.arch.get()).last_sig_was_restart = shall_restart;
 
+    // TODO: Support setting uc_link to jump back to a different context?
+    (*os.arch.get()).last_sigstack = NonNull::new(stack.link);
+
+    // TODO: Support restoring uc_stack?
+
     // And re-enable them again
     if !signals_were_disabled {
         core::sync::atomic::compiler_fence(Ordering::Release);
@@ -177,64 +250,67 @@ pub(crate) unsafe extern "fastcall" fn inner_fastcall(stack: usize) {
 
 pub fn get_sigmask() -> Result<u64> {
     let mut mask = 0;
-    modify_sigmask(Some(&mut mask), Option::<fn(u32, bool) -> u32>::None)?;
+    modify_sigmask(Some(&mut mask), Option::<fn(u64) -> u64>::None)?; // no operation given: only read the mask
     Ok(mask)
 }
 pub fn set_sigmask(new: Option<u64>, old: Option<&mut u64>) -> Result<()> {
-    modify_sigmask(
-        old,
-        new.map(move |newmask| move |_, upper| if upper { newmask >> 32 } else { newmask } as u32),
-    )
+    modify_sigmask(old, new.map(move |newmask| move |_| newmask)) // replace the mask, ignoring its previous value
 }
 pub fn or_sigmask(new: Option<u64>, old: Option<&mut u64>) -> Result<()> {
     // Parsing nightmare... :)
     modify_sigmask(
         old,
-        new.map(move |newmask| {
-            move |oldmask, upper| oldmask | if upper { newmask >> 32 } else { newmask } as u32
-        }),
+        new.map(move |newmask| move |oldmask| oldmask | newmask), // set every bit of newmask on top of the old mask
     )
 }
 pub fn andn_sigmask(new: Option<u64>, old: Option<&mut u64>) -> Result<()> {
     modify_sigmask(
         old,
-        new.map(move |newmask| {
-            move |oldmask, upper| oldmask & !if upper { newmask >> 32 } else { newmask } as u32
-        }),
+        new.map(move |newmask| move |oldmask| oldmask & !newmask), // clear every bit of newmask from the old mask
     )
 }
-fn modify_sigmask(old: Option<&mut u64>, op: Option<impl FnMut(u32, bool) -> u32>) -> Result<()> {
+fn get_allowset_raw(words: &[AtomicU64; 2]) -> u64 { // 64-bit allowset: upper half of words[0] is bits 0..31, upper half of words[1] is bits 32..63
+    (words[0].load(Ordering::Relaxed) >> 32) | ((words[1].load(Ordering::Relaxed) >> 32) << 32)
+}
+/// Switches the allowset (upper half of each word) from `old` to `new`, returning the pending bits (lower halves) seen at the time.
+fn set_allowset_raw(words: &[AtomicU64; 2], old: u64, new: u64) -> u64 {
+    // This assumes *only this thread* can change the allowset. If this rule is broken, the use of
+    // fetch_add will corrupt the words entirely. fetch_add is very efficient on x86, being
+    // generated as LOCK XADD which is the fastest RMW instruction AFAIK.
+    let prev_w0 = words[0].fetch_add(
+        ((new & 0xffff_ffff) << 32).wrapping_sub((old & 0xffff_ffff) << 32), // delta for the upper half; its low 32 bits are zero
+        Ordering::Relaxed,
+    ) & 0xffff_ffff; // keep only the lower half of the previous word value
+    let prev_w1 = words[1].fetch_add(
+        ((new >> 32) << 32).wrapping_sub((old >> 32) << 32), // same delta trick for bits 32..63 of the allowset
+        Ordering::Relaxed,
+    ) & 0xffff_ffff; // keep only the lower half of the previous word value
+
+    prev_w0 | (prev_w1 << 32) // lower halves of both words combined into one u64
+}
+fn modify_sigmask(old: Option<&mut u64>, op: Option<impl FnOnce(u64) -> u64>) -> Result<()> {
     let _guard = tmp_disable_signals();
     let ctl = current_sigctl();
 
-    let words = ctl.word.each_ref().map(|w| w.load(Ordering::Relaxed));
+    let prev = get_allowset_raw(&ctl.word);
 
     if let Some(old) = old {
-        *old = !combine_allowset(words);
+        *old = !prev;
     }
-    let Some(mut op) = op else {
+    let Some(op) = op else {
         return Ok(());
     };
 
-    let mut can_raise = 0;
+    let next = !op(!prev);
 
-    for i in 0..2 {
-        let old_allow_bits = words[i] & 0xffff_ffff_0000_0000;
-        let new_allow_bits = u64::from(!op(!((old_allow_bits >> 32) as u32), i == 1)) << 32;
-
-        let old_word = ctl.word[i].fetch_add(
-            new_allow_bits.wrapping_sub(old_allow_bits),
-            Ordering::Relaxed,
-        );
-        can_raise |= ((old_word & 0xffff_ffff) & (new_allow_bits >> 32)) << (i * 32);
-    }
+    let pending = set_allowset_raw(&ctl.word, prev, next);
 
     // POSIX requires that at least one pending unblocked signal be delivered before
-    // pthread_sigmask returns, if there is one. Deliver the lowest-numbered one.
-    if can_raise != 0 {
-        let signal = can_raise.trailing_zeros() + 1;
-        // TODO
-        crate::sys::posix_kill_thread(**RtTcb::current().thread_fd(), signal);
+    // pthread_sigmask returns, if there is one.
+    if pending != 0 {
+        unsafe {
+            manually_enter_trampoline();
+        }
     }
 
     Ok(())
@@ -443,13 +519,14 @@ pub(crate) static PROC_CONTROL_STRUCT: SigProcControl = SigProcControl {
             user_data: AtomicU64::new(0),
         }
     }; 64],
+    sender_infos: [const { AtomicU64::new(0) }; 32],
 };
 
 fn combine_allowset([lo, hi]: [u64; 2]) -> u64 {
     (lo >> 32) | ((hi >> 32) << 32)
 }
 
-const fn sig_bit(sig: usize) -> u64 {
+const fn sig_bit(sig: u32) -> u64 {
     //assert_ne!(sig, 32);
     //assert_ne!(sig, 0);
     1 << (sig - 1)
@@ -527,22 +604,27 @@ pub enum Sigaltstack {
         size: usize,
     },
 }
-pub unsafe fn sigaltstack(
-    new: Option<&Sigaltstack>,
-    old_out: Option<&mut Sigaltstack>,
-) -> Result<()> {
-    let _g = tmp_disable_signals();
-    let tcb = &mut *Tcb::current().unwrap().os_specific.arch.get();
 
-    let old = if tcb.altstack_bottom == 0 && tcb.altstack_top == usize::MAX {
+pub(crate) fn get_sigaltstack(tcb: &SigArea, sp: usize) -> Sigaltstack { // describes the alternate stack recorded in `tcb`, relative to stack pointer `sp`
+    if tcb.altstack_bottom == 0 && tcb.altstack_top == usize::MAX { // bounds 0..usize::MAX are reported as "disabled"
         Sigaltstack::Disabled
     } else {
         Sigaltstack::Enabled {
             base: tcb.altstack_bottom as *mut (),
             size: tcb.altstack_top - tcb.altstack_bottom,
-            onstack: (tcb.altstack_bottom..tcb.altstack_top).contains(&crate::arch::current_sp()),
+            onstack: (tcb.altstack_bottom..tcb.altstack_top).contains(&sp), // true when `sp` lies inside [bottom, top)
         }
-    };
+    }
+}
+
+pub unsafe fn sigaltstack(
+    new: Option<&Sigaltstack>,
+    old_out: Option<&mut Sigaltstack>,
+) -> Result<()> {
+    let _g = tmp_disable_signals();
+    let tcb = &mut *Tcb::current().unwrap().os_specific.arch.get();
+
+    let old = get_sigaltstack(tcb, crate::arch::current_sp());
 
     if matches!(old, Sigaltstack::Enabled { onstack: true, .. }) && new != Some(&old) {
         return Err(Error::new(EPERM));
@@ -575,13 +657,170 @@ pub unsafe fn sigaltstack(
     Ok(())
 }
 
-pub const MIN_SIGALTSTACK_SIZE: usize = 8192;
+pub const MIN_SIGALTSTACK_SIZE: usize = 2048;
 
-pub fn currently_pending() -> u64 {
+pub fn currently_pending_blocked() -> u64 { // signals pending for this thread or the process that the allowset currently blocks
     let control = &unsafe { Tcb::current().unwrap() }.os_specific.control;
     let w0 = control.word[0].load(Ordering::Relaxed);
     let w1 = control.word[1].load(Ordering::Relaxed);
-    let pending_blocked_lo = w0 & !(w0 >> 32);
-    let pending_unblocked_hi = w1 & !(w0 >> 32);
-    pending_blocked_lo | (pending_unblocked_hi << 32)
+    let allow = (w0 >> 32) | ((w1 >> 32) << 32); // upper half of each word holds the allowset
+    let thread_pending = (w0 & 0xffff_ffff) | ((w1 & 0xffff_ffff) << 32); // lower half of each word holds the pending bits
+    let proc_pending = PROC_CONTROL_STRUCT.pending.load(Ordering::Relaxed);
+
+    core::sync::atomic::fence(Ordering::Acquire); // TODO: Correct ordering?
+
+    (thread_pending | proc_pending) & !allow
+}
+pub enum Unreachable {}
+
+pub fn await_signal_async(inner_allowset: u64) -> Result<Unreachable> { // sleeps with `inner_allowset` installed; only ever returns an error
+    let _guard = tmp_disable_signals();
+    let control = &unsafe { Tcb::current().unwrap() }.os_specific.control;
+
+    let old_allowset = get_allowset_raw(&control.word);
+    set_allowset_raw(&control.word, old_allowset, inner_allowset); // install the temporary allowset
+
+    let res = syscall::nanosleep(
+        &TimeSpec {
+            tv_sec: i64::MAX, // longest representable sleep
+            tv_nsec: 0,
+        },
+        &mut TimeSpec::default(), // remaining-time output, discarded
+    );
+    set_allowset_raw(&control.word, inner_allowset, old_allowset); // restore the caller's allowset
+
+    if res == Err(Error::new(EINTR)) { // the sleep was interrupted
+        unsafe {
+            manually_enter_trampoline();
+        }
+    }
+
+    res?; // propagate the sleep error (including EINTR) to the caller
+    unreachable!() // reached only if the i64::MAX-second sleep completed successfully
+}
+// TODO: deadline-based API
+pub fn await_signal_sync(inner_allowset: u64, timeout: Option<&TimeSpec>) -> Result<SiginfoAbi> { // claims one pending signal from `inner_allowset`, sleeping once if none is pending yet
+    let _guard = tmp_disable_signals();
+    let control = &unsafe { Tcb::current().unwrap() }.os_specific.control;
+
+    let old_allowset = get_allowset_raw(&control.word);
+    let proc_pending = PROC_CONTROL_STRUCT.pending.load(Ordering::Acquire); // process-wide pending bits
+    let thread_pending = set_allowset_raw(&control.word, old_allowset, inner_allowset); // installs the temporary allowset and yields this thread's pending bits
+
+    // Check if there are already signals matching the requested set, before waiting.
+    if let Some(info) = try_claim_multiple(proc_pending, thread_pending, inner_allowset, control) {
+        // TODO: RAII
+        set_allowset_raw(&control.word, inner_allowset, old_allowset);
+        return Ok(info);
+    }
+
+    let res = match timeout {
+        Some(t) => syscall::nanosleep(&t, &mut TimeSpec::default()),
+        None => syscall::nanosleep(
+            &TimeSpec {
+                tv_sec: i64::MAX, // no timeout given: sleep for the longest representable time
+                tv_nsec: 0,
+            },
+            &mut TimeSpec::default(),
+        ),
+    };
+
+    let thread_pending = set_allowset_raw(&control.word, inner_allowset, old_allowset); // restore the caller's allowset, re-sampling thread pending bits
+    let proc_pending = PROC_CONTROL_STRUCT.pending.load(Ordering::Acquire);
+
+    if let Err(error) = res
+        && error.errno != EINTR // EINTR is not treated as a failure here; other errors are returned
+    {
+        return Err(error);
+    }
+
+    // Then check if there were any signals left after waiting.
+    try_claim_multiple(proc_pending, thread_pending, inner_allowset, control)
+        // Normally ETIMEDOUT but not for sigtimedwait.
+        .ok_or(Error::new(EAGAIN))
+}
+fn try_claim_multiple( // tries each pending signal in `allowset`, lowest bit first, until one is claimed
+    mut proc_pending: u64,
+    mut thread_pending: u64,
+    allowset: u64,
+    control: &Sigcontrol,
+) -> Option<SiginfoAbi> {
+    while (proc_pending | thread_pending) & allowset != 0 {
+        let sig_idx = ((proc_pending | thread_pending) & allowset).trailing_zeros(); // lowest candidate bit (0-based)
+        if thread_pending & allowset & (1 << sig_idx) != 0
+            && let Some(res) = try_claim_single(sig_idx, Some(control)) // thread-targeted instance is tried first
+        {
+            return Some(res);
+        }
+        thread_pending &= !(1 << sig_idx); // not claimable from the thread: drop it from the local copy
+        if proc_pending & allowset & (1 << sig_idx) != 0
+            && let Some(res) = try_claim_single(sig_idx, None) // then the process-targeted instance
+        {
+            return Some(res);
+        }
+        proc_pending &= !(1 << sig_idx); // drop it from the local copy so the loop makes progress
+    }
+    None
+}
+fn try_claim_single(sig_idx: u32, thread_control: Option<&Sigcontrol>) -> Option<SiginfoAbi> { // `sig_idx` is 0-based; `None` means the process-wide pending set
+    let sig_group = sig_idx / 32; // 0 for bits 0..31, 1 for bits 32..63
+
+    if sig_group == 1 && thread_control.is_none() {
+        // Queued (realtime) signal
+        let mut ret = MaybeUninit::<RtSigInfo>::uninit();
+        let rt_inf = unsafe {
+            syscall::syscall2(
+                syscall::SYS_SIGDEQUEUE,
+                ret.as_mut_ptr() as usize, // output buffer for the dequeued RtSigInfo
+                sig_idx as usize - 32, // index within the upper group
+            )
+            .ok()?; // a failed dequeue yields None
+            ret.assume_init() // NOTE(review): assumes a successful SYS_SIGDEQUEUE fully writes `ret` — confirm
+        };
+        Some(SiginfoAbi {
+            si_signo: sig_idx as i32 + 1, // reported signal number is 1-based
+            si_errno: 0,
+            si_code: rt_inf.code,
+            si_pid: rt_inf.pid as i32,
+            si_uid: rt_inf.uid as i32,
+            si_status: 0,
+            si_value: rt_inf.arg,
+            si_addr: core::ptr::null_mut(),
+        })
+    } else {
+        // Idempotent (standard or thread realtime) signal
+        let info = SenderInfo::from_raw(match thread_control {
+            Some(ctl) => {
+                // Only this thread can clear pending bits, so this will always succeed.
+                let info = ctl.sender_infos[sig_idx as usize].load(Ordering::Acquire); // NOTE(review): indexed with the full 0..63 index — confirm the array length covers it
+                // TODO: Ordering
+                ctl.word[sig_group as usize].fetch_and(!(1 << (sig_idx % 32)), Ordering::Release); // clear the pending bit in the lower half of the word
+                info
+            }
+            None => {
+                let info =
+                    PROC_CONTROL_STRUCT.sender_infos[sig_idx as usize].load(Ordering::Acquire);
+                if PROC_CONTROL_STRUCT
+                    .pending
+                    .fetch_and(!(1 << sig_idx), Ordering::Release) // clear the bit and inspect its previous value
+                    & (1 << sig_idx)
+                    == 0
+                {
+                    // already claimed
+                    return None;
+                }
+                info
+            }
+        });
+        Some(SiginfoAbi {
+            si_signo: sig_idx as i32 + 1, // reported signal number is 1-based
+            si_errno: 0,
+            si_code: 0, // TODO: SI_USER const?
+            si_pid: info.pid as i32,
+            si_uid: info.ruid as i32,
+            si_status: 0,
+            si_value: 0, // undefined
+            si_addr: core::ptr::null_mut(),
+        })
+    }
 }
diff --git a/redox-rt/src/sys.rs b/redox-rt/src/sys.rs
index 2e2db371234cd8bd82496c19d2dd24336f4a3333..45779f6b8995fe3082d776cdce295e6b8599d5cf 100644
--- a/redox-rt/src/sys.rs
+++ b/redox-rt/src/sys.rs
@@ -1,6 +1,8 @@
+use core::ptr::addr_of;
+
 use syscall::{
     error::{Error, Result, EINTR},
-    TimeSpec,
+    RtSigInfo, TimeSpec,
 };
 
 use crate::{arch::manually_enter_trampoline, proc::FdGuard, signal::tmp_disable_signals, Tcb};
@@ -44,6 +46,26 @@ pub fn posix_kill(pid: usize, sig: usize) -> Result<()> {
     }
 }
 #[inline]
+pub fn posix_sigqueue(pid: usize, sig: usize, arg: usize) -> Result<()> { // queues `sig` with payload `arg` for `pid` via SYS_SIGENQUEUE
+    let siginf = RtSigInfo {
+        arg,
+        code: -1, // TODO: SI_QUEUE constant
+        uid: 0,   // TODO
+        pid: posix_getpid(), // sender is the current process
+    };
+    match wrapper(false, || unsafe {
+        syscall::syscall3(syscall::SYS_SIGENQUEUE, pid, sig, addr_of!(siginf) as usize)
+    }) {
+        Ok(_) | Err(Error { errno: EINTR }) => Ok(()), // EINTR is reported as success
+        Err(error) => Err(error),
+    }
+}
+#[inline]
+pub fn posix_getpid() -> u32 {
+    // SAFETY: read-only except during program/fork child initialization
+    unsafe { crate::THIS_PID.get().read() }
+}
+#[inline]
 pub fn posix_killpg(pgrp: usize, sig: usize) -> Result<()> {
     match wrapper(false, || syscall::kill(usize::wrapping_neg(pgrp), sig)) {
         Ok(_) | Err(Error { errno: EINTR }) => Ok(()),
diff --git a/redox-rt/src/thread.rs b/redox-rt/src/thread.rs
index 664588ab7768fef65fe73a4d162afc83e5f1e747..1791732a226ce446f7133ea6d9d10e15b4ccc0e6 100644
--- a/redox-rt/src/thread.rs
+++ b/redox-rt/src/thread.rs
@@ -1,6 +1,8 @@
+use core::mem::size_of;
+
 use syscall::{Result, O_CLOEXEC};
 
-use crate::{arch::*, proc::*, RtTcb};
+use crate::{arch::*, proc::*, signal::tmp_disable_signals, RtTcb};
 
 /// Spawns a new context sharing the same address space as the current one (i.e. a new thread).
 pub unsafe fn rlct_clone_impl(stack: *mut usize) -> Result<FdGuard> {
@@ -48,10 +50,20 @@ pub unsafe fn rlct_clone_impl(stack: *mut usize) -> Result<FdGuard> {
     Ok(new_thr_fd)
 }
 
-pub fn exit_this_thread() -> ! {
-    let thread_fd = RtTcb::current().thread_fd();
+pub unsafe fn exit_this_thread(stack_base: *mut (), stack_size: usize) -> ! { // reports thread exit plus the stack range through the thread's "status" fd
+    let _guard = tmp_disable_signals(); // never dropped: this function does not return
+
+    let tcb = RtTcb::current();
+    let thread_fd = tcb.thread_fd(); // NOTE(review): borrowed from `tcb` but dereferenced after the funmap below — confirm it does not point into the unmapped page
+
+    let _ = syscall::funmap(tcb as *const RtTcb as usize, syscall::PAGE_SIZE); // unmap one page starting at the RtTcb; failure is ignored
+
     // TODO: modify interface so it writes directly to the thread fd?
     let status_fd = syscall::dup(**thread_fd, b"status").unwrap();
-    syscall::write(status_fd, &usize::MAX.to_ne_bytes()).unwrap();
+    let mut buf = [0; size_of::<usize>() * 3]; // room for three usize values
+    plain::slice_from_mut_bytes(&mut buf)
+        .unwrap()
+        .copy_from_slice(&[usize::MAX, stack_base as usize, stack_size]); // payload: usize::MAX, then stack base and size
+    syscall::write(status_fd, &buf).unwrap();
     unreachable!()
 }
diff --git a/src/header/signal/linux.rs b/src/header/signal/linux.rs
index fb23a0e99bfc3aa4c2e152fad270a4a852c2433d..217c1ff076125674945425168f63fbe642da5d03 100644
--- a/src/header/signal/linux.rs
+++ b/src/header/signal/linux.rs
@@ -76,3 +76,6 @@ pub const SS_DISABLE: usize = 2;
 // Those two should be updated from kernel headers
 pub const MINSIGSTKSZ: usize = 2048;
 pub const SIGSTKSZ: usize = 8096;
+
+pub const SI_QUEUE: i32 = -1;
+pub const SI_USER: i32 = 0;
diff --git a/src/header/signal/mod.rs b/src/header/signal/mod.rs
index ffde0d51662a555ebee2d695a67c352a074a385a..a8785a88072434fab37aedca4e405fb89c45e3fa 100644
--- a/src/header/signal/mod.rs
+++ b/src/header/signal/mod.rs
@@ -5,14 +5,15 @@ use core::{mem, ptr};
 use cbitset::BitSet;
 
 use crate::{
-    error::{self, Errno, ResultExt},
+    error::{Errno, ResultExt},
+    c_str::CStr,
     header::{errno, time::timespec},
     platform::{self, types::*, Pal, PalSignal, Sys},
 };
 
 pub use self::sys::*;
 
-use super::errno::EFAULT;
+use super::{errno::EFAULT, unistd};
 
 #[cfg(target_os = "linux")]
 #[path = "linux.rs"]
@@ -54,19 +55,30 @@ pub struct sigaltstack {
     pub ss_size: size_t,
 }
 
+// FIXME: This struct is wrong on Linux
 #[repr(C)]
-#[derive(Clone, Debug)]
+#[derive(Clone, Copy)]
 pub struct siginfo {
     pub si_signo: c_int,
     pub si_errno: c_int,
     pub si_code: c_int,
-    _padding: [c_int; 29],
-    _si_align: [usize; 0],
+    pub si_pid: pid_t,
+    pub si_uid: uid_t,
+    pub si_addr: *mut c_void,
+    pub si_status: c_int,
+    pub si_value: sigval,
 }
 
 #[no_mangle]
 pub extern "C" fn _cbindgen_export_siginfo(a: siginfo) {}
 
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub union sigval {
+    pub sival_int: c_int,
+    pub sival_ptr: *mut c_void,
+}
+
 /// cbindgen:ignore
 pub type sigset_t = c_ulonglong;
 /// cbindgen:ignore
@@ -78,6 +90,12 @@ pub type stack_t = sigaltstack;
 pub extern "C" fn kill(pid: pid_t, sig: c_int) -> c_int {
     Sys::kill(pid, sig)
 }
+#[no_mangle] // C entry point for sigqueue(); forwards to the platform implementation
+pub extern "C" fn sigqueue(pid: pid_t, sig: c_int, val: sigval) -> c_int {
+    Sys::sigqueue(pid, sig, val)
+        .map(|()| 0) // success is reported as 0
+        .or_minus_one_errno() // NOTE(review): presumably stores the Errno and returns -1 — confirm against ResultExt
+}
 
 #[no_mangle]
 pub extern "C" fn killpg(pgrp: pid_t, sig: c_int) -> c_int {
@@ -126,7 +144,9 @@ pub unsafe extern "C" fn sigaction(
 
 #[no_mangle]
 pub unsafe extern "C" fn sigaddset(set: *mut sigset_t, signo: c_int) -> c_int {
-    if signo <= 0 || signo as usize > NSIG {
+    if signo <= 0 || signo as usize > NSIG.max(SIGRTMAX)
+    /* TODO */
+    {
         platform::ERRNO.set(errno::EINVAL);
         return -1;
     }
@@ -146,7 +166,9 @@ pub unsafe extern "C" fn sigaltstack(ss: *const stack_t, old_ss: *mut stack_t) -
 
 #[no_mangle]
 pub unsafe extern "C" fn sigdelset(set: *mut sigset_t, signo: c_int) -> c_int {
-    if signo <= 0 || signo as usize > NSIG {
+    if signo <= 0 || signo as usize > NSIG.max(SIGRTMAX)
+    /* TODO */
+    {
         platform::ERRNO.set(errno::EINVAL);
         return -1;
     }
@@ -343,7 +365,7 @@ pub unsafe extern "C" fn sigset(
 
 #[no_mangle]
 pub unsafe extern "C" fn sigsuspend(sigmask: *const sigset_t) -> c_int {
-    Sys::sigsuspend(&*sigmask)
+    Err(Sys::sigsuspend(&*sigmask)).or_minus_one_errno()
 }
 
 #[no_mangle]
@@ -360,13 +382,22 @@ pub unsafe extern "C" fn sigwait(set: *const sigset_t, sig: *mut c_int) -> c_int
 #[no_mangle]
 pub unsafe extern "C" fn sigtimedwait(
     set: *const sigset_t,
-    sig: *mut siginfo_t,
+    // s/siginfo_t/siginfo due to https://github.com/mozilla/cbindgen/issues/621
+    sig: *mut siginfo,
+    // POSIX leaves behavior unspecified if this is NULL, but on both Linux and Redox, NULL is used
+    // to differentiate between sigtimedwait and sigwaitinfo internally
     tp: *const timespec,
 ) -> c_int {
-    Sys::sigtimedwait(set, sig, tp)
+    Sys::sigtimedwait(&*set, sig.as_mut(), tp.as_ref()) // NULL `sig`/`tp` become None; `set` is dereferenced unconditionally
+        .map(|()| 0) // success is reported as 0
+        .or_minus_one_errno()
+}
+#[no_mangle] // C entry point for sigwaitinfo(): sigtimedwait() without a timeout
+pub unsafe extern "C" fn sigwaitinfo(set: *const sigset_t, sig: *mut siginfo_t) -> c_int {
+    sigtimedwait(set, sig, core::ptr::null()) // a NULL timespec selects the no-timeout path
 }
 
-pub const _signal_strings: [&str; 32] = [
+pub(crate) const SIGNAL_STRINGS: [&str; 32] = [
     "Unknown signal\0",
     "Hangup\0",
     "Interrupt\0",
@@ -400,3 +431,55 @@ pub const _signal_strings: [&str; 32] = [
     "Power failure\0",
     "Bad system call\0",
 ];
+#[no_mangle] // POSIX psignal(): writes "<prefix>: <signal description>\n" to stderr
+pub unsafe extern "C" fn psignal(sig: c_int, prefix: *const c_char) {
+    let c_description = usize::try_from(sig)
+        .ok()
+        .and_then(|idx| SIGNAL_STRINGS.get(idx))
+        .unwrap_or(&SIGNAL_STRINGS[0]); // negative or out-of-range numbers fall back to entry 0
+    let description = &c_description[..c_description.len() - 1]; // table entries end in an explicit NUL; strip it
+    let prefix = if prefix.is_null() { Default::default() } else { CStr::from_ptr(prefix).to_string_lossy() };
+    let separator = if prefix.is_empty() { "" } else { ": " }; // POSIX: "prefix: " is only written for a non-null, non-empty prefix
+    let string = alloc::format!("{prefix}{separator}{description}\n"); // TODO: stack vec or print directly?
+    // TODO: better internal libc API?
+    let _ = unistd::write(
+        unistd::STDERR_FILENO,
+        string.as_bytes().as_ptr().cast(),
+        string.as_bytes().len(),
+    );
+}
+#[no_mangle] // writes a multi-line dump of every siginfo_t field to stderr, prefixed by `prefix`
+pub unsafe extern "C" fn psiginfo(info: *const siginfo_t, prefix: *const c_char) {
+    let siginfo_t {
+        si_code,
+        si_signo,
+        si_pid,
+        si_uid,
+        si_errno,
+        si_addr,
+        si_status,
+        si_value,
+    } = &*info; // NOTE(review): `info` is dereferenced without a null check
+    let sival_ptr = si_value.sival_ptr; // the union is printed through its pointer view
+    let prefix = CStr::from_ptr(prefix).to_string_lossy(); // NOTE(review): `prefix` is read without a null check
+    // TODO: stack vec or print directly?
+    let string = alloc::format!(
+        "{prefix}:siginfo_t {{
+    si_code: {si_code}
+    si_signo: {si_signo}
+    si_pid: {si_pid}
+    si_uid: {si_uid}
+    si_errno: {si_errno}
+    si_addr: {si_addr:p}
+    si_status: {si_status}
+    si_value: {sival_ptr:p}
+}}
+"
+    );
+    // TODO: better internal libc API?
+    let _ = unistd::write(
+        unistd::STDERR_FILENO,
+        string.as_bytes().as_ptr().cast(),
+        string.as_bytes().len(),
+    ); // the write result is deliberately ignored
+}
diff --git a/src/header/signal/redox.rs b/src/header/signal/redox.rs
index 689b9da3bfb6fd68677944803b9771cacb04cca3..b0c7ed33fcdfda7e8c2916d758971c74e54301b9 100644
--- a/src/header/signal/redox.rs
+++ b/src/header/signal/redox.rs
@@ -1,5 +1,9 @@
 use core::arch::global_asm;
 
+use redox_rt::signal::SiginfoAbi;
+
+use super::{siginfo_t, sigset_t, stack_t};
+
 pub const SIGHUP: usize = 1;
 pub const SIGINT: usize = 2;
 pub const SIGQUIT: usize = 3;
@@ -48,6 +52,64 @@ pub const SA_NOCLDSTOP: usize = 0x4000_0000;
 pub const SS_ONSTACK: usize = 0x00000001;
 pub const SS_DISABLE: usize = 0x00000002;
 
-// TODO: It's just a guess based on Linux
+const _: () = {
+    if SS_ONSTACK != redox_rt::signal::SS_ONSTACK {
+        panic!();
+    }
+    if SS_DISABLE != redox_rt::signal::SS_DISABLE {
+        panic!();
+    }
+    if MINSIGSTKSZ != redox_rt::signal::MIN_SIGALTSTACK_SIZE {
+        panic!();
+    }
+};
+
+// should include both SigStack size, and some extra room for the libc handler
 pub const MINSIGSTKSZ: usize = 2048;
+
 pub const SIGSTKSZ: usize = 8096;
+
+pub const SI_QUEUE: i32 = -1;
+pub const SI_USER: i32 = 0;
+
+pub(crate) type ucontext_t = ucontext;
+pub(crate) type mcontext_t = mcontext;
+
+#[repr(C)] // NOTE(review): the signal handler receives a redox_rt SigStack pointer as its context argument, so this layout presumably has to mirror SigStack — confirm
+pub struct ucontext {
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    _pad: [usize; 1], // pad from 7*8 to 64
+
+    #[cfg(target_arch = "x86")]
+    _pad: [usize; 3], // pad from 9*4 to 12*4
+
+    pub uc_link: *mut ucontext_t,
+    pub uc_stack: stack_t,
+    pub uc_sigmask: sigset_t,
+    _sival: usize, // private: not exposed as a uc_* field
+    _sigcode: u32, // private: not exposed as a uc_* field
+    _signum: u32, // private: not exposed as a uc_* field
+    pub uc_mcontext: mcontext_t,
+}
+
+#[repr(C)]
+pub struct mcontext {
+    #[cfg(target_arch = "x86")]
+    _opaque: [u8; 512],
+    #[cfg(target_arch = "x86_64")]
+    _opaque: [u8; 864],
+    #[cfg(target_arch = "aarch64")]
+    _opaque: [u8; 272],
+}
+#[no_mangle]
+pub extern "C" fn __completely_unused_cbindgen_workaround_fn_ucontext_mcontext(
+    a: *const ucontext_t,
+    b: *const mcontext_t,
+) {
+}
+
+impl From<SiginfoAbi> for siginfo_t { // bit-for-bit conversion from the runtime's siginfo representation
+    fn from(value: SiginfoAbi) -> Self {
+        unsafe { core::mem::transmute(value) } // NOTE(review): relies on both structs keeping the same #[repr(C)] field order and types — confirm when either changes
+    }
+}
diff --git a/src/header/string/mod.rs b/src/header/string/mod.rs
index 0d16d6d2d127f6e4c501ec8d516ba0823572b8b0..9ddc6cb37abf2a3dc0e9c8984280ec3919b6f9e0 100644
--- a/src/header/string/mod.rs
+++ b/src/header/string/mod.rs
@@ -353,9 +353,9 @@ pub unsafe extern "C" fn strrchr(s: *const c_char, c: c_int) -> *mut c_char {
 
 #[no_mangle]
 pub unsafe extern "C" fn strsignal(sig: c_int) -> *mut c_char {
-    signal::_signal_strings
+    signal::SIGNAL_STRINGS
         .get(sig as usize)
-        .unwrap_or(&signal::_signal_strings[0]) // Unknown signal message
+        .unwrap_or(&signal::SIGNAL_STRINGS[0]) // Unknown signal message
         .as_ptr() as *mut c_char
 }
 
diff --git a/src/ld_so/start.rs b/src/ld_so/start.rs
index 6e2b45426fcb8e8da34689ceab5115c5afb58bcb..d0cf2c942fc439b36a0b58741240c57e6779c73d 100644
--- a/src/ld_so/start.rs
+++ b/src/ld_so/start.rs
@@ -141,6 +141,7 @@ fn resolve_path_name(
     }
     None
 }
+// TODO: Make unsafe
 #[no_mangle]
 pub extern "C" fn relibc_ld_so_start(sp: &'static mut Stack, ld_entry: usize) -> usize {
     // We get the arguments, the environment, and the auxilary vector
@@ -184,7 +185,9 @@ pub extern "C" fn relibc_ld_so_start(sp: &'static mut Stack, ld_entry: usize) ->
     }
 
     // TODO: Fix memory leak, although minimal.
-    crate::platform::init(auxv.clone());
+    unsafe {
+        crate::platform::init(auxv.clone());
+    }
 
     // Some variables that will be overridden by environment and auxiliary vectors
     let ld_library_path = envs.get("LD_LIBRARY_PATH").map(|s| s.to_owned());
diff --git a/src/platform/linux/mod.rs b/src/platform/linux/mod.rs
index 0646f1b78c434719313ee00336de7b091c3fb41f..b258b52da5e586d0a943cfa960592509e2ea7c46 100644
--- a/src/platform/linux/mod.rs
+++ b/src/platform/linux/mod.rs
@@ -157,7 +157,7 @@ impl Pal for Sys {
         }
         loop {}
     }
-    fn exit_thread() -> ! {
+    unsafe fn exit_thread(_stack_base: *mut (), _stack_size: usize) -> ! {
         // TODO
         Self::exit(0)
     }
diff --git a/src/platform/linux/signal.rs b/src/platform/linux/signal.rs
index d25a6139efc7597e7417b543c43b06f1b453d0c0..7a379e06af65d94d7ee6a88daa5c46a34273bb81 100644
--- a/src/platform/linux/signal.rs
+++ b/src/platform/linux/signal.rs
@@ -1,4 +1,5 @@
-use core::mem;
+use crate::header::signal::sigval;
+use core::{mem, ptr::addr_of};
 
 use super::{
     super::{types::*, PalSignal},
@@ -7,12 +8,54 @@ use super::{
 use crate::{
     error::Errno,
     header::{
-        signal::{sigaction, siginfo_t, sigset_t, stack_t, NSIG, SA_RESTORER},
+        signal::{sigaction, siginfo_t, sigset_t, stack_t, NSIG, SA_RESTORER, SI_QUEUE},
         sys_time::itimerval,
         time::timespec,
     },
 };
 
+// Mirrors the ucontext_t struct from the libc crate on Linux.
+#[repr(C)]
+pub struct ucontext_t {
+    pub uc_flags: c_ulong,
+    pub uc_link: *mut ucontext_t,
+    pub uc_stack: stack_t,
+    pub uc_mcontext: mcontext_t,
+    pub uc_sigmask: sigset_t,
+    __private: [u8; 512],
+}
+#[repr(C)]
+pub struct _libc_fpstate {
+    pub cwd: u16,
+    pub swd: u16,
+    pub ftw: u16,
+    pub fop: u16,
+    pub rip: u64,
+    pub rdp: u64,
+    pub mxcsr: u32,
+    pub mxcr_mask: u32,
+    pub _st: [_libc_fpxreg; 8],
+    pub _xmm: [_libc_xmmreg; 16],
+    __private: [u64; 12],
+}
+#[repr(C)]
+pub struct _libc_fpxreg {
+    pub significand: [u16; 4],
+    pub exponent: u16,
+    __private: [u16; 3],
+}
+
+#[repr(C)]
+pub struct _libc_xmmreg {
+    pub element: [u32; 4],
+}
+#[repr(C)]
+pub struct mcontext_t {
+    pub gregs: [i64; 23], // TODO: greg_t?
+    pub fpregs: *mut _libc_fpstate,
+    __private: [u64; 8],
+}
+
 impl PalSignal for Sys {
     unsafe fn getitimer(which: c_int, out: *mut itimerval) -> c_int {
         e(syscall!(GETITIMER, which, out)) as c_int
@@ -21,6 +64,19 @@ impl PalSignal for Sys {
     fn kill(pid: pid_t, sig: c_int) -> c_int {
         e(unsafe { syscall!(KILL, pid, sig) }) as c_int
     }
+    fn sigqueue(pid: pid_t, sig: c_int, val: sigval) -> Result<(), Errno> { // builds a SI_QUEUE siginfo and passes it to RT_SIGQUEUEINFO
+        let info = siginfo_t {
+            si_addr: core::ptr::null_mut(),
+            si_code: SI_QUEUE,
+            si_errno: 0,
+            si_pid: 0, // TODO: GETPID?
+            si_signo: sig,
+            si_status: 0,
+            si_uid: 0, // TODO: GETUID?
+            si_value: val, // caller-supplied payload
+        };
+        e_raw(unsafe { syscall!(RT_SIGQUEUEINFO, pid, sig, addr_of!(info)) }).map(|_| ()) // the syscall's success value is discarded
+    }
 
     fn killpg(pgrp: pid_t, sig: c_int) -> c_int {
         e(unsafe { syscall!(KILL, -(pgrp as isize) as pid_t, sig) }) as c_int
@@ -99,15 +155,27 @@ impl PalSignal for Sys {
         .map(|_| ())
     }
 
-    fn sigsuspend(set: &sigset_t) -> c_int {
-        unsafe { e(syscall!(RT_SIGSUSPEND, set as *const sigset_t, NSIG / 8)) as c_int }
+    fn sigsuspend(mask: &sigset_t) -> Errno { // returns the error itself; success is not expected
+        unsafe {
+            e_raw(syscall!(RT_SIGSUSPEND, mask as *const sigset_t, NSIG / 8)) // NSIG / 8: set size in bytes
+                .expect_err("must fail") // panics if the syscall ever reports success
+        }
     }
 
-    unsafe fn sigtimedwait(
-        set: *const sigset_t,
-        sig: *mut siginfo_t,
-        tp: *const timespec,
-    ) -> c_int {
-        e(syscall!(RT_SIGTIMEDWAIT, set, sig, tp, NSIG / 8)) as c_int
+    fn sigtimedwait(
+        set: &sigset_t,
+        sig: Option<&mut siginfo_t>, // None is handed to the kernel as a null pointer
+        tp: Option<&timespec>, // None is handed to the kernel as a null pointer
+    ) -> Result<(), Errno> {
+        unsafe {
+            e_raw(syscall!(
+                RT_SIGTIMEDWAIT,
+                set as *const _,
+                sig.map_or_else(core::ptr::null_mut, |s| s as *mut _),
+                tp.map_or_else(core::ptr::null, |t| t as *const _),
+                NSIG / 8 // size of the signal set in bytes
+            ))
+            .map(|_| ()) // the syscall's success value is discarded
+        }
     }
 }
diff --git a/src/platform/mod.rs b/src/platform/mod.rs
index 2b4071dd84dad5af1c241849e24428aa87aa9aae..9fd855a7da06f8df89b81748388f835567f8745b 100644
--- a/src/platform/mod.rs
+++ b/src/platform/mod.rs
@@ -283,15 +283,17 @@ pub fn get_auxv(auxvs: &[[usize; 2]], key: usize) -> Option<usize> {
 
 #[cold]
 #[cfg(target_os = "redox")]
-pub fn init(auxvs: Box<[[usize; 2]]>) {
+// SAFETY: Must only be called when only one thread exists.
+pub unsafe fn init(auxvs: Box<[[usize; 2]]>) {
+    redox_rt::initialize();
+
     use self::auxv_defs::*;
 
     if let (Some(cwd_ptr), Some(cwd_len)) = (
         get_auxv(&auxvs, AT_REDOX_INITIAL_CWD_PTR),
         get_auxv(&auxvs, AT_REDOX_INITIAL_CWD_LEN),
     ) {
-        let cwd_bytes: &'static [u8] =
-            unsafe { core::slice::from_raw_parts(cwd_ptr as *const u8, cwd_len) };
+        let cwd_bytes: &'static [u8] = core::slice::from_raw_parts(cwd_ptr as *const u8, cwd_len);
         if let Ok(cwd) = core::str::from_utf8(cwd_bytes) {
             self::sys::path::set_cwd_manual(cwd.into());
         }
diff --git a/src/platform/pal/mod.rs b/src/platform/pal/mod.rs
index 3a234dfec37848f242580f44584bd386fc56cd08..09dea6cba96fb5e7e78b5f7a52803b14e32b329b 100644
--- a/src/platform/pal/mod.rs
+++ b/src/platform/pal/mod.rs
@@ -55,7 +55,7 @@ pub trait Pal {
 
     fn exit(status: c_int) -> !;
 
-    fn exit_thread() -> !;
+    unsafe fn exit_thread(stack_base: *mut (), stack_size: usize) -> !;
 
     fn fchdir(fildes: c_int) -> c_int;
 
diff --git a/src/platform/pal/signal.rs b/src/platform/pal/signal.rs
index e661eef4b7b7e69c11e71008a3153788e5a5c3b6..7a01780ac15301d57028803ce22ace6c870f2714 100644
--- a/src/platform/pal/signal.rs
+++ b/src/platform/pal/signal.rs
@@ -2,7 +2,7 @@ use super::super::{types::*, Pal};
 use crate::{
     error::Errno,
     header::{
-        signal::{sigaction, siginfo_t, sigset_t, stack_t},
+        signal::{sigaction, siginfo_t, sigset_t, sigval, stack_t},
         sys_time::itimerval,
         time::timespec,
     },
@@ -13,6 +13,8 @@ pub trait PalSignal: Pal {
 
     fn kill(pid: pid_t, sig: c_int) -> c_int;
 
+    fn sigqueue(pid: pid_t, sig: c_int, val: sigval) -> Result<(), Errno>;
+
     fn killpg(pgrp: pid_t, sig: c_int) -> c_int;
 
     fn raise(sig: c_int) -> Result<(), Errno>;
@@ -35,8 +37,11 @@ pub trait PalSignal: Pal {
         oset: Option<&mut sigset_t>,
     ) -> Result<(), Errno>;
 
-    fn sigsuspend(set: &sigset_t) -> c_int;
+    fn sigsuspend(mask: &sigset_t) -> Errno; // always fails
 
-    unsafe fn sigtimedwait(set: *const sigset_t, sig: *mut siginfo_t, tp: *const timespec)
-        -> c_int;
+    fn sigtimedwait(
+        set: &sigset_t,
+        sig: Option<&mut siginfo_t>,
+        tp: Option<&timespec>,
+    ) -> Result<(), Errno>;
 }
diff --git a/src/platform/redox/mod.rs b/src/platform/redox/mod.rs
index 95c7b89847d340da8413ac7fe3fe11001aed79e7..57482b4d9bc472a9ce4b0a34e3634ea9d912a57e 100644
--- a/src/platform/redox/mod.rs
+++ b/src/platform/redox/mod.rs
@@ -470,7 +470,7 @@ impl Pal for Sys {
     }
 
     fn getpid() -> pid_t {
-        e(syscall::getpid()) as pid_t
+        redox_rt::sys::posix_getpid() as pid_t // now answered by redox-rt instead of the raw syscall
     }
 
     fn getppid() -> pid_t {
@@ -1137,7 +1137,7 @@ impl Pal for Sys {
         (unsafe { syscall::syscall5(syscall::number::SYS_GETPID, !0, !0, !0, !0, !0) }).is_ok()
     }
 
-    fn exit_thread() -> ! {
-        redox_rt::thread::exit_this_thread()
+    unsafe fn exit_thread(stack_base: *mut (), stack_size: usize) -> ! { // never returns
+        redox_rt::thread::exit_this_thread(stack_base, stack_size) // both values forwarded unchanged
     }
 }
diff --git a/src/platform/redox/signal.rs b/src/platform/redox/signal.rs
index f6dfc9747b066913acbab36db5a3768be67224d3..0ae511bbec63407e17a7f85a5f4bc7e657955cac 100644
--- a/src/platform/redox/signal.rs
+++ b/src/platform/redox/signal.rs
@@ -1,5 +1,7 @@
-use core::mem;
-use redox_rt::signal::{Sigaction, SigactionFlags, SigactionKind, Sigaltstack, SignalHandler};
+use core::mem::{self, offset_of};
+use redox_rt::signal::{
+    PosixStackt, SigStack, Sigaction, SigactionFlags, SigactionKind, Sigaltstack, SignalHandler,
+};
 use syscall::{self, Result};
 
 use super::{
@@ -11,8 +13,8 @@ use crate::{
     header::{
         errno::{EINVAL, ENOSYS},
         signal::{
-            sigaction, siginfo_t, sigset_t, stack_t, SA_SIGINFO, SIG_BLOCK, SIG_DFL, SIG_IGN,
-            SIG_SETMASK, SIG_UNBLOCK, SS_DISABLE, SS_ONSTACK,
+            sigaction, siginfo_t, sigset_t, sigval, stack_t, ucontext_t, SA_SIGINFO, SIG_BLOCK,
+            SIG_DFL, SIG_IGN, SIG_SETMASK, SIG_UNBLOCK, SS_DISABLE, SS_ONSTACK,
         },
         sys_time::{itimerval, ITIMER_REAL},
         time::timespec,
@@ -20,6 +22,28 @@ use crate::{
     platform::ERRNO,
 };
 
+const _: () = { // evaluated at compile time; a failed check aborts the build
+    #[track_caller]
+    const fn assert_eq(a: usize, b: usize) { // const-context stand-in for assert_eq!
+        if a != b {
+            panic!("compile-time struct verification failed");
+        }
+    }
+    assert_eq(offset_of!(ucontext_t, uc_link), offset_of!(SigStack, link));
+    assert_eq(
+        offset_of!(ucontext_t, uc_stack),
+        offset_of!(SigStack, old_stack), // each libc field must sit at its redox-rt counterpart's offset
+    );
+    assert_eq(
+        offset_of!(ucontext_t, uc_sigmask),
+        offset_of!(SigStack, old_mask),
+    );
+    assert_eq(
+        offset_of!(ucontext_t, uc_mcontext),
+        offset_of!(SigStack, regs),
+    );
+};
+
 impl PalSignal for Sys {
     unsafe fn getitimer(which: c_int, out: *mut itimerval) -> c_int {
         let path = match which {
@@ -55,6 +79,13 @@ impl PalSignal for Sys {
     fn kill(pid: pid_t, sig: c_int) -> c_int {
         e(redox_rt::sys::posix_kill(pid as usize, sig as usize).map(|()| 0)) as c_int
     }
+    fn sigqueue(pid: pid_t, sig: c_int, val: sigval) -> Result<(), Errno> {
+        Ok(redox_rt::sys::posix_sigqueue(
+            pid as usize,
+            sig as usize,
+            unsafe { val.sival_ptr } as usize, // payload read via the pointer member, sent as a usize
+        )?) // `?` converts the redox-rt error into Errno
+    }
 
     fn killpg(pgrp: pid_t, sig: c_int) -> c_int {
         e(redox_rt::sys::posix_killpg(pgrp as usize, sig as usize).map(|()| 0)) as c_int
@@ -204,28 +235,18 @@ impl PalSignal for Sys {
         redox_rt::signal::sigaltstack(new.as_ref(), old.as_mut())?;
 
         if let (Some(old_c_stack), Some(old)) = (old_c, old) {
-            *old_c_stack = match old {
-                Sigaltstack::Disabled => stack_t {
-                    ss_sp: core::ptr::null_mut(),
-                    ss_size: 0,
-                    ss_flags: SS_DISABLE.try_into().unwrap(),
-                },
-                Sigaltstack::Enabled {
-                    onstack,
-                    base,
-                    size,
-                } => stack_t {
-                    ss_sp: base.cast(),
-                    ss_size: size,
-                    ss_flags: SS_ONSTACK.try_into().unwrap(),
-                },
+            let c_stack = PosixStackt::from(old);
+            *old_c_stack = stack_t {
+                ss_sp: c_stack.sp.cast(),
+                ss_size: c_stack.size,
+                ss_flags: c_stack.flags,
             };
         }
         Ok(())
     }
 
     fn sigpending(set: &mut sigset_t) -> Result<(), Errno> {
-        *set = redox_rt::signal::currently_pending();
+        *set = redox_rt::signal::currently_pending_blocked(); // NOTE(review): presumably pending AND blocked — confirm
         Ok(())
     }
 
@@ -243,28 +264,27 @@ impl PalSignal for Sys {
         })
     }
 
-    fn sigsuspend(set: &sigset_t) -> c_int {
-        //TODO: correct implementation
-        let mut oset = sigset_t::default();
-        if let Err(err) = redox_rt::signal::set_sigmask(Some(*set), Some(&mut oset)) {
-            Errno::from(err).sync();
-            return -1;
-        }
-        //TODO: wait for signal
-        Self::sched_yield();
-        if let Err(err) = redox_rt::signal::set_sigmask(Some(oset), None) {
-            Errno::from(err).sync();
-            return -1;
+    fn sigsuspend(mask: &sigset_t) -> Errno { // returns the error itself; success is not expected
+        match redox_rt::signal::await_signal_async(!*mask) { // passes the bitwise complement of `mask`
+            Ok(_) => unreachable!(), // panics if the wait ever reports success
+            Err(err) => err.into(), // converted into Errno for the caller
         }
-        0
     }
 
-    unsafe fn sigtimedwait(
-        set: *const sigset_t,
-        sig: *mut siginfo_t,
-        tp: *const timespec,
-    ) -> c_int {
-        ERRNO.set(ENOSYS);
-        -1
+    fn sigtimedwait(
+        set: &sigset_t,
+        info_out: Option<&mut siginfo_t>, // optional destination for the received siginfo
+        timeout: Option<&timespec>, // None is forwarded as "no timeout argument"
+    ) -> Result<(), Errno> {
+        // TODO: deadline-based API
+        let timeout = timeout.map(|timeout| syscall::TimeSpec {
+            tv_sec: timeout.tv_sec,
+            tv_nsec: timeout.tv_nsec as _, // cast to whatever integer type TimeSpec uses
+        }); // converted into the syscall crate's TimeSpec
+        let info = redox_rt::signal::await_signal_sync(*set, timeout.as_ref())?.into();
+        if let Some(out) = info_out { // on error, `?` above returns before this write
+            *out = info;
+        }
+        Ok(())
     }
 }
diff --git a/src/pthread/mod.rs b/src/pthread/mod.rs
index 2d2fc7b20fbb6c4ac0776384a7ecdce997bbd127..d7b97fe1defd3ffa1ca20a07357098484a753b7d 100644
--- a/src/pthread/mod.rs
+++ b/src/pthread/mod.rs
@@ -2,7 +2,7 @@
 
 use core::{
     cell::{Cell, UnsafeCell},
-    mem::MaybeUninit,
+    mem::{offset_of, MaybeUninit},
     ptr::{addr_of, NonNull},
     sync::atomic::{AtomicBool, AtomicUsize, Ordering},
 };
@@ -22,8 +22,6 @@ use crate::{
 
 use crate::sync::{waitval::Waitval, Mutex};
 
-const MAIN_PTHREAD_ID: usize = 1;
-
 /// Called only by the main thread, as part of relibc_start.
 pub unsafe fn init() {
     Tcb::current()
@@ -269,7 +267,7 @@ pub unsafe fn join(thread: &Pthread) -> Result<Retval, Errno> {
 pub unsafe fn detach(thread: &Pthread) -> Result<(), Errno> {
     thread
         .flags
-        .fetch_or(PthreadFlags::DETACHED.bits(), Ordering::Release);
+        .fetch_or(PthreadFlags::DETACHED.bits(), Ordering::AcqRel); // previous flags value is discarded
     Ok(())
 }
 
@@ -294,6 +292,8 @@ pub unsafe fn exit_current_thread(retval: Retval) -> ! {
     header::tls::run_all_destructors();
 
     let this = current_thread().expect("failed to obtain current thread when exiting");
+    let stack_base = this.stack_base;
+    let stack_size = this.stack_size;
 
     if this.flags.load(Ordering::Acquire) & PthreadFlags::DETACHED.bits() != 0 {
         // When detached, the thread state no longer makes any sense, and can immediately be
@@ -304,12 +304,12 @@ pub unsafe fn exit_current_thread(retval: Retval) -> ! {
         this.waitval.post(retval);
     }
 
-    Sys::exit_thread()
+    Sys::exit_thread(stack_base.cast(), stack_size)
 }
 
 unsafe fn dealloc_thread(thread: &Pthread) {
+    // TODO: How should this be handled on Linux? Only the TID map entry is removed here.
     OS_TID_TO_PTHREAD.lock().remove(&thread.os_tid.get().read());
-    //drop(Box::from_raw(thread as *const Pthread as *mut Pthread));
 }
 pub const SIGRT_RLCT_CANCEL: usize = 33;
 pub const SIGRT_RLCT_TIMER: usize = 34;
diff --git a/tests/Makefile b/tests/Makefile
index 774a03e86aed7f0df60f94ebd01e85c9ac867924..73bb54dc20cd46b48e48957e34f274b166875a8a 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -159,10 +159,12 @@ NAMES=\
 	$(EXPECT_NAMES) \
 	dirent/main \
 	pty/forkpty \
+	psignal \
 	pwd \
 	sa_restart \
 	sigchld \
 	stdio/ctermid \
+	sigqueue \
 	stdio/tempnam \
 	stdio/tmpnam \
 	stdlib/bsearch \
diff --git a/tests/psignal.c b/tests/psignal.c
new file mode 100644
index 0000000000000000000000000000000000000000..375d1b0552084dc1f81c88a74bd60b06111d3800
--- /dev/null
+++ b/tests/psignal.c
@@ -0,0 +1,22 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "test_helpers.h"
+
+int main(void) { // prints one psignal() and one psiginfo() message between marker lines
+    puts("------psignal------");
+    psignal(SIGUSR1, "a prefix");
+    puts("------  end  ------");
+    puts("------psiginfo-----");
+    siginfo_t info = { 0 }; // si_signo stays 0; only the fields assigned below are set
+    info.si_code = SI_USER;
+    info.si_pid = 42;
+    info.si_uid = 1337;
+    info.si_addr = (void *)0xdeadbeef; // arbitrary recognizable values, never dereferenced here
+    info.si_value.sival_ptr = (void *)0xfedface;
+    psiginfo(&info, "another prefix");
+    puts("------        -----");
+}
diff --git a/tests/sigaction.c b/tests/sigaction.c
index 245ad3b7ccc0080070c51eaa10012bdddbc3766c..b332f3902d1e7ea3f57a11490e2919e35425f854 100644
--- a/tests/sigaction.c
+++ b/tests/sigaction.c
@@ -1,4 +1,6 @@
+#include <assert.h>
 #include <signal.h>
+#include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -7,23 +9,46 @@
 #include "test_helpers.h"
 
 void handler1(int sig) {
-    ERROR_IF(handler, sig, != SIGUSR1);
-    puts("Signal handler1 called!");
+    assert(sig == SIGUSR1); // this handler is only ever installed for SIGUSR1
+    char *str = "Signal handler1 called!\n";
+    write(STDOUT_FILENO, str, strlen(str)); // write() is async-signal-safe, unlike puts()
 }
 
-void handler2(int sig) {
-    ERROR_IF(handler, sig, != SIGUSR1);
-    puts("Signal handler2 called!");
+sigset_t the_set = { 0 };
+
+void handler2(int sig, siginfo_t *info, void *context_raw) { // SA_SIGINFO-style handler
+    assert(sig == SIGUSR1);
+    char *str = "Signal handler2 called!\n";
+    write(STDOUT_FILENO, str, strlen(str)); // write() is async-signal-safe, unlike puts()
+
+    assert(info != NULL);
+    assert(info->si_signo == SIGUSR1);
+#ifndef __linux__ // bare __linux is not defined in strict ISO modes; use the same macro as below
+    // TODO: SI_TKILL?
+    assert(info->si_code == SI_USER);
+    assert(info->si_pid == getpid());
+    assert(info->si_uid == getuid());
+#endif
+
+    ucontext_t *context = context_raw;
+    assert(context != NULL);
+#ifndef __linux__ // TODO
+    assert(memcmp(&context->uc_sigmask, &the_set, sizeof(sigset_t)) == 0); // saved mask == mask read in main
+    assert(context->uc_link == NULL);
+#endif
 }
 
 int main(void) {
 	struct sigaction sa1 = { .sa_handler = handler1 };
-    struct sigaction sa2 = { .sa_handler = handler2 };
+    struct sigaction sa2 = { .sa_sigaction = handler2, .sa_flags = SA_SIGINFO };
     struct sigaction saold = {0};
 
 	sigemptyset(&sa1.sa_mask);
     sigemptyset(&sa2.sa_mask);
 
+    int status = sigprocmask(SIG_SETMASK, NULL, &the_set);
+    ERROR_IF(sigprocmask, status, == -1);
+
     int rcode = sigaction(SIGUSR1, &sa1, NULL);
     ERROR_IF(signal, rcode, != 0);
 
diff --git a/tests/sigqueue.c b/tests/sigqueue.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4afe0a8628de35a077ecaf6d7b686e655d51661
--- /dev/null
+++ b/tests/sigqueue.c
@@ -0,0 +1,120 @@
+#include <assert.h>
+#include <signal.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "test_helpers.h"
+
+#define THE_SIG SIGRTMIN
+
+volatile sig_atomic_t num = 0;
+
+int parent;
+
+void validate(int sig, const siginfo_t *info) { // shared checks for handler and sigtimedwait paths
+  assert(sig == THE_SIG);
+  assert(info != NULL);
+  assert(info->si_signo == THE_SIG);
+  assert(info->si_value.sival_int == num); // payloads must arrive in the order they were queued
+  assert(info->si_code == SI_QUEUE);
+  assert(info->si_pid == parent); // `parent` is recorded in main before fork()
+}
+
+void action(int sig, siginfo_t *info, void *context) { // SA_SIGINFO handler for THE_SIG
+  (void)context;
+  assert(context != NULL);
+  validate(sig, info);
+  write(1, "action\n", 7); // fd 1 is stdout; 7 is the length of the message
+  num++; // next expected payload value
+}
+
+int main(void) { // parent queues 32 payload-carrying signals; child checks order and delivery paths
+  int status, fds[2];
+
+  status = pipe(fds); // child -> parent completion notification
+  ERROR_IF(pipe, status, == -1);
+
+  parent = getpid();
+  assert(parent != 0);
+
+  sigset_t set, mask;
+  status = sigfillset(&mask);
+  ERROR_IF(sigfillset, status, == -1);
+  status = sigdelset(&mask, SIGSEGV);
+  ERROR_IF(sigdelset, status, == -1);
+  status = sigdelset(&mask, SIGBUS);
+  ERROR_IF(sigdelset, status, == -1);
+  status = sigdelset(&mask, SIGILL);
+  ERROR_IF(sigdelset, status, == -1);
+  status = sigdelset(&mask, SIGFPE);
+  ERROR_IF(sigdelset, status, == -1);
+  status = sigdelset(&mask, SIGINT);
+  ERROR_IF(sigdelset, status, == -1);
+  status = sigprocmask(SIG_SETMASK, &mask, NULL); // THE_SIG is blocked from here on, so it queues
+  ERROR_IF(sigprocmask, status, == -1);
+
+  status = sigemptyset(&set);
+  ERROR_IF(sigemptyset, status, == -1);
+  status = sigaddset(&set, THE_SIG);
+  ERROR_IF(sigaddset, status, == -1);
+
+  sigset_t empty_set;
+  status = sigemptyset(&empty_set);
+  ERROR_IF(sigemptyset, status, == -1);
+
+  int child = fork();
+  ERROR_IF(fork, child, == -1);
+
+  status = close(fds[child == 0 ? 0 : 1]); // child drops the read end, parent the write end
+  ERROR_IF(close, status, == -1);
+
+  struct sigaction sa = { 0 }; // zero every field that is not explicitly set below
+  memcpy(&sa.sa_mask, &set, sizeof (sigset_t));
+  sa.sa_flags = SA_SIGINFO;
+  sa.sa_sigaction = action;
+
+  status = sigaction(THE_SIG, &sa, NULL);
+  ERROR_IF(sigaction, status, == -1);
+
+  if (child == 0) {
+    assert(num == 0);
+    siginfo_t info;
+    struct timespec t = (struct timespec){ .tv_sec = 1, .tv_nsec = 0 };
+    status = sigtimedwait(&set, &info, &t); // consumes payload 0 synchronously
+    ERROR_IF(sigtimedwait, status, == -1);
+    validate(THE_SIG, &info);
+    assert(num == 0); // ensure no signal handler ran
+
+    num++;
+
+    // TODO: check status
+    status = sigsuspend(&empty_set);
+    if (status == -1) {
+        perror("error in sigsuspend");
+        puts("[EINTR] is usually expected");
+    }
+
+    assert(num == 2); // ensure signal handler ran
+
+    status = sigprocmask(SIG_SETMASK, &empty_set, NULL);
+    ERROR_IF(sigprocmask, status, == -1);
+
+    while (num < 32) {} // 32 signals are queued (0..31): wait until the last one was handled
+
+    status = write(fds[1], "A", 1);
+    ERROR_IF(write, status, == -1);
+  } else {
+    for (int n = 0; n <= 31; n++) {
+      status = sigqueue(child, THE_SIG, (union sigval){ .sival_int = n });
+      ERROR_IF(sigqueue, status, == -1);
+    }
+    char buf[1];
+    status = read(fds[0], buf, 1); // blocks until the child reports completion (or exits)
+    ERROR_IF(read, status, == -1);
+  }
+
+  return 0;
+}