diff --git a/redox-rt/src/arch/x86_64.rs b/redox-rt/src/arch/x86_64.rs
index 2d9fb84be4cd2bbeb0ae2d2352427ba0663fde82..cba03eb0f309d847ec5f97cf4bc16a5b46824bca 100644
--- a/redox-rt/src/arch/x86_64.rs
+++ b/redox-rt/src/arch/x86_64.rs
@@ -1,3 +1,7 @@
+use core::mem::offset_of;
+use core::sync::atomic::AtomicU8;
+
+use syscall::data::Sigcontrol;
 use syscall::error::*;
 
 use crate::proc::{fork_inner, FdGuard};
@@ -7,6 +11,14 @@ use crate::signal::inner_c;
 pub(crate) const STACK_TOP: usize = 1 << 47;
 pub(crate) const STACK_SIZE: usize = 1024 * 1024;
 
+#[repr(C)] // NOTE: MUST MATCH TCB STRUCT (four usize words); repr(C) pins the field order
+pub struct SigArea {
+    altstack_top: usize, // value loaded into RSP when sigentry switches stacks
+    altstack_bottom: usize, // lower bound of the alternate stack; 0 when disabled
+    tmp: usize, // scratch slot: holds the return RIP while sigentry restores RSP
+    _rsvd: usize,
+}
+
 /// Deactive TLS, used before exec() on Redox to not trick target executable into thinking TLS
 /// is already initialized as if it was a thread.
 pub unsafe fn deactivate_tcb(open_via_dup: usize) -> Result<()> {
@@ -90,30 +102,84 @@ asmfunction!(__relibc_internal_fork_ret: ["
     pop rbp
     ret
 "] <= []);
-// TODO: is the memset necessary?
-asmfunction!(__relibc_internal_sigentry_fxsave: ["
-    sub rsp, 4096
+asmfunction!(__relibc_internal_rlct_clone_ret -> usize: ["
+    # Pop the entry point (rax) and, presumably, its arguments in System V register order
+    pop rax
+    pop rdi
+    pop rsi
+    pop rdx
+    pop rcx
+    pop r8
+    pop r9
 
-    fxsave64 [rsp]
+    sub rsp, 8
 
-    mov rdi, rsp
-    call {inner}
+    mov DWORD PTR [rsp], 0x00001F80
+    ldmxcsr [rsp]
+    mov WORD PTR [rsp], 0x037F
+    fldcw [rsp]
 
-    fxrstor64 [rsp]
-    add rsp, 4096
+    add rsp, 8
+
+    # Call the entry point that was popped into rax above
+    call rax
+
+    ret
+"] <= []);
 
-    //mov eax, {{SYS_SIGRETURN}}
-    syscall
-"] <= [inner = sym inner_c]);
-asmfunction!(__relibc_internal_sigentry_xsave: ["
-    sub rsp, 4096
+asmfunction!(__relibc_internal_sigentry: ["
+    // Get offset to TCB
+    mov rax, gs:[0]
+
+    // If current RSP is above altstack region, switch to altstack
+    mov rdx, [rax + {tcb_sa_off} + {sa_altstack_top}]
+    cmp rsp, rdx
+    cmova rsp, rdx
+
+    // If current RSP is below altstack region, also switch to altstack. rdx still holds the
+    // top here, which is where a downward-growing stack has to start.
+    cmp rsp, [rax + {tcb_sa_off} + {sa_altstack_bottom}]
+    cmovbe rsp, rdx
+
+    // Otherwise, the altstack is already active. A disabled sigaltstack is equivalent to
+    // 'top' = usize::MAX and 'bottom' = 0. Now build the signal frame. `push ss`/`push cs` are
+    // not encodable in 64-bit mode, so go via rdx (its interrupted value is in the Sigcontrol).
+    mov edx, ss
+    push rdx
+    push [rax + {tcb_sc_off} + {sc_saved_rsp}]
+    push [rax + {tcb_sc_off} + {sc_saved_rflags}]
+    mov edx, cs
+    push rdx
+    push [rax + {tcb_sc_off} + {sc_saved_rip}]
+
+    push rdi
+    push rsi
+    push [rax + {tcb_sc_off} + {sc_saved_rdx}]
+    push rcx
+    push [rax + {tcb_sc_off} + {sc_saved_rax}]
+    push r8
+    push r9
+    push r10
+    push r11
+    push rbx
+    push rbp
+    push r12
+    push r13
+    push r14
+    push r15
+    // Reserve the area that fxsave/xsave write to (4096 bytes, plus 32 more); zeroed below
+    sub rsp, 4096 + 32
 
     cld
     mov rdi, rsp
     xor eax, eax
-    mov ecx, 4096
+    mov ecx, 4096 + 32
     rep stosb
 
+    // TODO: self-modifying?
+    cmp byte ptr [rip + {supports_xsave}], 0
+    je 3f
+    // XSAVE path: edx:eax is the requested-feature mask, all ones asks for every component
     mov eax, 0xffffffff
     mov edx, eax
     xsave [rsp]
@@ -124,33 +190,51 @@ asmfunction!(__relibc_internal_sigentry_xsave: ["
     mov eax, 0xffffffff
     mov edx, eax
     xrstor [rsp]
-    add rsp, 4096
 
-    //mov eax, {{SYS_SIGRETURN}}
-    syscall
-"] <= [inner = sym inner_c]);
-
-asmfunction!(__relibc_internal_rlct_clone_ret -> usize: ["
-    # Load registers
+2:
+    add rsp, 4096 + 32
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rbp
+    pop rbx
+    pop r11
+    pop r10
+    pop r9
+    pop r8
     pop rax
-    pop rdi
-    pop rsi
-    pop rdx
     pop rcx
-    pop r8
-    pop r9
-
-    sub rsp, 8
-
-    mov DWORD PTR [rsp], 0x00001F80
-    ldmxcsr [rsp]
-    mov WORD PTR [rsp], 0x037F
-    fldcw [rsp]
+    pop rdx
+    pop rsi
+    pop rdi
 
+    pop gs:[{tcb_sa_off} + {sa_tmp}]
     add rsp, 8
+    popfq
+    pop rsp
+    jmp gs:[{tcb_sa_off} + {sa_tmp}]
+3:
+    fxsave64 [rsp]
 
-    # Call entry point
-    call rax
+    mov rdi, rsp
+    call {inner}
 
-    ret
-"] <= []);
+    fxrstor64 [rsp]
+    jmp 2b
+"] <= [
+    inner = sym inner_c,
+    sa_tmp = const offset_of!(SigArea, tmp),
+    sa_altstack_top = const offset_of!(SigArea, altstack_top),
+    sa_altstack_bottom = const offset_of!(SigArea, altstack_bottom),
+    sc_saved_rax = const offset_of!(Sigcontrol, saved_scratch_a),
+    sc_saved_rdx = const offset_of!(Sigcontrol, saved_scratch_b),
+    sc_saved_rflags = const offset_of!(Sigcontrol, saved_flags),
+    sc_saved_rip = const offset_of!(Sigcontrol, saved_ip),
+    sc_saved_rsp = const offset_of!(Sigcontrol, saved_sp),
+    tcb_sa_off = const 0, // FIXME
+    tcb_sc_off = const 0, // FIXME
+    supports_xsave = sym SUPPORTS_XSAVE,
+]);
+
+static SUPPORTS_XSAVE: AtomicU8 = AtomicU8::new(0); // FIXME: nothing sets this yet; 0 selects the fxsave path
diff --git a/redox-rt/src/signal.rs b/redox-rt/src/signal.rs
index 676f839bcd4b47881dc4488e88799eff3647bc93..3097a7e4c0cec61c3d0c8505ad18115a1c6c74f6 100644
--- a/redox-rt/src/signal.rs
+++ b/redox-rt/src/signal.rs
@@ -2,7 +2,7 @@ use core::cell::Cell;
 use core::ffi::c_int;
 use core::sync::atomic::{AtomicU64, Ordering};
 
-use syscall::{Error, Result, Sigcontrol, EINVAL, SIGCHLD, SIGCONT, SIGKILL, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGWINCH};
+use syscall::{Error, IntRegisters, Result, Sigcontrol, EINVAL, SIGCHLD, SIGCONT, SIGKILL, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, SIGWINCH};
 
 use crate::arch::*;
 use crate::sync::Mutex;
@@ -14,13 +14,13 @@ pub fn sighandler_function() -> usize {
     #[cfg(target_arch = "x86_64")]
     // Check OSXSAVE bit
     // TODO: HWCAP?
-    if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
+    /*if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
         __relibc_internal_sigentry_xsave as usize
     } else {
         __relibc_internal_sigentry_fxsave as usize
-    }
+    }*/
 
-    #[cfg(any(target_arch = "x86", target_arch = "aarch64"))]
+    //#[cfg(any(target_arch = "x86", target_arch = "aarch64"))]
     {
         __relibc_internal_sigentry as usize
     }
@@ -28,14 +28,17 @@ pub fn sighandler_function() -> usize {
 
 #[repr(C)]
 pub struct SigStack {
+    sa_handler: usize,
+    sig_num: usize,
+    // NOTE(review): x86_64 sigentry reserves 4096 + 32 bytes below regs; are the 16 above counted?
     #[cfg(target_arch = "x86_64")]
     fx: [u8; 4096],
 
     #[cfg(target_arch = "x86")]
     fx: [u8; 512],
 
-    sa_handler: usize,
-    sig_num: usize,
+    _pad: [usize; 4], // 32 bytes on 64-bit; _pad + regs = 192 = 3 * 64 bytes
+    regs: IntRegisters, // 160 bytes currently
 }
 
 #[inline(always)]
diff --git a/src/ld_so/dso.rs b/src/ld_so/dso.rs
index 0bd7a566a60c171d10547979b138703c76517e4e..135ec59dc0ad5785c3718f902eed83b0b94fb961 100644
--- a/src/ld_so/dso.rs
+++ b/src/ld_so/dso.rs
@@ -1,7 +1,7 @@
 use super::{
     debug::{RTLDDebug, _r_debug},
     linker::Symbol,
-    tcb::{round_up, Master},
+    tcb::Master,
 };
 use crate::{
     header::{errno::STR_ERROR, sys_mman},
@@ -170,7 +170,7 @@ impl DSO {
             for ph in elf.program_headers.iter() {
                 let voff = ph.p_vaddr % ph.p_align;
                 let vaddr = (ph.p_vaddr - voff) as usize;
-                let vsize = round_up((ph.p_memsz + voff) as usize, ph.p_align as usize);
+                let vsize = ((ph.p_memsz + voff) as usize).next_multiple_of(ph.p_align as usize);
 
                 match ph.p_type {
                     program_header::PT_DYNAMIC => {
@@ -253,7 +253,7 @@ impl DSO {
         for ph in elf.program_headers.iter() {
             let voff = ph.p_vaddr % ph.p_align;
             let vaddr = (ph.p_vaddr - voff) as usize;
-            let vsize = round_up((ph.p_memsz + voff) as usize, ph.p_align as usize);
+            let vsize = ((ph.p_memsz + voff) as usize).next_multiple_of(ph.p_align as usize);
 
             match ph.p_type {
                 program_header::PT_LOAD => {
diff --git a/src/ld_so/linker.rs b/src/ld_so/linker.rs
index 6e76e53b848405f100cf0ebd4c357316ca2cbdb4..983715cbfdb2fa8aa5bf2376294d56202fc6d2a0 100644
--- a/src/ld_so/linker.rs
+++ b/src/ld_so/linker.rs
@@ -27,7 +27,7 @@ use super::{
     callbacks::LinkerCallbacks,
     debug::{RTLDState, _dl_debug_state, _r_debug},
     dso::{is_pie_enabled, DSO},
-    tcb::{round_up, Master, Tcb},
+    tcb::{Master, Tcb},
     ExpectTlsFree, PATH_SEP,
 };
 
@@ -453,7 +453,7 @@ impl Linker {
             {
                 let voff = ph.p_vaddr % ph.p_align;
                 let vaddr = (ph.p_vaddr - voff) as usize;
-                let vsize = round_up((ph.p_memsz + voff) as usize, ph.p_align as usize);
+                let vsize = ((ph.p_memsz + voff) as usize).next_multiple_of(ph.p_align as usize);
                 let mut prot = 0;
                 if ph.p_flags & program_header::PF_R == program_header::PF_R {
                     prot |= sys_mman::PROT_READ;
diff --git a/src/ld_so/tcb.rs b/src/ld_so/tcb.rs
index ecd8fef49be9014954e767396973adf1cd5f2e9b..784131c8be2158929cc3c0a6395a461286f85ea6 100644
--- a/src/ld_so/tcb.rs
+++ b/src/ld_so/tcb.rs
@@ -55,14 +55,19 @@ pub struct Tcb {
     /// Underlying pthread_t struct, pthread_self() returns &self.pthread
     pub pthread: Pthread,
     #[cfg(target_os = "redox")]
-    pub sigcontrol: Sigcontrol,
+    pub sigcontrol: RtSigarea,
+}
+#[derive(Debug, Default)] // per-thread signal state stored in `Tcb::sigcontrol`
+pub struct RtSigarea {
+    pub control: Sigcontrol, // passed to `redox_rt::signal::setup_sighandler` at startup
+    pub internal: [usize; 4], // NOTE(review): presumably backs redox-rt's `SigArea` — confirm
 }
 
 impl Tcb {
     /// Create a new TCB
     pub unsafe fn new(size: usize) -> Result<&'static mut Self> {
         let page_size = Sys::getpagesize();
-        let (abi_page, tls, tcb_page) = Self::os_new(round_up(size, page_size))?;
+        let (abi_page, tls, tcb_page) = Self::os_new(size.next_multiple_of(page_size))?;
 
         let tcb_ptr = tcb_page.as_mut_ptr() as *mut Self;
         trace!("New TCB: {:p}", tcb_ptr);
@@ -326,7 +331,3 @@ impl Tcb {
         let _ = syscall::close(file);
     }
 }
-
-pub fn round_up(value: usize, alignment: usize) -> usize {
-    return (value + alignment - 1) & (!(alignment - 1));
-}
diff --git a/src/start.rs b/src/start.rs
index 540ddb6d12cae536a45111660e204d0694e4b5dc..d0301b1b2b3af0967f3fb520fb551e01182f9661 100644
--- a/src/start.rs
+++ b/src/start.rs
@@ -158,7 +158,7 @@ pub unsafe extern "C" fn relibc_start(sp: &'static Stack) -> ! {
             tcb.linker_ptr = Box::into_raw(Box::new(Mutex::new(linker)));
         }
         #[cfg(target_os = "redox")]
-        redox_rt::signal::setup_sighandler(&tcb.sigcontrol);
+        redox_rt::signal::setup_sighandler(&tcb.sigcontrol.control);
     }
 
     // Set up argc and argv