diff --git a/redox-rt/src/arch/i686.rs b/redox-rt/src/arch/i686.rs
index fce06454c1583eac1aac6f4f15fa0630c17bd302..80296b835a31bab94ba1767e79bcc9f681f71bf5 100644
--- a/redox-rt/src/arch/i686.rs
+++ b/redox-rt/src/arch/i686.rs
@@ -22,21 +22,24 @@ pub struct SigArea {
     pub disable_signals_depth: u64,
 }
 #[derive(Debug, Default)]
-#[repr(C)]
+#[repr(C, align(16))]
 pub struct ArchIntRegs {
-    pub _pad: [usize; 2], // make size divisible by 16
-
-    pub ebp: usize,
-    pub esi: usize,
-    pub edi: usize,
-    pub ebx: usize,
-    pub eax: usize,
-    pub ecx: usize,
-    pub edx: usize,
-
-    pub eflags: usize,
-    pub eip: usize,
-    pub esp: usize,
+    pub fxsave: [u128; 29], // 29 * 16 = 464 bytes, the size the sigentry asm reserves before `fxsave [esp]`
+
+    // 8 bytes of padding + 40 bytes of registers fill the 48 "available" bytes, 512 bytes in total
+    pub _pad: [usize; 2], // fxsave "available" +0
+
+    pub ebp: usize, // fxsave "available" +8
+    pub esi: usize, // avail +12
+    pub edi: usize, // avail +16
+    pub ebx: usize, // avail +20
+    pub eax: usize, // avail +24
+    pub ecx: usize, // avail +28
+    pub edx: usize, // avail +32
+
+    pub eflags: usize, // avail +36
+    pub eip: usize, // avail +40
+    pub esp: usize, // avail +44
 }
 
 /// Deactive TLS, used before exec() on Redox to not trick target executable into thinking TLS
@@ -131,14 +134,12 @@ asmfunction!(__relibc_internal_sigentry: ["
     // Read first signal word
     mov eax, gs:[{tcb_sc_off} + {sc_word}]
     and eax, gs:[{tcb_sc_off} + {sc_word} + 4]
-    and eax, {SIGW0_PENDING_MASK}
     bsf eax, eax
     jnz 2f
 
     // Read second signal word
     mov eax, gs:[{tcb_sc_off} + {sc_word} + 8]
     and eax, gs:[{tcb_sc_off} + {sc_word} + 12]
-    and eax, {SIGW1_PENDING_MASK}
     bsf eax, eax
     jz 7f
     add eax, 32
@@ -172,17 +173,17 @@ asmfunction!(__relibc_internal_sigentry: ["
     push esi
     push ebp
 
-    sub esp, 8
+    sub esp, 2 * 4 + 29 * 16
+    fxsave [esp]
 
     push eax
-    sub esp, 12 + 512
-    fxsave [esp]
+    sub esp, 3 * 4
 
     mov ecx, esp
     call {inner}
 
-    fxrstor [esp]
-    add esp, 512 + 12 + 4 + 8
+    fxrstor [esp + 16]
+    add esp, 16 + 29 * 16 + 2 * 4
 
     pop ebp
     pop esi
@@ -219,8 +220,6 @@ __relibc_internal_sigentry_crit_second:
     tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
     pctl_off_actions = const offset_of!(SigProcControl, actions),
     pctl = sym PROC_CONTROL_STRUCT,
-    SIGW0_PENDING_MASK = const !0,
-    SIGW1_PENDING_MASK = const !0,
     STACK_ALIGN = const 16,
 ]);
 
diff --git a/redox-rt/src/arch/x86_64.rs b/redox-rt/src/arch/x86_64.rs
index fc425eecf5a4fc27fcac4b95336a24eb71456d17..0b3ffa68dbf9ae030eb7c33ea46b44e8e794e099 100644
--- a/redox-rt/src/arch/x86_64.rs
+++ b/redox-rt/src/arch/x86_64.rs
@@ -28,18 +28,18 @@ pub struct SigArea {
     pub pctl: usize, // TODO: find out how to correctly reference that static
 }
 
-#[repr(C)]
+#[repr(C, align(16))]
 #[derive(Debug, Default)]
 pub struct ArchIntRegs {
-    _pad: [usize; 2], // ensure size is divisible by 32
-
-    pub r15: usize,
-    pub r14: usize,
-    pub r13: usize,
-    pub r12: usize,
-    pub rbp: usize,
-    pub rbx: usize,
-    pub r11: usize,
+    pub ymm_upper: [u128; 16],
+    pub fxsave: [u128; 29],
+    pub r15: usize, // fxsave "available" +0
+    pub r14: usize, // available +8
+    pub r13: usize, // available +16
+    pub r12: usize, // available +24
+    pub rbp: usize, // available +32
+    pub rbx: usize, // available +40
+    pub r11: usize, // outside fxsave, and so on
     pub r10: usize,
     pub r9: usize,
     pub r8: usize,
@@ -174,7 +174,6 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov rdx, rax
     shr rdx, 32
     and eax, edx
-    and eax, {SIGW0_PENDING_MASK}
     bsf eax, eax
     jnz 2f
 
@@ -183,9 +182,8 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov rdx, rax
     shr rdx, 32
     and eax, edx
-    and eax, {SIGW1_PENDING_MASK}
     bsf eax, eax
-    jz 7f
+    jz 6f
     add eax, 32
 2:
     sub rsp, {REDZONE_SIZE}
@@ -238,35 +236,63 @@ asmfunction!(__relibc_internal_sigentry: ["
     push r13
     push r14
     push r15
-    sub rsp, 16
+    sub rsp, (29 + 16) * 16 // fxsave region minus available bytes
+    fxsave64 [rsp + 16 * 16]
 
+    // TODO: self-modifying?
+    cmp byte ptr [rip + {supports_avx}], 0
+    je 5f
+
+    // Prefer vextractf128 over vextracti128 since the former only requires AVX version 1.
+    vextractf128 [rsp + 15 * 16], ymm0, 1
+    vextractf128 [rsp + 14 * 16], ymm1, 1
+    vextractf128 [rsp + 13 * 16], ymm2, 1
+    vextractf128 [rsp + 12 * 16], ymm3, 1
+    vextractf128 [rsp + 11 * 16], ymm4, 1
+    vextractf128 [rsp + 10 * 16], ymm5, 1
+    vextractf128 [rsp + 9 * 16], ymm6, 1
+    vextractf128 [rsp + 8 * 16], ymm7, 1
+    vextractf128 [rsp + 7 * 16], ymm8, 1
+    vextractf128 [rsp + 6 * 16], ymm9, 1
+    vextractf128 [rsp + 5 * 16], ymm10, 1
+    vextractf128 [rsp + 4 * 16], ymm11, 1
+    vextractf128 [rsp + 3 * 16], ymm12, 1
+    vextractf128 [rsp + 2 * 16], ymm13, 1
+    vextractf128 [rsp + 16], ymm14, 1
+    vextractf128 [rsp], ymm15, 1
+5:
     push rax // selected signal
+    sub rsp, 8
 
-    sub rsp, 4096 + 24
-
-    cld
     mov rdi, rsp
-    xor eax, eax
-    mov ecx, 4096 + 24
-    rep stosb
+    call {inner}
 
-    // TODO: self-modifying?
-    cmp byte ptr [rip + {supports_xsave}], 0
-    je 6f
+    add rsp, 16 // drop the padding and the selected signal pushed above
 
-    mov eax, 0xffffffff
-    mov edx, eax
-    xsave [rsp]
+    fxrstor64 [rsp + 16 * 16] // same address the fxsave64 above wrote to
 
-    mov rdi, rsp
-    call {inner}
+    cmp byte ptr [rip + {supports_avx}], 0
+    je 7f
 
-    mov eax, 0xffffffff
-    mov edx, eax
-    xrstor [rsp]
+    vinsertf128 ymm0, ymm0, [rsp + 15 * 16], 1
+    vinsertf128 ymm1, ymm1, [rsp + 14 * 16], 1
+    vinsertf128 ymm2, ymm2, [rsp + 13 * 16], 1
+    vinsertf128 ymm3, ymm3, [rsp + 12 * 16], 1
+    vinsertf128 ymm4, ymm4, [rsp + 11 * 16], 1
+    vinsertf128 ymm5, ymm5, [rsp + 10 * 16], 1
+    vinsertf128 ymm6, ymm6, [rsp + 9 * 16], 1
+    vinsertf128 ymm7, ymm7, [rsp + 8 * 16], 1
+    vinsertf128 ymm8, ymm8, [rsp + 7 * 16], 1
+    vinsertf128 ymm9, ymm9, [rsp + 6 * 16], 1
+    vinsertf128 ymm10, ymm10, [rsp + 5 * 16], 1
+    vinsertf128 ymm11, ymm11, [rsp + 4 * 16], 1
+    vinsertf128 ymm12, ymm12, [rsp + 3 * 16], 1
+    vinsertf128 ymm13, ymm13, [rsp + 2 * 16], 1
+    vinsertf128 ymm14, ymm14, [rsp + 16], 1
+    vinsertf128 ymm15, ymm15, [rsp], 1
+7:
+    add rsp, (29 + 16) * 16
 
-5:
-    add rsp, 4096 + 32 + 16
     pop r15
     pop r14
     pop r13
@@ -299,14 +325,6 @@ __relibc_internal_sigentry_crit_first:
 __relibc_internal_sigentry_crit_second:
     jmp qword ptr fs:[{tcb_sa_off} + {sa_tmp_rip}]
 6:
-    fxsave64 [rsp]
-
-    mov rdi, rsp
-    call {inner}
-
-    fxrstor64 [rsp]
-    jmp 5b
-7:
     ud2
     // Spurious signal
 "] <= [
@@ -325,11 +343,9 @@ __relibc_internal_sigentry_crit_second:
     pctl_off_actions = const offset_of!(SigProcControl, actions),
     //pctl = sym PROC_CONTROL_STRUCT,
     sa_off_pctl = const offset_of!(SigArea, pctl),
-    supports_xsave = sym SUPPORTS_XSAVE,
-    SIGW0_PENDING_MASK = const !0,
-    SIGW1_PENDING_MASK = const !0,
+    supports_avx = sym SUPPORTS_AVX,
     REDZONE_SIZE = const 128,
-    STACK_ALIGN = const 64, // if xsave is used
+    STACK_ALIGN = const 16,
 ]);
 
 extern "C" {
@@ -357,7 +373,7 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
     }
 }
 
-static SUPPORTS_XSAVE: AtomicU8 = AtomicU8::new(1); // FIXME
+static SUPPORTS_AVX: AtomicU8 = AtomicU8::new(1); // FIXME
 
 pub unsafe fn manually_enter_trampoline() {
     let c = &Tcb::current().unwrap().os_specific.control;
diff --git a/redox-rt/src/signal.rs b/redox-rt/src/signal.rs
index 1d51f162865c4ffa8d81f138276683d90e42997e..f99f39ebce7c465db390bfae5129db68f5ee7330 100644
--- a/redox-rt/src/signal.rs
+++ b/redox-rt/src/signal.rs
@@ -12,39 +12,24 @@ use crate::sync::Mutex;
 static CPUID_EAX1_ECX: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
 
 pub fn sighandler_function() -> usize {
-    //#[cfg(target_arch = "x86_64")]
-    // Check OSXSAVE bit
     // TODO: HWCAP?
-    /*if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
-        __relibc_internal_sigentry_xsave as usize
-    } else {
-        __relibc_internal_sigentry_fxsave as usize
-    }*/
 
-    //#[cfg(any(target_arch = "x86", target_arch = "aarch64"))]
-    {
-        __relibc_internal_sigentry as usize
-    }
+    __relibc_internal_sigentry as usize // address of the per-arch asm signal entry point
 }
 
 #[repr(C)]
 pub struct SigStack {
-    #[cfg(target_arch = "x86_64")]
-    fx: [u8; 4096], // 64 byte aligned
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    _pad: [usize; 1], // with sig_num: 2 * 8 = 16 bytes, so `regs` starts at a 16-byte offset
 
     #[cfg(target_arch = "x86")]
-    fx: [u8; 512], // 16 byte aligned
-
-    #[cfg(target_arch = "x86_64")]
-    _pad: [usize; 3], // pad to 192 = 3 * 64 = 168 + 24 bytes
-
-    #[cfg(target_arch = "x86")]
-    _pad: [usize; 3], // pad to 64 = 4 * 16 = 52 + 12 bytes
+    _pad: [usize; 3], // with sig_num: 4 * 4 = 16 bytes, so `regs` starts at a 16-byte offset
 
     sig_num: usize,
 
-    // x86_64: 160 bytes
-    // i686: 48 bytes
+    // x86_64: 864 bytes
+    // i686: 512 bytes
+    // aarch64: 272 bytes (SIMD TODO)
     pub regs: ArchIntRegs,
 }
 
@@ -261,6 +246,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
     let _sigguard = tmp_disable_signals();
     let ctl = current_sigctl();
 
+    let _guard = SIGACTIONS_LOCK.lock();
+
     let action = &PROC_CONTROL_STRUCT.actions[usize::from(signal) - 1];
 
     if let Some(old) = old {
@@ -298,7 +285,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
             (new.mask, new.flags, explicit_handler)
         }
     };
-    action.first.store((handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32), Ordering::Relaxed);
+    let new_first = (handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32);
+    action.first.store(new_first, Ordering::Relaxed);
     action.user_data.store(mask, Ordering::Relaxed);
 
     Ok(())