redox-rt: keep FXSAVE/AVX state inside ArchIntRegs in the signal trampoline

Moves the FXSAVE area (and, on x86_64, the upper YMM halves) out of SigStack
and into ArchIntRegs, replaces the x86_64 xsave/fxsave dual path with
fxsave64 plus vextractf128/vinsertf128, drops the all-ones
SIGW*_PENDING_MASK operands, and takes SIGACTIONS_LOCK in sigaction().

Review fixes applied to added lines only (hunk line counts are unchanged;
the post-image blob ids on the `index` lines predate these fixes):
  * i686: ArchIntRegs::fxsave is [u128; 29] so the struct matches the
    `2 * 4 + 29 * 16` bytes reserved by the trampoline.
  * x86_64: fxrstor64 reads from [rsp + 16 * 16], the address fxsave64 wrote.
  * x86_64: the vinsertf128 sequence restores ymm3..ymm15 instead of
    reloading ymm2 fourteen times.
  * x86_64: the restore path uses local label 7, so the spurious-signal
    `jz 6f` still reaches `6: ud2` rather than the restore epilogue.

diff --git a/redox-rt/src/arch/i686.rs b/redox-rt/src/arch/i686.rs
index fce06454c1583eac1aac6f4f15fa0630c17bd302..80296b835a31bab94ba1767e79bcc9f681f71bf5 100644
--- a/redox-rt/src/arch/i686.rs
+++ b/redox-rt/src/arch/i686.rs
@@ -22,21 +22,24 @@ pub struct SigArea {
     pub disable_signals_depth: u64,
 }
 #[derive(Debug, Default)]
-#[repr(C)]
+#[repr(C, align(16))]
 pub struct ArchIntRegs {
-    pub _pad: [usize; 2], // make size divisible by 16
-
-    pub ebp: usize,
-    pub esi: usize,
-    pub edi: usize,
-    pub ebx: usize,
-    pub eax: usize,
-    pub ecx: usize,
-    pub edx: usize,
-
-    pub eflags: usize,
-    pub eip: usize,
-    pub esp: usize,
+    pub fxsave: [u128; 29], // 29 * 16 bytes, same size the trampoline reserves before fxsave
+
+    // ensure fxsave region is 16 byte aligned
+    pub _pad: [usize; 2], // fxsave "available" +0
+
+    pub ebp: usize, // fxsave "available" +8
+    pub esi: usize, // avail +12
+    pub edi: usize, // avail +16
+    pub ebx: usize, // avail +20
+    pub eax: usize, // avail +24
+    pub ecx: usize, // avail +28
+    pub edx: usize, // avail +32
+
+    pub eflags: usize, // avail +36
+    pub eip: usize, // avail +40
+    pub esp: usize, // avail +44
 }
 
 /// Deactive TLS, used before exec() on Redox to not trick target executable into thinking TLS
@@ -131,14 +134,12 @@ asmfunction!(__relibc_internal_sigentry: ["
     // Read first signal word
     mov eax, gs:[{tcb_sc_off} + {sc_word}]
     and eax, gs:[{tcb_sc_off} + {sc_word} + 4]
-    and eax, {SIGW0_PENDING_MASK}
     bsf eax, eax
     jnz 2f
 
     // Read second signal word
     mov eax, gs:[{tcb_sc_off} + {sc_word} + 8]
     and eax, gs:[{tcb_sc_off} + {sc_word} + 12]
-    and eax, {SIGW1_PENDING_MASK}
     bsf eax, eax
     jz 7f
     add eax, 32
@@ -172,17 +173,17 @@ asmfunction!(__relibc_internal_sigentry: ["
     push esi
     push ebp
 
-    sub esp, 8
+    sub esp, 2 * 4 + 29 * 16
+    fxsave [esp]
 
     push eax
-    sub esp, 12 + 512
-    fxsave [esp]
+    sub esp, 3 * 4
 
     mov ecx, esp
     call {inner}
 
-    fxrstor [esp]
-    add esp, 512 + 12 + 4 + 8
+    fxrstor [esp + 16]
+    add esp, 16 + 29 * 16 + 2 * 4
 
     pop ebp
     pop esi
@@ -219,8 +220,6 @@ __relibc_internal_sigentry_crit_second:
     tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
     pctl_off_actions = const offset_of!(SigProcControl, actions),
     pctl = sym PROC_CONTROL_STRUCT,
-    SIGW0_PENDING_MASK = const !0,
-    SIGW1_PENDING_MASK = const !0,
     STACK_ALIGN = const 16,
 ]);
 
diff --git a/redox-rt/src/arch/x86_64.rs b/redox-rt/src/arch/x86_64.rs
index fc425eecf5a4fc27fcac4b95336a24eb71456d17..0b3ffa68dbf9ae030eb7c33ea46b44e8e794e099 100644
--- a/redox-rt/src/arch/x86_64.rs
+++ b/redox-rt/src/arch/x86_64.rs
@@ -28,18 +28,18 @@ pub struct SigArea {
     pub pctl: usize, // TODO: find out how to correctly reference that static
 }
 
-#[repr(C)]
+#[repr(C, align(16))]
 #[derive(Debug, Default)]
 pub struct ArchIntRegs {
-    _pad: [usize; 2], // ensure size is divisible by 32
-
-    pub r15: usize,
-    pub r14: usize,
-    pub r13: usize,
-    pub r12: usize,
-    pub rbp: usize,
-    pub rbx: usize,
-    pub r11: usize,
+    pub ymm_upper: [u128; 16],
+    pub fxsave: [u128; 29],
+    pub r15: usize, // fxsave "available" +0
+    pub r14: usize, // available +8
+    pub r13: usize, // available +16
+    pub r12: usize, // available +24
+    pub rbp: usize, // available +32
+    pub rbx: usize, // available +40
+    pub r11: usize, // outside fxsave, and so on
     pub r10: usize,
     pub r9: usize,
     pub r8: usize,
@@ -174,7 +174,6 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov rdx, rax
     shr rdx, 32
     and eax, edx
-    and eax, {SIGW0_PENDING_MASK}
     bsf eax, eax
     jnz 2f
 
@@ -183,9 +182,8 @@ asmfunction!(__relibc_internal_sigentry: ["
     mov rdx, rax
     shr rdx, 32
     and eax, edx
-    and eax, {SIGW1_PENDING_MASK}
     bsf eax, eax
-    jz 7f
+    jz 6f
     add eax, 32
 2:
     sub rsp, {REDZONE_SIZE}
@@ -238,35 +236,63 @@ asmfunction!(__relibc_internal_sigentry: ["
     push r13
     push r14
     push r15
-    sub rsp, 16
+    sub rsp, (29 + 16) * 16 // fxsave region minus available bytes
+    fxsave64 [rsp + 16 * 16]
 
+    // TODO: self-modifying?
+    cmp byte ptr [rip + {supports_avx}], 0
+    je 5f
+
+    // Prefer vextractf128 over vextracti128 since the former only requires AVX version 1.
+    vextractf128 [rsp + 15 * 16], ymm0, 1
+    vextractf128 [rsp + 14 * 16], ymm1, 1
+    vextractf128 [rsp + 13 * 16], ymm2, 1
+    vextractf128 [rsp + 12 * 16], ymm3, 1
+    vextractf128 [rsp + 11 * 16], ymm4, 1
+    vextractf128 [rsp + 10 * 16], ymm5, 1
+    vextractf128 [rsp + 9 * 16], ymm6, 1
+    vextractf128 [rsp + 8 * 16], ymm7, 1
+    vextractf128 [rsp + 7 * 16], ymm8, 1
+    vextractf128 [rsp + 6 * 16], ymm9, 1
+    vextractf128 [rsp + 5 * 16], ymm10, 1
+    vextractf128 [rsp + 4 * 16], ymm11, 1
+    vextractf128 [rsp + 3 * 16], ymm12, 1
+    vextractf128 [rsp + 2 * 16], ymm13, 1
+    vextractf128 [rsp + 16], ymm14, 1
+    vextractf128 [rsp], ymm15, 1
+5:
     push rax // selected signal
+    sub rsp, 8
 
-    sub rsp, 4096 + 24
-
-    cld
     mov rdi, rsp
-    xor eax, eax
-    mov ecx, 4096 + 24
-    rep stosb
+    call {inner}
 
-    // TODO: self-modifying?
-    cmp byte ptr [rip + {supports_xsave}], 0
-    je 6f
+    add rsp, 16
 
-    mov eax, 0xffffffff
-    mov edx, eax
-    xsave [rsp]
+    fxrstor64 [rsp + 16 * 16] // same address fxsave64 wrote; [rsp] is the ymm_upper area
 
-    mov rdi, rsp
-    call {inner}
+    cmp byte ptr [rip + {supports_avx}], 0
+    je 7f // not 6: local label 6 is the spurious-signal ud2 target
 
-    mov eax, 0xffffffff
-    mov edx, eax
-    xrstor [rsp]
+    vinsertf128 ymm0, ymm0, [rsp + 15 * 16], 1
+    vinsertf128 ymm1, ymm1, [rsp + 14 * 16], 1
+    vinsertf128 ymm2, ymm2, [rsp + 13 * 16], 1
+    vinsertf128 ymm3, ymm3, [rsp + 12 * 16], 1
+    vinsertf128 ymm4, ymm4, [rsp + 11 * 16], 1
+    vinsertf128 ymm5, ymm5, [rsp + 10 * 16], 1
+    vinsertf128 ymm6, ymm6, [rsp + 9 * 16], 1
+    vinsertf128 ymm7, ymm7, [rsp + 8 * 16], 1
+    vinsertf128 ymm8, ymm8, [rsp + 7 * 16], 1
+    vinsertf128 ymm9, ymm9, [rsp + 6 * 16], 1
+    vinsertf128 ymm10, ymm10, [rsp + 5 * 16], 1
+    vinsertf128 ymm11, ymm11, [rsp + 4 * 16], 1
+    vinsertf128 ymm12, ymm12, [rsp + 3 * 16], 1
+    vinsertf128 ymm13, ymm13, [rsp + 2 * 16], 1
+    vinsertf128 ymm14, ymm14, [rsp + 16], 1
+    vinsertf128 ymm15, ymm15, [rsp], 1
+7:
+    add rsp, (29 + 16) * 16
 
-5:
-    add rsp, 4096 + 32 + 16
     pop r15
     pop r14
     pop r13
@@ -299,14 +325,6 @@ __relibc_internal_sigentry_crit_first:
 __relibc_internal_sigentry_crit_second:
     jmp qword ptr fs:[{tcb_sa_off} + {sa_tmp_rip}]
 6:
-    fxsave64 [rsp]
-
-    mov rdi, rsp
-    call {inner}
-
-    fxrstor64 [rsp]
-    jmp 5b
-7:
     ud2
     // Spurious signal
 "] <= [
@@ -325,11 +343,9 @@ __relibc_internal_sigentry_crit_second:
     pctl_off_actions = const offset_of!(SigProcControl, actions),
     //pctl = sym PROC_CONTROL_STRUCT,
     sa_off_pctl = const offset_of!(SigArea, pctl),
-    supports_xsave = sym SUPPORTS_XSAVE,
-    SIGW0_PENDING_MASK = const !0,
-    SIGW1_PENDING_MASK = const !0,
+    supports_avx = sym SUPPORTS_AVX,
     REDZONE_SIZE = const 128,
-    STACK_ALIGN = const 64, // if xsave is used
+    STACK_ALIGN = const 16,
 ]);
 
 extern "C" {
@@ -357,7 +373,7 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
     }
 }
 
-static SUPPORTS_XSAVE: AtomicU8 = AtomicU8::new(1); // FIXME
+static SUPPORTS_AVX: AtomicU8 = AtomicU8::new(1); // FIXME
 
 pub unsafe fn manually_enter_trampoline() {
     let c = &Tcb::current().unwrap().os_specific.control;
diff --git a/redox-rt/src/signal.rs b/redox-rt/src/signal.rs
index 1d51f162865c4ffa8d81f138276683d90e42997e..f99f39ebce7c465db390bfae5129db68f5ee7330 100644
--- a/redox-rt/src/signal.rs
+++ b/redox-rt/src/signal.rs
@@ -12,39 +12,24 @@ use crate::sync::Mutex;
 static CPUID_EAX1_ECX: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
 
 pub fn sighandler_function() -> usize {
-    //#[cfg(target_arch = "x86_64")]
-    // Check OSXSAVE bit
     // TODO: HWCAP?
-    /*if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
-        __relibc_internal_sigentry_xsave as usize
-    } else {
-        __relibc_internal_sigentry_fxsave as usize
-    }*/
 
-    //#[cfg(any(target_arch = "x86", target_arch = "aarch64"))]
-    {
-        __relibc_internal_sigentry as usize
-    }
+    __relibc_internal_sigentry as usize
 }
 
 #[repr(C)]
 pub struct SigStack {
-    #[cfg(target_arch = "x86_64")]
-    fx: [u8; 4096], // 64 byte aligned
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    _pad: [usize; 1], // pad to 16 bytes alignment
 
     #[cfg(target_arch = "x86")]
-    fx: [u8; 512], // 16 byte aligned
-
-    #[cfg(target_arch = "x86_64")]
-    _pad: [usize; 3], // pad to 192 = 3 * 64 = 168 + 24 bytes
-
-    #[cfg(target_arch = "x86")]
-    _pad: [usize; 3], // pad to 64 = 4 * 16 = 52 + 12 bytes
+    _pad: [usize; 3], // pad to 16 bytes alignment
 
     sig_num: usize,
 
-    // x86_64: 160 bytes
-    // i686: 48 bytes
+    // x86_64: 864 bytes
+    // i686: 512 bytes
+    // aarch64: 272 bytes (SIMD TODO)
     pub regs: ArchIntRegs,
 }
 
@@ -261,6 +246,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
     let _sigguard = tmp_disable_signals();
     let ctl = current_sigctl();
 
+    let _guard = SIGACTIONS_LOCK.lock();
+
     let action = &PROC_CONTROL_STRUCT.actions[usize::from(signal) - 1];
 
     if let Some(old) = old {
@@ -298,7 +285,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
             (new.mask, new.flags, explicit_handler)
         }
     };
-    action.first.store((handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32), Ordering::Relaxed);
+    let new_first = (handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32);
+    action.first.store(new_first, Ordering::Relaxed);
     action.user_data.store(mask, Ordering::Relaxed);
 
     Ok(())