Skip to content
Snippets Groups Projects
Verified Commit 4ee878c4 authored by Jacob Lorentzon's avatar Jacob Lorentzon
Browse files

Always use fxsave, store 'YMM_UPPER' manually.

parent 2fe1d614
No related branches found
No related tags found
No related merge requests found
...@@ -22,21 +22,24 @@ pub struct SigArea { ...@@ -22,21 +22,24 @@ pub struct SigArea {
pub disable_signals_depth: u64, pub disable_signals_depth: u64,
} }
/// i686 register frame; in the updated layout a 29 * 16-byte fxsave area precedes the padding and the general-purpose registers.
#[derive(Debug, Default)] #[derive(Debug, Default)]
#[repr(C)] #[repr(C, align(16))]
pub struct ArchIntRegs { pub struct ArchIntRegs {
pub _pad: [usize; 2], // make size divisible by 16 pub fxsave: [u128; 29], // 29 * 16 bytes, the size the entry asm reserves via `sub esp, 2 * 4 + 29 * 16`
pub ebp: usize, // ensure fxsave region is 16 byte aligned
pub esi: usize, pub _pad: [usize; 2], // fxsave "available" +0
pub edi: usize,
pub ebx: usize, pub ebp: usize, // fxsave "available" +8
pub eax: usize, pub esi: usize, // avail +12
pub ecx: usize, pub edi: usize, // avail +16
pub edx: usize, pub ebx: usize, // avail +20
pub eax: usize, // avail +24
pub eflags: usize, pub ecx: usize, // avail +28
pub eip: usize, pub edx: usize, // avail +32
pub esp: usize,
pub eflags: usize, // avail +36
pub eip: usize, // avail +40
pub esp: usize, // avail +44
} }
/// Deactivate TLS, used before exec() on Redox to not trick target executable into thinking TLS /// Deactivate TLS, used before exec() on Redox to not trick target executable into thinking TLS
...@@ -131,14 +134,12 @@ asmfunction!(__relibc_internal_sigentry: [" ...@@ -131,14 +134,12 @@ asmfunction!(__relibc_internal_sigentry: ["
// Read first signal word // Read first signal word
mov eax, gs:[{tcb_sc_off} + {sc_word}] mov eax, gs:[{tcb_sc_off} + {sc_word}]
and eax, gs:[{tcb_sc_off} + {sc_word} + 4] and eax, gs:[{tcb_sc_off} + {sc_word} + 4]
and eax, {SIGW0_PENDING_MASK}
bsf eax, eax bsf eax, eax
jnz 2f jnz 2f
// Read second signal word // Read second signal word
mov eax, gs:[{tcb_sc_off} + {sc_word} + 8] mov eax, gs:[{tcb_sc_off} + {sc_word} + 8]
and eax, gs:[{tcb_sc_off} + {sc_word} + 12] and eax, gs:[{tcb_sc_off} + {sc_word} + 12]
and eax, {SIGW1_PENDING_MASK}
bsf eax, eax bsf eax, eax
jz 7f jz 7f
add eax, 32 add eax, 32
...@@ -172,17 +173,17 @@ asmfunction!(__relibc_internal_sigentry: [" ...@@ -172,17 +173,17 @@ asmfunction!(__relibc_internal_sigentry: ["
push esi push esi
push ebp push ebp
sub esp, 8 sub esp, 2 * 4 + 29 * 16
fxsave [esp]
push eax push eax
sub esp, 12 + 512 sub esp, 3 * 4
fxsave [esp]
mov ecx, esp mov ecx, esp
call {inner} call {inner}
fxrstor [esp] fxrstor [esp + 16]
add esp, 512 + 12 + 4 + 8 add esp, 16 + 29 * 16 + 2 * 4
pop ebp pop ebp
pop esi pop esi
...@@ -219,8 +220,6 @@ __relibc_internal_sigentry_crit_second: ...@@ -219,8 +220,6 @@ __relibc_internal_sigentry_crit_second:
tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control), tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
pctl_off_actions = const offset_of!(SigProcControl, actions), pctl_off_actions = const offset_of!(SigProcControl, actions),
pctl = sym PROC_CONTROL_STRUCT, pctl = sym PROC_CONTROL_STRUCT,
SIGW0_PENDING_MASK = const !0,
SIGW1_PENDING_MASK = const !0,
STACK_ALIGN = const 16, STACK_ALIGN = const 16,
]); ]);
......
...@@ -28,18 +28,18 @@ pub struct SigArea { ...@@ -28,18 +28,18 @@ pub struct SigArea {
pub pctl: usize, // TODO: find out how to correctly reference that static pub pctl: usize, // TODO: find out how to correctly reference that static
} }
#[repr(C)] #[repr(C, align(16))]
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct ArchIntRegs { pub struct ArchIntRegs {
_pad: [usize; 2], // ensure size is divisible by 32 pub ymm_upper: [u128; 16],
pub fxsave: [u128; 29],
pub r15: usize, pub r15: usize, // fxsave "available" +0
pub r14: usize, pub r14: usize, // available +8
pub r13: usize, pub r13: usize, // available +16
pub r12: usize, pub r12: usize, // available +24
pub rbp: usize, pub rbp: usize, // available +32
pub rbx: usize, pub rbx: usize, // available +40
pub r11: usize, pub r11: usize, // outside fxsave, and so on
pub r10: usize, pub r10: usize,
pub r9: usize, pub r9: usize,
pub r8: usize, pub r8: usize,
...@@ -174,7 +174,6 @@ asmfunction!(__relibc_internal_sigentry: [" ...@@ -174,7 +174,6 @@ asmfunction!(__relibc_internal_sigentry: ["
mov rdx, rax mov rdx, rax
shr rdx, 32 shr rdx, 32
and eax, edx and eax, edx
and eax, {SIGW0_PENDING_MASK}
bsf eax, eax bsf eax, eax
jnz 2f jnz 2f
...@@ -183,9 +182,8 @@ asmfunction!(__relibc_internal_sigentry: [" ...@@ -183,9 +182,8 @@ asmfunction!(__relibc_internal_sigentry: ["
mov rdx, rax mov rdx, rax
shr rdx, 32 shr rdx, 32
and eax, edx and eax, edx
and eax, {SIGW1_PENDING_MASK}
bsf eax, eax bsf eax, eax
jz 7f jz 6f
add eax, 32 add eax, 32
2: 2:
sub rsp, {REDZONE_SIZE} sub rsp, {REDZONE_SIZE}
...@@ -238,35 +236,63 @@ asmfunction!(__relibc_internal_sigentry: [" ...@@ -238,35 +236,63 @@ asmfunction!(__relibc_internal_sigentry: ["
push r13 push r13
push r14 push r14
push r15 push r15
sub rsp, 16 sub rsp, (29 + 16) * 16 // fxsave region minus available bytes
fxsave64 [rsp + 16 * 16]
// TODO: self-modifying?
cmp byte ptr [rip + {supports_avx}], 0
je 5f
// Prefer vextractf128 over vextracti128 since the former only requires AVX version 1.
vextractf128 [rsp + 15 * 16], ymm0, 1
vextractf128 [rsp + 14 * 16], ymm1, 1
vextractf128 [rsp + 13 * 16], ymm2, 1
vextractf128 [rsp + 12 * 16], ymm3, 1
vextractf128 [rsp + 11 * 16], ymm4, 1
vextractf128 [rsp + 10 * 16], ymm5, 1
vextractf128 [rsp + 9 * 16], ymm6, 1
vextractf128 [rsp + 8 * 16], ymm7, 1
vextractf128 [rsp + 7 * 16], ymm8, 1
vextractf128 [rsp + 6 * 16], ymm9, 1
vextractf128 [rsp + 5 * 16], ymm10, 1
vextractf128 [rsp + 4 * 16], ymm11, 1
vextractf128 [rsp + 3 * 16], ymm12, 1
vextractf128 [rsp + 2 * 16], ymm13, 1
vextractf128 [rsp + 16], ymm14, 1
vextractf128 [rsp], ymm15, 1
5:
push rax // selected signal push rax // selected signal
sub rsp, 8
sub rsp, 4096 + 24
cld
mov rdi, rsp mov rdi, rsp
xor eax, eax call {inner}
mov ecx, 4096 + 24
rep stosb
// TODO: self-modifying? add rsp, 16
cmp byte ptr [rip + {supports_xsave}], 0
je 6f
mov eax, 0xffffffff fxrstor64 [rsp]
mov edx, eax
xsave [rsp]
mov rdi, rsp cmp byte ptr [rip + {supports_avx}], 0
call {inner} je 6f
mov eax, 0xffffffff vinsertf128 ymm0, ymm0, [rsp + 15 * 16], 1
mov edx, eax vinsertf128 ymm1, ymm1, [rsp + 14 * 16], 1
xrstor [rsp] vinsertf128 ymm2, ymm2, [rsp + 13 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 12 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 11 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 10 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 9 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 8 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 7 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 6 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 5 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 4 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 3 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 2 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 16], 1
vinsertf128 ymm2, ymm2, [rsp], 1
6:
add rsp, (29 + 16) * 16
5:
add rsp, 4096 + 32 + 16
pop r15 pop r15
pop r14 pop r14
pop r13 pop r13
...@@ -299,14 +325,6 @@ __relibc_internal_sigentry_crit_first: ...@@ -299,14 +325,6 @@ __relibc_internal_sigentry_crit_first:
__relibc_internal_sigentry_crit_second: __relibc_internal_sigentry_crit_second:
jmp qword ptr fs:[{tcb_sa_off} + {sa_tmp_rip}] jmp qword ptr fs:[{tcb_sa_off} + {sa_tmp_rip}]
6: 6:
fxsave64 [rsp]
mov rdi, rsp
call {inner}
fxrstor64 [rsp]
jmp 5b
7:
ud2 ud2
// Spurious signal // Spurious signal
"] <= [ "] <= [
...@@ -325,11 +343,9 @@ __relibc_internal_sigentry_crit_second: ...@@ -325,11 +343,9 @@ __relibc_internal_sigentry_crit_second:
pctl_off_actions = const offset_of!(SigProcControl, actions), pctl_off_actions = const offset_of!(SigProcControl, actions),
//pctl = sym PROC_CONTROL_STRUCT, //pctl = sym PROC_CONTROL_STRUCT,
sa_off_pctl = const offset_of!(SigArea, pctl), sa_off_pctl = const offset_of!(SigArea, pctl),
supports_xsave = sym SUPPORTS_XSAVE, supports_avx = sym SUPPORTS_AVX,
SIGW0_PENDING_MASK = const !0,
SIGW1_PENDING_MASK = const !0,
REDZONE_SIZE = const 128, REDZONE_SIZE = const 128,
STACK_ALIGN = const 64, // if xsave is used STACK_ALIGN = const 16,
]); ]);
extern "C" { extern "C" {
...@@ -357,7 +373,7 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) { ...@@ -357,7 +373,7 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
} }
} }
static SUPPORTS_XSAVE: AtomicU8 = AtomicU8::new(1); // FIXME static SUPPORTS_AVX: AtomicU8 = AtomicU8::new(1); // FIXME
pub unsafe fn manually_enter_trampoline() { pub unsafe fn manually_enter_trampoline() {
let c = &Tcb::current().unwrap().os_specific.control; let c = &Tcb::current().unwrap().os_specific.control;
......
...@@ -12,39 +12,24 @@ use crate::sync::Mutex; ...@@ -12,39 +12,24 @@ use crate::sync::Mutex;
static CPUID_EAX1_ECX: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0); static CPUID_EAX1_ECX: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
/// Returns the address of the `__relibc_internal_sigentry` signal-entry trampoline as a `usize`.
pub fn sighandler_function() -> usize { pub fn sighandler_function() -> usize {
//#[cfg(target_arch = "x86_64")]
// Check OSXSAVE bit
// TODO: HWCAP? // TODO: HWCAP?
/*if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
__relibc_internal_sigentry_xsave as usize
} else {
__relibc_internal_sigentry_fxsave as usize
}*/
//#[cfg(any(target_arch = "x86", target_arch = "aarch64"))] __relibc_internal_sigentry as usize
{
__relibc_internal_sigentry as usize
}
} }
/// C-layout frame: architecture-specific leading fields, then the signal number (`sig_num`), then the register block (`regs`).
/// NOTE(review): presumably this must mirror the order in which the signal-entry asm builds its stack frame; confirm against the trampoline.
#[repr(C)] #[repr(C)]
pub struct SigStack { pub struct SigStack {
#[cfg(target_arch = "x86_64")] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
fx: [u8; 4096], // 64 byte aligned _pad: [usize; 1], // pad to 16 bytes alignment
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86")]
fx: [u8; 512], // 16 byte aligned _pad: [usize; 3], // pad to 16 bytes alignment
#[cfg(target_arch = "x86_64")]
_pad: [usize; 3], // pad to 192 = 3 * 64 = 168 + 24 bytes
#[cfg(target_arch = "x86")]
_pad: [usize; 3], // pad to 64 = 4 * 16 = 52 + 12 bytes
sig_num: usize, sig_num: usize,
// x86_64: 160 bytes // x86_64: 864 bytes
// i686: 48 bytes // i686: 512 bytes
// aarch64: 272 bytes (SIMD TODO)
pub regs: ArchIntRegs, pub regs: ArchIntRegs,
} }
...@@ -261,6 +246,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction ...@@ -261,6 +246,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
let _sigguard = tmp_disable_signals(); let _sigguard = tmp_disable_signals();
let ctl = current_sigctl(); let ctl = current_sigctl();
let _guard = SIGACTIONS_LOCK.lock();
let action = &PROC_CONTROL_STRUCT.actions[usize::from(signal) - 1]; let action = &PROC_CONTROL_STRUCT.actions[usize::from(signal) - 1];
if let Some(old) = old { if let Some(old) = old {
...@@ -298,7 +285,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction ...@@ -298,7 +285,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
(new.mask, new.flags, explicit_handler) (new.mask, new.flags, explicit_handler)
} }
}; };
action.first.store((handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32), Ordering::Relaxed); let new_first = (handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32);
action.first.store(new_first, Ordering::Relaxed);
action.user_data.store(mask, Ordering::Relaxed); action.user_data.store(mask, Ordering::Relaxed);
Ok(()) Ok(())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment