Skip to content
Snippets Groups Projects
Verified Commit 4ee878c4 authored by Jacob Lorentzon's avatar Jacob Lorentzon
Browse files

Always use fxsave, store 'YMM_UPPER' manually.

parent 2fe1d614
No related branches found
No related tags found
1 merge request!480Refactor redox runtime and impl signals in userspace
......@@ -22,21 +22,24 @@ pub struct SigArea {
pub disable_signals_depth: u64,
}
#[derive(Debug, Default)]
#[repr(C)]
#[repr(C, align(16))]
pub struct ArchIntRegs {
pub _pad: [usize; 2], // make size divisible by 16
pub ebp: usize,
pub esi: usize,
pub edi: usize,
pub ebx: usize,
pub eax: usize,
pub ecx: usize,
pub edx: usize,
pub eflags: usize,
pub eip: usize,
pub esp: usize,
// First 29 * 16 = 464 bytes of the 512-byte FXSAVE image; the element type must be
// 16 bytes wide so the size matches the `29 * 16` reserved by the sigentry asm and
// the fields below land in the trailing 48 "available" bytes.
pub fxsave: [u128; 29],
// ensure fxsave region is 16 byte aligned
pub _pad: [usize; 2], // fxsave "available" +0
pub ebp: usize, // fxsave "available" +8
pub esi: usize, // avail +12
pub edi: usize, // avail +16
pub ebx: usize, // avail +20
pub eax: usize, // avail +24
pub ecx: usize, // avail +28
pub edx: usize, // avail +32
pub eflags: usize, // avail +36
pub eip: usize, // avail +40
pub esp: usize, // avail +44
}
/// Deactivate TLS, used before exec() on Redox to not trick target executable into thinking TLS
......@@ -131,14 +134,12 @@ asmfunction!(__relibc_internal_sigentry: ["
// Read first signal word
mov eax, gs:[{tcb_sc_off} + {sc_word}]
and eax, gs:[{tcb_sc_off} + {sc_word} + 4]
and eax, {SIGW0_PENDING_MASK}
bsf eax, eax
jnz 2f
// Read second signal word
mov eax, gs:[{tcb_sc_off} + {sc_word} + 8]
and eax, gs:[{tcb_sc_off} + {sc_word} + 12]
and eax, {SIGW1_PENDING_MASK}
bsf eax, eax
jz 7f
add eax, 32
......@@ -172,17 +173,17 @@ asmfunction!(__relibc_internal_sigentry: ["
push esi
push ebp
sub esp, 8
sub esp, 2 * 4 + 29 * 16
fxsave [esp]
push eax
sub esp, 12 + 512
fxsave [esp]
sub esp, 3 * 4
mov ecx, esp
call {inner}
fxrstor [esp]
add esp, 512 + 12 + 4 + 8
fxrstor [esp + 16]
add esp, 16 + 29 * 16 + 2 * 4
pop ebp
pop esi
......@@ -219,8 +220,6 @@ __relibc_internal_sigentry_crit_second:
tcb_sc_off = const offset_of!(crate::Tcb, os_specific) + offset_of!(RtSigarea, control),
pctl_off_actions = const offset_of!(SigProcControl, actions),
pctl = sym PROC_CONTROL_STRUCT,
SIGW0_PENDING_MASK = const !0,
SIGW1_PENDING_MASK = const !0,
STACK_ALIGN = const 16,
]);
......
......@@ -28,18 +28,18 @@ pub struct SigArea {
pub pctl: usize, // TODO: find out how to correctly reference that static
}
#[repr(C)]
#[repr(C, align(16))]
#[derive(Debug, Default)]
pub struct ArchIntRegs {
_pad: [usize; 2], // ensure size is divisible by 32
pub r15: usize,
pub r14: usize,
pub r13: usize,
pub r12: usize,
pub rbp: usize,
pub rbx: usize,
pub r11: usize,
pub ymm_upper: [u128; 16],
pub fxsave: [u128; 29],
pub r15: usize, // fxsave "available" +0
pub r14: usize, // available +8
pub r13: usize, // available +16
pub r12: usize, // available +24
pub rbp: usize, // available +32
pub rbx: usize, // available +40
pub r11: usize, // outside fxsave, and so on
pub r10: usize,
pub r9: usize,
pub r8: usize,
......@@ -174,7 +174,6 @@ asmfunction!(__relibc_internal_sigentry: ["
mov rdx, rax
shr rdx, 32
and eax, edx
and eax, {SIGW0_PENDING_MASK}
bsf eax, eax
jnz 2f
......@@ -183,9 +182,8 @@ asmfunction!(__relibc_internal_sigentry: ["
mov rdx, rax
shr rdx, 32
and eax, edx
and eax, {SIGW1_PENDING_MASK}
bsf eax, eax
jz 7f
jz 6f
add eax, 32
2:
sub rsp, {REDZONE_SIZE}
......@@ -238,35 +236,63 @@ asmfunction!(__relibc_internal_sigentry: ["
push r13
push r14
push r15
sub rsp, 16
sub rsp, (29 + 16) * 16 // fxsave region minus available bytes
fxsave64 [rsp + 16 * 16]
// TODO: self-modifying?
cmp byte ptr [rip + {supports_avx}], 0
je 5f
// Prefer vextractf128 over vextracti128 since the former only requires AVX version 1.
vextractf128 [rsp + 15 * 16], ymm0, 1
vextractf128 [rsp + 14 * 16], ymm1, 1
vextractf128 [rsp + 13 * 16], ymm2, 1
vextractf128 [rsp + 12 * 16], ymm3, 1
vextractf128 [rsp + 11 * 16], ymm4, 1
vextractf128 [rsp + 10 * 16], ymm5, 1
vextractf128 [rsp + 9 * 16], ymm6, 1
vextractf128 [rsp + 8 * 16], ymm7, 1
vextractf128 [rsp + 7 * 16], ymm8, 1
vextractf128 [rsp + 6 * 16], ymm9, 1
vextractf128 [rsp + 5 * 16], ymm10, 1
vextractf128 [rsp + 4 * 16], ymm11, 1
vextractf128 [rsp + 3 * 16], ymm12, 1
vextractf128 [rsp + 2 * 16], ymm13, 1
vextractf128 [rsp + 16], ymm14, 1
vextractf128 [rsp], ymm15, 1
5:
push rax // selected signal
sub rsp, 8
sub rsp, 4096 + 24
cld
mov rdi, rsp
xor eax, eax
mov ecx, 4096 + 24
rep stosb
call {inner}
// TODO: self-modifying?
cmp byte ptr [rip + {supports_xsave}], 0
je 6f
add rsp, 16
mov eax, 0xffffffff
mov edx, eax
xsave [rsp]
// The FXSAVE image sits above the 16 saved YMM upper halves, at the same offset
// used by `fxsave64 [rsp + 16 * 16]` on entry; [rsp] itself is the ymm15 slot.
fxrstor64 [rsp + 16 * 16]
mov rdi, rsp
call {inner}
cmp byte ptr [rip + {supports_avx}], 0
je 6f
mov eax, 0xffffffff
mov edx, eax
xrstor [rsp]
// Restore the upper 128 bits of ymm0..ymm15, mirroring the vextractf128 save
// sequence: ymmN's upper half lives at [rsp + (15 - N) * 16].
vinsertf128 ymm0, ymm0, [rsp + 15 * 16], 1
vinsertf128 ymm1, ymm1, [rsp + 14 * 16], 1
vinsertf128 ymm2, ymm2, [rsp + 13 * 16], 1
vinsertf128 ymm3, ymm3, [rsp + 12 * 16], 1
vinsertf128 ymm4, ymm4, [rsp + 11 * 16], 1
vinsertf128 ymm5, ymm5, [rsp + 10 * 16], 1
vinsertf128 ymm6, ymm6, [rsp + 9 * 16], 1
vinsertf128 ymm7, ymm7, [rsp + 8 * 16], 1
vinsertf128 ymm8, ymm8, [rsp + 7 * 16], 1
vinsertf128 ymm9, ymm9, [rsp + 6 * 16], 1
vinsertf128 ymm10, ymm10, [rsp + 5 * 16], 1
vinsertf128 ymm11, ymm11, [rsp + 4 * 16], 1
vinsertf128 ymm12, ymm12, [rsp + 3 * 16], 1
vinsertf128 ymm13, ymm13, [rsp + 2 * 16], 1
vinsertf128 ymm14, ymm14, [rsp + 16], 1
vinsertf128 ymm15, ymm15, [rsp], 1
6:
add rsp, (29 + 16) * 16
5:
add rsp, 4096 + 32 + 16
pop r15
pop r14
pop r13
......@@ -299,14 +325,6 @@ __relibc_internal_sigentry_crit_first:
__relibc_internal_sigentry_crit_second:
jmp qword ptr fs:[{tcb_sa_off} + {sa_tmp_rip}]
6:
fxsave64 [rsp]
mov rdi, rsp
call {inner}
fxrstor64 [rsp]
jmp 5b
7:
ud2
// Spurious signal
"] <= [
......@@ -325,11 +343,9 @@ __relibc_internal_sigentry_crit_second:
pctl_off_actions = const offset_of!(SigProcControl, actions),
//pctl = sym PROC_CONTROL_STRUCT,
sa_off_pctl = const offset_of!(SigArea, pctl),
supports_xsave = sym SUPPORTS_XSAVE,
SIGW0_PENDING_MASK = const !0,
SIGW1_PENDING_MASK = const !0,
supports_avx = sym SUPPORTS_AVX,
REDZONE_SIZE = const 128,
STACK_ALIGN = const 64, // if xsave is used
STACK_ALIGN = const 16,
]);
extern "C" {
......@@ -357,7 +373,7 @@ pub unsafe fn arch_pre(stack: &mut SigStack, area: &mut SigArea) {
}
}
static SUPPORTS_XSAVE: AtomicU8 = AtomicU8::new(1); // FIXME
static SUPPORTS_AVX: AtomicU8 = AtomicU8::new(1); // FIXME
pub unsafe fn manually_enter_trampoline() {
let c = &Tcb::current().unwrap().os_specific.control;
......
......@@ -12,39 +12,24 @@ use crate::sync::Mutex;
static CPUID_EAX1_ECX: core::sync::atomic::AtomicU32 = core::sync::atomic::AtomicU32::new(0);
pub fn sighandler_function() -> usize {
//#[cfg(target_arch = "x86_64")]
// Check OSXSAVE bit
// TODO: HWCAP?
/*if CPUID_EAX1_ECX.load(core::sync::atomic::Ordering::Relaxed) & (1 << 27) != 0 {
__relibc_internal_sigentry_xsave as usize
} else {
__relibc_internal_sigentry_fxsave as usize
}*/
//#[cfg(any(target_arch = "x86", target_arch = "aarch64"))]
{
__relibc_internal_sigentry as usize
}
__relibc_internal_sigentry as usize
}
#[repr(C)]
pub struct SigStack {
#[cfg(target_arch = "x86_64")]
fx: [u8; 4096], // 64 byte aligned
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
_pad: [usize; 1], // pad to 16 bytes alignment
#[cfg(target_arch = "x86")]
fx: [u8; 512], // 16 byte aligned
#[cfg(target_arch = "x86_64")]
_pad: [usize; 3], // pad to 192 = 3 * 64 = 168 + 24 bytes
#[cfg(target_arch = "x86")]
_pad: [usize; 3], // pad to 64 = 4 * 16 = 52 + 12 bytes
_pad: [usize; 3], // pad to 16 bytes alignment
sig_num: usize,
// x86_64: 160 bytes
// i686: 48 bytes
// x86_64: 864 bytes
// i686: 512 bytes
// aarch64: 272 bytes (SIMD TODO)
pub regs: ArchIntRegs,
}
......@@ -261,6 +246,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
let _sigguard = tmp_disable_signals();
let ctl = current_sigctl();
let _guard = SIGACTIONS_LOCK.lock();
let action = &PROC_CONTROL_STRUCT.actions[usize::from(signal) - 1];
if let Some(old) = old {
......@@ -298,7 +285,8 @@ pub fn sigaction(signal: u8, new: Option<&Sigaction>, old: Option<&mut Sigaction
(new.mask, new.flags, explicit_handler)
}
};
action.first.store((handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32), Ordering::Relaxed);
let new_first = (handler as u64) | (u64::from(flags.bits() & STORED_FLAGS) << 32);
action.first.store(new_first, Ordering::Relaxed);
action.user_data.store(mask, Ordering::Relaxed);
Ok(())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment