From 8d14d54fa98b6141c6520616f515844f00830503 Mon Sep 17 00:00:00 2001 From: 4lDO2 <4lDO2@protonmail.com> Date: Fri, 26 Aug 2022 11:54:47 +0200 Subject: [PATCH] Support scheduler CPU affinity. --- src/context/context.rs | 9 ++++++++- src/context/switch.rs | 3 ++- src/scheme/proc.rs | 24 +++++++++++++++++++++--- src/scheme/sys/context.rs | 11 +++++++++-- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/context/context.rs b/src/context/context.rs index 566448c4..374edc23 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -117,6 +117,7 @@ pub struct ContextSnapshot { pub running: bool, pub cpu_id: Option<usize>, pub cpu_time: u128, + pub sched_affinity: Option<usize>, pub syscall: Option<(usize, usize, usize, usize, usize, usize)>, // Clone fields //TODO: is there a faster way than allocation? @@ -161,6 +162,7 @@ impl ContextSnapshot { running: context.running, cpu_id: context.cpu_id, cpu_time: context.cpu_time, + sched_affinity: context.sched_affinity, syscall: context.syscall, name, files, @@ -198,12 +200,16 @@ pub struct Context { pub status_reason: &'static str, /// Context running or not pub running: bool, - /// CPU ID, if locked + /// Current CPU ID pub cpu_id: Option<usize>, /// Time this context was switched to pub switch_time: u128, /// Amount of CPU time used pub cpu_time: u128, + /// Scheduler CPU affinity. If set, [`cpu_id`] is always either [`None`] or equal to this + /// value. + // TODO: bitmask (selection of multiple allowed CPUs)? 
+ pub sched_affinity: Option<usize>, /// Current system call pub syscall: Option<(usize, usize, usize, usize, usize, usize)>, /// Head buffer to use when system call buffers are not page aligned @@ -352,6 +358,7 @@ impl Context { cpu_id: None, switch_time: 0, cpu_time: 0, + sched_affinity: None, syscall: None, syscall_head, syscall_tail, diff --git a/src/context/switch.rs b/src/context/switch.rs index 6ba12b69..22c2fd3e 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -17,7 +17,8 @@ use crate::time; unsafe fn update(context: &mut Context, cpu_id: usize) { // Take ownership if not already owned - if context.cpu_id == None { + // TODO: Support unclaiming context, while still respecting the CPU affinity. + if context.cpu_id == None && context.sched_affinity.map_or(true, |id| id == crate::cpu_id()) { context.cpu_id = Some(cpu_id); // println!("{}: take {} {}", cpu_id, context.id, *context.name.read()); } diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs index d6f80a0a..b90b5d9e 100644 --- a/src/scheme/proc.rs +++ b/src/scheme/proc.rs @@ -141,6 +141,7 @@ enum Operation { // FD to access the file descriptor behind grants. 
GrantHandle { description: Arc<RwLock<FileDescription>> }, + SchedAffinity, Sigactions(Arc<RwLock<Vec<(SigAction, usize)>>>), CurrentSigactions, AwaitingSigactionsChange(Arc<RwLock<Vec<(SigAction, usize)>>>), @@ -307,6 +308,7 @@ impl ProcScheme { Some("sigactions") => Operation::Sigactions(Arc::clone(&get_context(pid)?.read().actions)), Some("current-sigactions") => Operation::CurrentSigactions, Some("mmap-min-addr") => Operation::MmapMinAddr(Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?)), + Some("sched-affinity") => Operation::SchedAffinity, _ => return Err(Error::new(EINVAL)) }; @@ -834,6 +836,12 @@ impl Scheme for ProcScheme { *buf.array_chunks_mut::<{mem::size_of::<usize>()}>().next().unwrap() = usize::to_ne_bytes(val); Ok(mem::size_of::<usize>()) } + Operation::SchedAffinity => { + // TODO: Deduplicate code + let val = context::contexts().get(info.pid).ok_or(Error::new(EBADFD))?.read().sched_affinity.map_or(usize::MAX, |a| a % crate::cpu_count()); + *buf.array_chunks_mut::<{mem::size_of::<usize>()}>().next().unwrap() = usize::to_ne_bytes(val); + Ok(mem::size_of::<usize>()) + } // TODO: Replace write() with SYS_DUP_FORWARD. // TODO: Find a better way to switch address spaces, since they also require switching // the instruction and stack pointer. Maybe remove `<pid>/regs` altogether and replace it @@ -1081,6 +1089,13 @@ impl Scheme for ProcScheme { addrspace.write().mmap_min = val; Ok(mem::size_of::<usize>()) } + // TODO: Deduplicate code. 
+ Operation::SchedAffinity => { + let val = usize::from_ne_bytes(<[u8; mem::size_of::<usize>()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?); + context::contexts().get(info.pid).ok_or(Error::new(EBADFD))?.write().sched_affinity = if val == usize::MAX { None } else { Some(val % crate::cpu_count()) }; + Ok(mem::size_of::<usize>()) + } + _ => Err(Error::new(EBADF)), } } @@ -1131,6 +1146,7 @@ impl Scheme for ProcScheme { Operation::CurrentSigactions => "current-sigactions", Operation::OpenViaDup => "open-via-dup", Operation::MmapMinAddr(_) => "mmap-min-addr", + Operation::SchedAffinity => "sched-affinity", _ => return Err(Error::new(EOPNOTSUPP)), }); @@ -1359,6 +1375,9 @@ fn inherit_context() -> Result<ContextId> { let mut new_context = new_context_lock.write(); new_context.status = Status::Stopped(SIGSTOP); + + // TODO: Move all of these IDs into somewhere in userspace. Processes as an abstraction + // need not be in the kernel; contexts are sufficient. new_context.euid = current_context.euid; new_context.egid = current_context.egid; new_context.ruid = current_context.ruid; @@ -1368,10 +1387,9 @@ fn inherit_context() -> Result<ContextId> { new_context.ppid = current_context.id; new_context.pgid = current_context.pgid; new_context.umask = current_context.umask; - new_context.sigmask = current_context.sigmask; - new_context.cpu_id = current_context.cpu_id; - // TODO: More to copy? + // TODO: Force userspace to copy sigmask. Start with "all signals blocked". 
+ new_context.sigmask = current_context.sigmask; new_context.id }; diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs index 0f27aad5..dacc831b 100644 --- a/src/scheme/sys/context.rs +++ b/src/scheme/sys/context.rs @@ -5,7 +5,7 @@ use crate::context; use crate::syscall::error::Result; pub fn resource() -> Result<Vec<u8>> { - let mut string = format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n", + let mut string = format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n", "PID", "PGID", "PPID", @@ -17,6 +17,7 @@ pub fn resource() -> Result<Vec<u8>> { "ENS", "STAT", "CPU", + "AFF", "TIME", "MEM", "NAME"); @@ -62,6 +63,11 @@ pub fn resource() -> Result<Vec<u8>> { } else { format!("?") }; + let affinity = if let Some(aff) = context.sched_affinity { + format!("{}", aff) + } else { + format!("?") + }; let cpu_time_s = context.cpu_time / crate::time::NANOS_PER_SEC; let cpu_time_ns = context.cpu_time % crate::time::NANOS_PER_SEC; @@ -95,7 +101,7 @@ pub fn resource() -> Result<Vec<u8>> { format!("{} B", memory) }; - string.push_str(&format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n", + string.push_str(&format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n", context.id.into(), context.pgid.into(), context.ppid.into(), @@ -107,6 +113,7 @@ pub fn resource() -> Result<Vec<u8>> { context.ens.into(), stat_string, cpu_string, + affinity, cpu_time_string, memory_string, *context.name.read())); -- GitLab