From 8d14d54fa98b6141c6520616f515844f00830503 Mon Sep 17 00:00:00 2001
From: 4lDO2 <4lDO2@protonmail.com>
Date: Fri, 26 Aug 2022 11:54:47 +0200
Subject: [PATCH] Support scheduler CPU affinity.

---
 src/context/context.rs    |  9 ++++++++-
 src/context/switch.rs     |  3 ++-
 src/scheme/proc.rs        | 24 +++++++++++++++++++++---
 src/scheme/sys/context.rs | 11 +++++++++--
 4 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/src/context/context.rs b/src/context/context.rs
index 566448c4..374edc23 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -117,6 +117,7 @@ pub struct ContextSnapshot {
     pub running: bool,
     pub cpu_id: Option<usize>,
     pub cpu_time: u128,
+    pub sched_affinity: Option<usize>,
     pub syscall: Option<(usize, usize, usize, usize, usize, usize)>,
     // Clone fields
     //TODO: is there a faster way than allocation?
@@ -161,6 +162,7 @@ impl ContextSnapshot {
             running: context.running,
             cpu_id: context.cpu_id,
             cpu_time: context.cpu_time,
+            sched_affinity: context.sched_affinity,
             syscall: context.syscall,
             name,
             files,
@@ -198,12 +200,16 @@ pub struct Context {
     pub status_reason: &'static str,
     /// Context running or not
     pub running: bool,
-    /// CPU ID, if locked
+    /// Current CPU ID
     pub cpu_id: Option<usize>,
     /// Time this context was switched to
     pub switch_time: u128,
     /// Amount of CPU time used
     pub cpu_time: u128,
+    /// Scheduler CPU affinity. If set, [`cpu_id`] is always either [`None`] or equal to this
+    /// value.
+    // TODO: bitmask (selection of multiple allowed CPUs)?
+    pub sched_affinity: Option<usize>,
     /// Current system call
     pub syscall: Option<(usize, usize, usize, usize, usize, usize)>,
     /// Head buffer to use when system call buffers are not page aligned
@@ -352,6 +358,7 @@ impl Context {
             cpu_id: None,
             switch_time: 0,
             cpu_time: 0,
+            sched_affinity: None,
             syscall: None,
             syscall_head,
             syscall_tail,
diff --git a/src/context/switch.rs b/src/context/switch.rs
index 6ba12b69..22c2fd3e 100644
--- a/src/context/switch.rs
+++ b/src/context/switch.rs
@@ -17,7 +17,8 @@ use crate::time;
 
 unsafe fn update(context: &mut Context, cpu_id: usize) {
     // Take ownership if not already owned
-    if context.cpu_id == None {
+    // TODO: Support unclaiming context, while still respecting the CPU affinity.
+    if context.cpu_id == None && context.sched_affinity.map_or(true, |id| id == crate::cpu_id()) {
         context.cpu_id = Some(cpu_id);
         // println!("{}: take {} {}", cpu_id, context.id, *context.name.read());
     }
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index d6f80a0a..b90b5d9e 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -141,6 +141,7 @@ enum Operation {
     // FD to access the file descriptor behind grants.
     GrantHandle { description: Arc<RwLock<FileDescription>> },
 
+    SchedAffinity,
     Sigactions(Arc<RwLock<Vec<(SigAction, usize)>>>),
     CurrentSigactions,
     AwaitingSigactionsChange(Arc<RwLock<Vec<(SigAction, usize)>>>),
@@ -307,6 +308,7 @@ impl ProcScheme {
             Some("sigactions") => Operation::Sigactions(Arc::clone(&get_context(pid)?.read().actions)),
             Some("current-sigactions") => Operation::CurrentSigactions,
             Some("mmap-min-addr") => Operation::MmapMinAddr(Arc::clone(get_context(pid)?.read().addr_space().map_err(|_| Error::new(ENOENT))?)),
+            Some("sched-affinity") => Operation::SchedAffinity,
             _ => return Err(Error::new(EINVAL))
         };
 
@@ -834,6 +836,12 @@ impl Scheme for ProcScheme {
                 *buf.array_chunks_mut::<{mem::size_of::<usize>()}>().next().unwrap() = usize::to_ne_bytes(val);
                 Ok(mem::size_of::<usize>())
             }
+            Operation::SchedAffinity => {
+                // TODO: Deduplicate code
+                let val = context::contexts().get(info.pid).ok_or(Error::new(EBADFD))?.read().sched_affinity.map_or(usize::MAX, |a| a % crate::cpu_count());
+                *buf.array_chunks_mut::<{mem::size_of::<usize>()}>().next().unwrap() = usize::to_ne_bytes(val);
+                Ok(mem::size_of::<usize>())
+            }
             // TODO: Replace write() with SYS_DUP_FORWARD.
             // TODO: Find a better way to switch address spaces, since they also require switching
             // the instruction and stack pointer. Maybe remove `<pid>/regs` altogether and replace it
@@ -1081,6 +1089,13 @@ impl Scheme for ProcScheme {
                 addrspace.write().mmap_min = val;
                 Ok(mem::size_of::<usize>())
             }
+            // TODO: Deduplicate code.
+            Operation::SchedAffinity => {
+                let val = usize::from_ne_bytes(<[u8; mem::size_of::<usize>()]>::try_from(buf).map_err(|_| Error::new(EINVAL))?);
+                context::contexts().get(info.pid).ok_or(Error::new(EBADFD))?.write().sched_affinity = if val == usize::MAX { None } else { Some(val % crate::cpu_count()) };
+                Ok(mem::size_of::<usize>())
+            }
+
             _ => Err(Error::new(EBADF)),
         }
     }
@@ -1131,6 +1146,7 @@ impl Scheme for ProcScheme {
             Operation::CurrentSigactions => "current-sigactions",
             Operation::OpenViaDup => "open-via-dup",
             Operation::MmapMinAddr(_) => "mmap-min-addr",
+            Operation::SchedAffinity => "sched-affinity",
 
             _ => return Err(Error::new(EOPNOTSUPP)),
         });
@@ -1359,6 +1375,9 @@ fn inherit_context() -> Result<ContextId> {
         let mut new_context = new_context_lock.write();
 
         new_context.status = Status::Stopped(SIGSTOP);
+
+        // TODO: Move all of these IDs into somewhere in userspace. Processes as an abstraction
+        // need not be in the kernel; contexts are sufficient.
         new_context.euid = current_context.euid;
         new_context.egid = current_context.egid;
         new_context.ruid = current_context.ruid;
@@ -1368,10 +1387,9 @@ fn inherit_context() -> Result<ContextId> {
         new_context.ppid = current_context.id;
         new_context.pgid = current_context.pgid;
         new_context.umask = current_context.umask;
-        new_context.sigmask = current_context.sigmask;
-        new_context.cpu_id = current_context.cpu_id;
 
-        // TODO: More to copy?
+        // TODO: Force userspace to copy sigmask. Start with "all signals blocked".
+        new_context.sigmask = current_context.sigmask;
 
         new_context.id
     };
diff --git a/src/scheme/sys/context.rs b/src/scheme/sys/context.rs
index 0f27aad5..dacc831b 100644
--- a/src/scheme/sys/context.rs
+++ b/src/scheme/sys/context.rs
@@ -5,7 +5,7 @@ use crate::context;
 use crate::syscall::error::Result;
 
 pub fn resource() -> Result<Vec<u8>> {
-    let mut string = format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n",
+    let mut string = format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n",
                              "PID",
                              "PGID",
                              "PPID",
@@ -17,6 +17,7 @@ pub fn resource() -> Result<Vec<u8>> {
                              "ENS",
                              "STAT",
                              "CPU",
+                             "AFF",
                              "TIME",
                              "MEM",
                              "NAME");
@@ -62,6 +63,11 @@ pub fn resource() -> Result<Vec<u8>> {
             } else {
                 format!("?")
             };
+            let affinity = if let Some(aff) = context.sched_affinity {
+                format!("{}", aff)
+            } else {
+                format!("?")
+            };
 
             let cpu_time_s = context.cpu_time / crate::time::NANOS_PER_SEC;
             let cpu_time_ns = context.cpu_time % crate::time::NANOS_PER_SEC;
@@ -95,7 +101,7 @@ pub fn resource() -> Result<Vec<u8>> {
                 format!("{} B", memory)
             };
 
-            string.push_str(&format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n",
+            string.push_str(&format!("{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<6}{:<12}{:<8}{}\n",
                                context.id.into(),
                                context.pgid.into(),
                                context.ppid.into(),
@@ -107,6 +113,7 @@ pub fn resource() -> Result<Vec<u8>> {
                                context.ens.into(),
                                stat_string,
                                cpu_string,
+                               affinity,
                                cpu_time_string,
                                memory_string,
                                *context.name.read()));
-- 
GitLab