From 7ac379a7a10e6a827b9ba41d6d9a478d6cd86fef Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Sun, 17 Mar 2024 17:09:41 +0100
Subject: [PATCH] Fix thread spawning on aarch64

* Correctly align the stack
* Fix argument order of the ldp instructions
* Remove unnecessary ldr x5 instruction
---
 src/platform/redox/clone.rs | 7 +++----
 src/pthread/mod.rs          | 7 +++++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/platform/redox/clone.rs b/src/platform/redox/clone.rs
index cd3d4fe9..3d6ad8df 100644
--- a/src/platform/redox/clone.rs
+++ b/src/platform/redox/clone.rs
@@ -97,10 +97,9 @@ core::arch::global_asm!(
     .p2align 6
 __relibc_internal_rlct_clone_ret:
     # Load registers
-    ldp x0, x8, [sp], #16
-    ldp x2, x1, [sp], #16
-    ldp x4, x3, [sp], #16
-    ldr x5, [sp], #16
+    ldp x8, x0, [sp], #16
+    ldp x1, x2, [sp], #16
+    ldp x3, x4, [sp], #16
 
     # Call entry point
     blr x8
diff --git a/src/pthread/mod.rs b/src/pthread/mod.rs
index 405f473c..3a15a3c5 100644
--- a/src/pthread/mod.rs
+++ b/src/pthread/mod.rs
@@ -174,6 +174,13 @@ pub(crate) unsafe fn create(
             stack.write(value);
         };
 
+        if cfg!(target_arch = "aarch64") {
+            // AArch64 requires the stack to be 16-byte aligned after
+            // the call instruction, unlike x86, which requires it to be
+            // aligned before the call instruction. As such, push an
+            // extra word on the stack to align the stack to 16 bytes.
+            push(0);
+        }
         push(0);
         push(synchronization_mutex as usize);
         push(ptr as usize);
-- 
GitLab