diff --git a/src/platform/redox/clone.rs b/src/platform/redox/clone.rs
index cd3d4fe9d7fb18c6cf77304e23a660e619f5121f..3d6ad8dfbe159a8de2c795b812f5022c7261b749 100644
--- a/src/platform/redox/clone.rs
+++ b/src/platform/redox/clone.rs
@@ -97,10 +97,9 @@ core::arch::global_asm!(
     .p2align 6
 __relibc_internal_rlct_clone_ret:
     # Load registers
-    ldp x0, x8, [sp], #16
-    ldp x2, x1, [sp], #16
-    ldp x4, x3, [sp], #16
-    ldr x5, [sp], #16
+    ldp x8, x0, [sp], #16
+    ldp x1, x2, [sp], #16
+    ldp x3, x4, [sp], #16
 
     # Call entry point
     blr x8
diff --git a/src/pthread/mod.rs b/src/pthread/mod.rs
index 405f473cfd52d9154d74e7eebc9def672221eb89..3a15a3c579f1c67c5f5cec3338d2c1cd5ab521fc 100644
--- a/src/pthread/mod.rs
+++ b/src/pthread/mod.rs
@@ -174,6 +174,13 @@ pub(crate) unsafe fn create(
             stack.write(value);
         };
 
+        if cfg!(target_arch = "aarch64") {
+            // AArch64 requires the stack to be 16-byte aligned after
+            // the call instruction, unlike x86, which requires it to be
+            // aligned before the call instruction. As such, push an
+            // extra word onto the stack to align it to 16 bytes.
+            push(0);
+        }
         push(0);
         push(synchronization_mutex as usize);
         push(ptr as usize);