diff --git a/linkers/x86_64.ld b/linkers/x86_64.ld
index 1096029b41356adc16fecff42d7924ba7ac9952d..e88c9d076e5a244a068f79d674fb4f4c6eaa7ea8 100644
--- a/linkers/x86_64.ld
+++ b/linkers/x86_64.ld
@@ -2,7 +2,6 @@ ENTRY(kstart)
 OUTPUT_FORMAT(elf64-x86-64)
 
 KERNEL_OFFSET = 0xffffff0000100000;
-/* KERNEL_OFFSET = 0x100000; */
 
 SECTIONS {
     . = KERNEL_OFFSET;
diff --git a/src/arch/x86_64/gdt.rs b/src/arch/x86_64/gdt.rs
index 96e1b99b50416c9fdb63011187eca188d0d6f6ab..ca846074fefc917d339e1a6845569b86b389eef2 100644
--- a/src/arch/x86_64/gdt.rs
+++ b/src/arch/x86_64/gdt.rs
@@ -88,6 +88,18 @@ pub static mut TSS: TaskStateSegment = TaskStateSegment {
     iomap_base: 0xFFFF
 };
 
+#[cfg(feature = "pti")]
+pub unsafe fn set_tss_stack(stack: usize) { // "pti" build: `stack` is recorded here, not loaded into the TSS
+    use arch::x86_64::pti::{PTI_CPU_STACK, PTI_CONTEXT_STACK};
+    TSS.rsp[0] = (PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len()) as u64; // end address of the per-CPU buffer
+    PTI_CONTEXT_STACK = stack; // passed to switch_stack() by pti::map() and pti::unmap()
+}
+
+#[cfg(not(feature = "pti"))]
+pub unsafe fn set_tss_stack(stack: usize) { // build without "pti": `stack` goes straight into the TSS
+    TSS.rsp[0] = stack as u64; // the stack pointer used when coming back from userspace (see init())
+}
+
 /// Initialize GDT
 pub unsafe fn init(tcb_offset: usize, stack_offset: usize) {
     // Setup the initial GDT with TLS, so we can setup the TLS GDT (a little confusing)
@@ -124,7 +136,7 @@ pub unsafe fn init(tcb_offset: usize, stack_offset: usize) {
     GDT[GDT_TSS].set_limit(mem::size_of::<TaskStateSegment>() as u32);
 
     // Set the stack pointer when coming back from userspace
-    TSS.rsp[0] = stack_offset as u64;
+    set_tss_stack(stack_offset);
 
     // Load the new GDT, which is correctly located in thread local storage
     dtables::lgdt(&GDTR);
diff --git a/src/arch/x86_64/interrupt/syscall.rs b/src/arch/x86_64/interrupt/syscall.rs
index 89a867db27f423f10b954d0f7b9e53e87a974594..359b33b12843ff8e5cbbcc45d4b9fa7fd8da1855 100644
--- a/src/arch/x86_64/interrupt/syscall.rs
+++ b/src/arch/x86_64/interrupt/syscall.rs
@@ -4,64 +4,65 @@ use syscall;
 #[naked]
 pub unsafe extern fn syscall() {
     #[inline(never)]
-    unsafe fn inner(stack: &mut SyscallStack) {
-        let mut a;
+    unsafe fn inner(stack: &mut SyscallStack) -> usize {
         let rbp;
-        asm!("" : "={rax}"(a), "={rbp}"(rbp)
-                : : : "intel", "volatile");
+        asm!("" : "={rbp}"(rbp) : : : "intel", "volatile");
 
-        // Map kernel
-        pti::map();
+        // rax and rbx were pushed by the entry code, so they are read from the saved frame
 
-        a = syscall::syscall(a, stack.rbx, stack.rcx, stack.rdx, stack.rsi, stack.rdi, rbp, stack);
-
-        // Unmap kernel
-        pti::unmap();
-
-        asm!("" : : "{rax}"(a) : : "intel", "volatile");
+        syscall::syscall(stack.rax, stack.rbx, stack.rcx, stack.rdx, stack.rsi, stack.rdi, rbp, stack)
     }
 
-    // Push scratch registers, minus rax for the return value
-    asm!("push rcx
-        push rdx
-        push rdi
-        push rsi
-        push r8
-        push r9
-        push r10
-        push r11
-        push rbx
-        push fs
-        mov r11, 0x18
-        mov fs, r11"
-        : : : : "intel", "volatile");
+    // Push scratch registers, including rax so inner() can read it from SyscallStack
+    asm!("push rax
+         push rbx
+         push rcx
+         push rdx
+         push rdi
+         push rsi
+         push r8
+         push r9
+         push r10
+         push r11
+         push fs
+         mov r11, 0x18
+         mov fs, r11"
+         : : : : "intel", "volatile");
 
     // Get reference to stack variables
     let rsp: usize;
     asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");
 
-    inner(&mut *(rsp as *mut SyscallStack));
+    // Map kernel
+    pti::map();
+    // NOTE(review): `rsp` predates map(); under "pti" map() copies the stack elsewhere - confirm it is still the live frame
+    let a = inner(&mut *(rsp as *mut SyscallStack));
+
+    // Unmap kernel
+    pti::unmap();
+    // Return value goes in rax; the saved rax slot is skipped by `add rsp, 8` below
+    asm!("" : : "{rax}"(a) : : "intel", "volatile");
 
     // Interrupt return
     asm!("pop fs
-        pop rbx
-        pop r11
-        pop r10
-        pop r9
-        pop r8
-        pop rsi
-        pop rdi
-        pop rdx
-        pop rcx
-        iretq"
-        : : : : "intel", "volatile");
+          pop r11
+          pop r10
+          pop r9
+          pop r8
+          pop rsi
+          pop rdi
+          pop rdx
+          pop rcx
+          pop rbx
+          add rsp, 8
+          iretq"
+          : : : : "intel", "volatile");
 }
 
 #[allow(dead_code)]
 #[repr(packed)]
 pub struct SyscallStack {
     pub fs: usize,
-    pub rbx: usize,
     pub r11: usize,
     pub r10: usize,
     pub r9: usize,
@@ -70,6 +71,8 @@ pub struct SyscallStack {
     pub rdi: usize,
     pub rdx: usize,
     pub rcx: usize,
+    pub rbx: usize,
+    pub rax: usize,
     pub rip: usize,
     pub cs: usize,
     pub rflags: usize,
diff --git a/src/arch/x86_64/macros.rs b/src/arch/x86_64/macros.rs
index c336dae76f4a7c4067d55cffbcabad60cde8b5d3..a7b5a3b019c30c063efaace5ff281346c8e70b43 100644
--- a/src/arch/x86_64/macros.rs
+++ b/src/arch/x86_64/macros.rs
@@ -166,22 +166,22 @@ macro_rules! interrupt {
         pub unsafe extern fn $name () {
             #[inline(never)]
             unsafe fn inner() {
-                // Map kernel
-                $crate::arch::x86_64::pti::map();
-
                 $func
-
-                // Unmap kernel
-                $crate::arch::x86_64::pti::unmap();
             }
 
             // Push scratch registers
             scratch_push!();
             fs_push!();
 
+            // Map kernel
+            $crate::arch::x86_64::pti::map();
+
             // Call inner rust function
             inner();
 
+            // Unmap kernel
+            $crate::arch::x86_64::pti::unmap();
+
             // Pop scratch registers and return
             fs_pop!();
             scratch_pop!();
@@ -213,13 +213,7 @@ macro_rules! interrupt_stack {
         pub unsafe extern fn $name () {
             #[inline(never)]
             unsafe fn inner($stack: &mut $crate::arch::x86_64::macros::InterruptStack) {
-                // Map kernel
-                $crate::arch::x86_64::pti::map();
-
                 $func
-
-                // Unmap kernel
-                $crate::arch::x86_64::pti::unmap();
             }
 
             // Push scratch registers
@@ -230,9 +224,15 @@ macro_rules! interrupt_stack {
             let rsp: usize;
             asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");
 
+            // Map kernel
+            $crate::arch::x86_64::pti::map();
+
             // Call inner rust function
             inner(&mut *(rsp as *mut $crate::arch::x86_64::macros::InterruptStack));
 
+            // Unmap kernel
+            $crate::arch::x86_64::pti::unmap();
+
             // Pop scratch registers and return
             fs_pop!();
             scratch_pop!();
@@ -266,13 +266,7 @@ macro_rules! interrupt_error {
         pub unsafe extern fn $name () {
             #[inline(never)]
             unsafe fn inner($stack: &$crate::arch::x86_64::macros::InterruptErrorStack) {
-                // Map kernel
-                $crate::arch::x86_64::pti::map();
-
                 $func
-
-                // Unmap kernel
-                $crate::arch::x86_64::pti::unmap();
             }
 
             // Push scratch registers
@@ -283,9 +277,15 @@ macro_rules! interrupt_error {
             let rsp: usize;
             asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");
 
+            // Map kernel
+            $crate::arch::x86_64::pti::map();
+
             // Call inner rust function
             inner(&*(rsp as *const $crate::arch::x86_64::macros::InterruptErrorStack));
 
+            // Unmap kernel
+            $crate::arch::x86_64::pti::unmap();
+
             // Pop scratch registers, error code, and return
             fs_pop!();
             scratch_pop!();
@@ -320,13 +320,7 @@ macro_rules! interrupt_stack_p {
         pub unsafe extern fn $name () {
             #[inline(never)]
             unsafe fn inner($stack: &mut $crate::arch::x86_64::macros::InterruptStackP) {
-                // Map kernel
-                $crate::arch::x86_64::pti::map();
-
                 $func
-
-                // Unmap kernel
-                $crate::arch::x86_64::pti::unmap();
             }
 
             // Push scratch registers
@@ -338,9 +332,15 @@ macro_rules! interrupt_stack_p {
             let rsp: usize;
             asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");
 
+            // Map kernel
+            $crate::arch::x86_64::pti::map();
+
             // Call inner rust function
             inner(&mut *(rsp as *mut $crate::arch::x86_64::macros::InterruptStackP));
 
+            // Unmap kernel
+            $crate::arch::x86_64::pti::unmap();
+
             // Pop scratch registers and return
             fs_pop!();
             preserved_pop!();
@@ -377,13 +377,7 @@ macro_rules! interrupt_error_p {
         pub unsafe extern fn $name () {
             #[inline(never)]
             unsafe fn inner($stack: &$crate::arch::x86_64::macros::InterruptErrorStackP) {
-                // Map kernel
-                $crate::arch::x86_64::pti::map();
-
                 $func
-
-                // Unmap kernel
-                $crate::arch::x86_64::pti::unmap();
             }
 
             // Push scratch registers
@@ -395,9 +389,15 @@ macro_rules! interrupt_error_p {
             let rsp: usize;
             asm!("" : "={rsp}"(rsp) : : : "intel", "volatile");
 
+            // Map kernel
+            $crate::arch::x86_64::pti::map();
+
             // Call inner rust function
             inner(&*(rsp as *const $crate::arch::x86_64::macros::InterruptErrorStackP));
 
+            // Unmap kernel
+            $crate::arch::x86_64::pti::unmap();
+
             // Pop scratch registers, error code, and return
             fs_pop!();
             preserved_pop!();
diff --git a/src/arch/x86_64/pti.rs b/src/arch/x86_64/pti.rs
index bee8b3d1ee5a715e4ebb2bbd85228b8cfd2c7511..9124c92f1b95089b7cf88294cd4d9b119ee43417 100644
--- a/src/arch/x86_64/pti.rs
+++ b/src/arch/x86_64/pti.rs
@@ -1,19 +1,76 @@
+use core::ptr;
+
+use memory::Frame;
+use paging::ActivePageTable;
+use paging::entry::EntryFlags;
+
+#[cfg(feature = "pti")]
+#[thread_local]
+pub static mut PTI_CPU_STACK: [u8; 256] = [0; 256]; // gdt::set_tss_stack points TSS.rsp[0] at the end of this buffer
+
+#[cfg(feature = "pti")]
+#[thread_local]
+pub static mut PTI_CONTEXT_STACK: usize = 0; // stack address stored by gdt::set_tss_stack; 0 until first set
+
+#[cfg(feature = "pti")]
+#[inline(always)]
+unsafe fn switch_stack(old: usize, new: usize) { // callers pass the end (highest) address of each stack
+    let old_rsp: usize;
+    asm!("" : "={rsp}"(old_rsp) : : : "intel", "volatile");
+    // Bytes between rsp and `old`. NOTE(review): underflows if rsp is above `old`, i.e. not on that stack
+    let offset_rsp = old - old_rsp;
+    // Same distance below `new`
+    let new_rsp = new - offset_rsp;
+    // Copy the bytes from rsp up to `old` into the matching span below `new`
+    ptr::copy_nonoverlapping(
+        old_rsp as *const u8,
+        new_rsp as *mut u8,
+        offset_rsp
+    );
+    // Load rsp with the copy's address. NOTE(review): pointers into the old stack are not adjusted - confirm
+    asm!("" : : "{rsp}"(new_rsp) : : "intel", "volatile");
+}
+
 #[cfg(feature = "pti")]
 #[inline(always)]
 pub unsafe fn map() {
-    let _cr3: usize;
-    asm!("mov $0, cr3
-          mov cr3, $0"
-          : "=r"(_cr3) : : "memory" : "intel", "volatile");
+    // {
+    //     let mut active_table = unsafe { ActivePageTable::new() };
+    //
+    //     // Map kernel heap -- NOTE(review): remove(PRESENT) unmaps; looks swapped with unmap()'s insert(PRESENT)
+    //     let address = active_table.p4()[::KERNEL_HEAP_PML4].address();
+    //     let frame = Frame::containing_address(address);
+    //     let mut flags = active_table.p4()[::KERNEL_HEAP_PML4].flags();
+    //     flags.remove(EntryFlags::PRESENT);
+    //     active_table.p4_mut()[::KERNEL_HEAP_PML4].set(frame, flags);
+    //
+    //     // Reload page tables
+    //     active_table.flush_all();
+    // }
+
+    // Switch from the per-CPU stack (end of PTI_CPU_STACK) to the per-context stack (PTI_CONTEXT_STACK)
+    switch_stack(PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len(), PTI_CONTEXT_STACK);
 }
 
 #[cfg(feature = "pti")]
 #[inline(always)]
 pub unsafe fn unmap() {
-    let _cr3: usize;
-    asm!("mov $0, cr3
-          mov cr3, $0"
-          : "=r"(_cr3) : : "memory" : "intel", "volatile");
+    // Switch from the per-context stack (PTI_CONTEXT_STACK) back to the per-CPU stack (end of PTI_CPU_STACK)
+    switch_stack(PTI_CONTEXT_STACK, PTI_CPU_STACK.as_ptr() as usize + PTI_CPU_STACK.len());
+
+    // {
+    //     let mut active_table = unsafe { ActivePageTable::new() };
+    //
+    //     // Unmap kernel heap -- NOTE(review): insert(PRESENT) maps; looks swapped with map()'s remove(PRESENT)
+    //     let address = active_table.p4()[::KERNEL_HEAP_PML4].address();
+    //     let frame = Frame::containing_address(address);
+    //     let mut flags = active_table.p4()[::KERNEL_HEAP_PML4].flags();
+    //     flags.insert(EntryFlags::PRESENT);
+    //     active_table.p4_mut()[::KERNEL_HEAP_PML4].set(frame, flags);
+    //
+    //     // Reload page tables
+    //     active_table.flush_all();
+    // }
 }
 
 #[cfg(not(feature = "pti"))]
diff --git a/src/arch/x86_64/start.rs b/src/arch/x86_64/start.rs
index ddc7b61de95b3445da574d0b2acf723afe62bc56..5a9f85b8e095c35913ad7832d64c8602f0f6bd40 100644
--- a/src/arch/x86_64/start.rs
+++ b/src/arch/x86_64/start.rs
@@ -190,30 +190,54 @@ pub unsafe extern fn kstart_ap(args_ptr: *const KernelArgsAp) -> ! {
     ::kmain_ap(cpu_id);
 }
 
+#[naked]
 pub unsafe fn usermode(ip: usize, sp: usize, arg: usize) -> ! {
+    asm!("push r10
+          push r11
+          push r12
+          push r13
+          push r14
+          push r15"
+          : // No output; this builds the iretq frame (ss, rsp, rflags, cs, rip) with the argument on top
+          :   "{r10}"(gdt::GDT_USER_DATA << 3 | 3), // Data segment
+              "{r11}"(sp), // Stack pointer
+              "{r12}"(1 << 9), // Flags - Set interrupt enable flag
+              "{r13}"(gdt::GDT_USER_CODE << 3 | 3), // Code segment
+              "{r14}"(ip), // IP
+              "{r15}"(arg) // Argument, popped into rdi just before iretq
+          : // No clobbers declared (NOTE(review): the pushes change rsp and write stack memory)
+          : "intel", "volatile");
+
     // Unmap kernel
     pti::unmap();
 
     // Go to usermode
-    asm!("mov ds, r10d
-        mov es, r10d
-        mov fs, r11d
-        mov gs, r10d
-        push r10
-        push r12
-        push r13
-        push r14
-        push r15
-        iretq"
-        : // No output because it never returns
-        :   "{r10}"(gdt::GDT_USER_DATA << 3 | 3), // Data segment
-            "{r11}"(gdt::GDT_USER_TLS << 3 | 3), // TLS segment
-            "{r12}"(sp), // Stack pointer
-            "{r13}"(1 << 9), // Flags - Set interrupt enable flag
-            "{r14}"(gdt::GDT_USER_CODE << 3 | 3), // Code segment
-            "{r15}"(ip) // IP
-            "{rdi}"(arg) // Argument
-        : // No clobers because it never returns
-        : "intel", "volatile");
+    asm!("mov ds, r14d
+         mov es, r14d
+         mov fs, r15d
+         mov gs, r14d
+         xor rax, rax
+         xor rbx, rbx
+         xor rcx, rcx
+         xor rdx, rdx
+         xor rsi, rsi
+         xor rdi, rdi
+         xor rbp, rbp
+         xor r8, r8
+         xor r9, r9
+         xor r10, r10
+         xor r11, r11
+         xor r12, r12
+         xor r13, r13
+         xor r14, r14
+         xor r15, r15
+         fninit
+         pop rdi
+         iretq"
+         : // No output because it never returns
+         :   "{r14}"(gdt::GDT_USER_DATA << 3 | 3), // Data segment
+             "{r15}"(gdt::GDT_USER_TLS << 3 | 3) // TLS segment
+         : // No clobbers because it never returns
+         : "intel", "volatile");
     unreachable!();
 }
diff --git a/src/context/switch.rs b/src/context/switch.rs
index 3e0c947f7a15e7a1e5439fabb0ec35624ba89f1e..4c0e91109fb428ca0e10e7cd74803b0b51f913d3 100644
--- a/src/context/switch.rs
+++ b/src/context/switch.rs
@@ -118,7 +118,7 @@ pub unsafe fn switch() -> bool {
         (&mut *from_ptr).running = false;
         (&mut *to_ptr).running = true;
         if let Some(ref stack) = (*to_ptr).kstack {
-            gdt::TSS.rsp[0] = (stack.as_ptr() as usize + stack.len() - 256) as u64;
+            gdt::set_tss_stack(stack.as_ptr() as usize + stack.len());
         }
         CONTEXT_ID.store((&mut *to_ptr).id, Ordering::SeqCst);
     }