From c78b69969f87ce9be701f892e7161408af99b919 Mon Sep 17 00:00:00 2001
From: Jeremy Soller <jeremy@system76.com>
Date: Wed, 15 Jul 2020 21:46:15 -0600
Subject: [PATCH] Include trampoline in kernel to fix multi_core on EFI

---
 build.rs                      |  22 +++++
 src/acpi/madt.rs              |  19 +++-
 src/acpi/mod.rs               |   3 -
 src/asm/x86_64/trampoline.asm | 174 ++++++++++++++++++++++++++++++++++
 4 files changed, 210 insertions(+), 8 deletions(-)
 create mode 100644 src/asm/x86_64/trampoline.asm

diff --git a/build.rs b/build.rs
index 53c7b15..a0f1d4d 100644
--- a/build.rs
+++ b/build.rs
@@ -91,6 +91,26 @@ fn fill_from_location(f: &mut fs::File, loc: &Path) -> Result<(), Error> {
     Ok(())
 }
 
+/// Assemble the AP trampoline with nasm into `$OUT_DIR/trampoline` (x86_64 targets only).
+/// `cfg(target_arch)` in a build script describes the host, so the target
+/// architecture is taken from `CARGO_CFG_TARGET_ARCH` instead.
+fn asm(out_dir: &str) {
+    use std::process::Command;
+
+    if env::var("CARGO_CFG_TARGET_ARCH").map_or(true, |arch| arch != "x86_64") {
+        return;
+    }
+    println!("cargo:rerun-if-changed=src/asm/x86_64/trampoline.asm");
+
+    let status = Command::new("nasm")
+        .arg("-f").arg("bin")
+        .arg("-o").arg(format!("{}/trampoline", out_dir))
+        .arg("src/asm/x86_64/trampoline.asm")
+        .status()
+        .expect("failed to run nasm");
+    assert!(status.success(), "nasm failed with exit status {}", status);
+}
+
 fn main() {
     println!("cargo:rustc-env=TARGET={}", env::var("TARGET").unwrap());
     println!("cargo:rerun-if-env-changed=INITFS_FOLDER");
@@ -100,6 +120,8 @@ fn main() {
     let mut f = fs::File::create(&dest_path).unwrap();
     let src = env::var("INITFS_FOLDER");
 
+    asm(&out_dir);
+
     // Write header
     f.write_all(
         b"
diff --git a/src/acpi/madt.rs b/src/acpi/madt.rs
index 61396b7..cae2189 100644
--- a/src/acpi/madt.rs
+++ b/src/acpi/madt.rs
@@ -5,7 +5,7 @@ use crate::paging::{ActivePageTable, Page, PhysicalAddress, VirtualAddress};
 use crate::paging::entry::EntryFlags;
 
 use super::sdt::Sdt;
-use super::{AP_STARTUP, TRAMPOLINE, find_sdt, load_table, get_sdt_signature};
+use super::{find_sdt, load_table, get_sdt_signature};
 
 use core::intrinsics::{atomic_load, atomic_store};
 use core::sync::atomic::Ordering;
@@ -22,6 +22,9 @@ pub struct Madt {
     pub flags: u32
 }
 
+const TRAMPOLINE: usize = 0x8000; // address the trampoline is copied to; must match `ORG` in src/asm/x86_64/trampoline.asm
+static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); // flat binary assembled by build.rs
+
 pub static mut MADT: Option<Madt> = None;
 pub const FLAG_PCAT: u32 = 1;
 
@@ -52,13 +55,19 @@ impl Madt {
             }
 
             if cfg!(feature = "multi_core") {
+                // Map trampoline
                 let trampoline_frame = Frame::containing_address(PhysicalAddress::new(TRAMPOLINE));
                 let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE));
-
-                // Map trampoline
                 let result = active_table.map_to(trampoline_page, trampoline_frame, EntryFlags::PRESENT | EntryFlags::WRITABLE);
                 result.flush(active_table);
 
+                // Write trampoline, make sure TRAMPOLINE page is free for use
+                for i in 0..TRAMPOLINE_DATA.len() {
+                    unsafe {
+                        atomic_store((TRAMPOLINE as *mut u8).add(i), TRAMPOLINE_DATA[i]);
+                    }
+                }
+
                 for madt_entry in madt.iter() {
                     println!("      {:?}", madt_entry);
                     match madt_entry {
@@ -73,7 +82,7 @@ impl Madt {
                                 let stack_start = allocate_frames(64).expect("no more frames in acpi stack_start").start_address().get() + crate::KERNEL_OFFSET;
                                 let stack_end = stack_start + 64 * 4096;
 
-                                let ap_ready = TRAMPOLINE as *mut u64;
+                                let ap_ready = (TRAMPOLINE + 8) as *mut u64;
                                 let ap_cpu_id = unsafe { ap_ready.offset(1) };
                                 let ap_page_table = unsafe { ap_ready.offset(2) };
                                 let ap_stack_start = unsafe { ap_ready.offset(3) };
@@ -106,7 +115,7 @@ impl Madt {
                                 // Send START IPI
                                 {
                                     //Start at 0x0800:0000 => 0x8000. Hopefully the bootloader code is still there
-                                    let ap_segment = (AP_STARTUP >> 12) & 0xFF;
+                                    let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
                                     let mut icr = 0x4600 | ap_segment as u64;
 
                                     if local_apic.x2 {
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
index 4764aa4..25e64eb 100644
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -39,9 +39,6 @@ pub mod aml;
 mod rxsdt;
 mod rsdp;
 
-const TRAMPOLINE: usize = 0x7E00;
-const AP_STARTUP: usize = TRAMPOLINE + 512;
-
 pub fn get_sdt(sdt_address: usize, active_table: &mut ActivePageTable) -> &'static Sdt {
     {
         let page = Page::containing_address(VirtualAddress::new(sdt_address));
diff --git a/src/asm/x86_64/trampoline.asm b/src/asm/x86_64/trampoline.asm
new file mode 100644
index 0000000..39d89bc
--- /dev/null
+++ b/src/asm/x86_64/trampoline.asm
@@ -0,0 +1,174 @@
+; trampoline for bringing up APs
+; compiled with nasm by build.rs, and included in src/acpi/madt.rs
+
+ORG 0x8000
+SECTION .text
+USE16
+
+trampoline: ; start of the image; src/acpi/madt.rs addresses the fields below from TRAMPOLINE + 8
+    jmp short startup_ap ; entry point: skip over the parameter block
+    times 8 - ($ - trampoline) nop ; pad so the parameter block starts at offset 8
+    .ready: dq 0 ; set to 1 by long_mode_ap just before it jumps to .code
+    .cpu_id: dq 0 ; the address of this field is passed in rdi to .code
+    .page_table: dq 0 ; NOTE(review): not read in this file; startup_ap loads cr3 with a fixed 0x70000 - confirm
+    .stack_start: dq 0 ; not read in this file
+    .stack_end: dq 0 ; rsp is set to this value minus 256
+    .code: dq 0 ; address long_mode_ap jumps to
+
+startup_ap: ; 16-bit entry (USE16): set up control registers, then far-jump into 64-bit code
+    cli
+
+    xor ax, ax ; zero the data, extra and stack segment registers
+    mov ds, ax
+    mov es, ax
+    mov ss, ax
+
+    ; initialize stack to invalid value
+    mov sp, 0
+
+    ; cr3 holds pointer to PML4
+    mov edi, 0x70000 ; NOTE(review): fixed address; trampoline.page_table is not consulted - confirm the PML4 lives here
+    mov cr3, edi
+
+    ; Enable FPU
+    mov eax, cr0
+    and al, 11110011b ; Clear task switched (3) and emulation (2)
+    or al, 00100010b ; Set numeric error (5) monitor co-processor (1)
+    mov cr0, eax
+
+    ; 18: Enable OSXSAVE
+    ; 10: Unmasked SSE exceptions
+    ; 9: FXSAVE/FXRSTOR
+    ; 7: Page Global
+    ; 5: Physical Address Extension
+    ; 4: Page Size Extension
+    mov eax, cr4
+    or eax, 1 << 18 | 1 << 10 | 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4
+    mov cr4, eax
+
+    ; initialize floating point registers
+    fninit
+
+    ; load the GDT defined at the end of this file
+    lgdt [gdtr]
+
+    mov ecx, 0xC0000080               ; Read from the EFER MSR.
+    rdmsr
+    or eax, 1 << 11 | 1 << 8          ; Set the NXE (11) and Long-Mode-Enable (8) bits.
+    wrmsr
+
+    ; enabling paging and protection simultaneously
+    mov ebx, cr0
+    ; 31: Paging
+    ; 16: write protect kernel
+    ; 0: Protected Mode
+    or ebx, 1 << 31 | 1 << 16 | 1
+    mov cr0, ebx
+
+    ; far jump to enable Long Mode and load CS with 64 bit segment
+    jmp gdt.kernel_code:long_mode_ap
+
+USE64
+long_mode_ap: ; 64-bit continuation: load data segments, set the stack, jump to trampoline.code
+    mov rax, gdt.kernel_data ; selector (byte offset) of the data descriptor in gdt
+    mov ds, rax
+    mov es, rax
+    mov fs, rax
+    mov gs, rax
+    mov ss, rax
+
+    mov rcx, [trampoline.stack_end]
+    lea rsp, [rcx - 256] ; stack pointer starts 256 bytes below stack_end
+
+    mov rdi, trampoline.cpu_id ; rdi = address of the cpu_id field (presumably the first argument of the target - confirm)
+
+    mov rax, [trampoline.code] ; fetch the target before signalling readiness
+    mov qword [trampoline.ready], 1 ; NOTE(review): presumably polled by the CPU that sent the START IPI - confirm
+    jmp rax
+
+struc GDTEntry ; layout of one 8-byte descriptor (the field sizes below sum to 8)
+    .limitl resw 1 ; limit, low word
+    .basel resw 1 ; base, low word
+    .basem resb 1 ; base, middle byte
+    .attribute resb 1 ; filled from the attrib.* constants
+    .flags__limith resb 1 ; flags.* bits, sharing the byte with the high limit bits
+    .baseh resb 1 ; base, high byte
+endstruc
+
+attrib: ; bit values for GDTEntry.attribute; this file only uses .present, .user, .code and .writable
+    .present              equ 1 << 7
+    .ring1                equ 1 << 5
+    .ring2                equ 1 << 6
+    .ring3                equ 1 << 5 | 1 << 6
+    .user                 equ 1 << 4
+; user (code/data) descriptors
+    .code                 equ 1 << 3
+;   bits for code descriptors
+    .conforming           equ 1 << 2
+    .readable             equ 1 << 1
+;   bits for data descriptors
+    .expand_down          equ 1 << 2
+    .writable             equ 1 << 1
+    .accessed             equ 1 << 0
+; system descriptors: type values rather than individual bits
+;   legacy
+    .tssAvailabe16        equ 0x1
+    .ldt                  equ 0x2
+    .tssBusy16            equ 0x3
+    .call16               equ 0x4
+    .task                 equ 0x5
+    .interrupt16          equ 0x6
+    .trap16               equ 0x7
+    .tssAvailabe32        equ 0x9
+    .tssBusy32            equ 0xB
+    .call32               equ 0xC
+    .interrupt32          equ 0xE
+    .trap32               equ 0xF
+;   long mode
+    .ldt32                equ 0x2
+    .tssAvailabe64        equ 0x9
+    .tssBusy64            equ 0xB
+    .call64               equ 0xC
+    .interrupt64          equ 0xE
+    .trap64               equ 0xF
+
+flags: ; bit values for GDTEntry.flags__limith; this file only uses .long_mode
+    .granularity equ 1 << 7
+    .available equ 1 << 4
+; user (code/data) descriptors
+    .default_operand_size equ 1 << 6
+;   bits for code descriptors
+    .long_mode equ 1 << 5
+;   bits for data descriptors
+    .reserved equ 1 << 5
+
+gdtr: ; operand of the lgdt in startup_ap
+    dw gdt.end - 1  ; limit: offset of the last valid byte of the table (size minus one)
+    dq gdt          ; base address of the table
+
+gdt: ; descriptor table loaded by startup_ap; the .xxx symbols are byte offsets from gdt, used as selectors
+.null equ $ - gdt ; offset 0: all-zero first entry
+    dq 0
+
+.kernel_code equ $ - gdt ; selector used by the far jump into long_mode_ap
+istruc GDTEntry
+    at GDTEntry.limitl, dw 0
+    at GDTEntry.basel, dw 0
+    at GDTEntry.basem, db 0
+    at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code
+    at GDTEntry.flags__limith, db flags.long_mode
+    at GDTEntry.baseh, db 0
+iend
+
+.kernel_data equ $ - gdt ; selector loaded into ds/es/fs/gs/ss by long_mode_ap
+istruc GDTEntry
+    at GDTEntry.limitl, dw 0
+    at GDTEntry.basel, dw 0
+    at GDTEntry.basem, db 0
+; The AMD System Programming Manual states that the writable bit is ignored in long mode, but ss cannot be set to this descriptor without it
+    at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable
+    at GDTEntry.flags__limith, db 0
+    at GDTEntry.baseh, db 0
+iend
+
+.end equ $ - gdt ; total size of the table in bytes
-- 
GitLab