From 7f8dc2f251e1e39cca731e4ccb94b01b5e478fcb Mon Sep 17 00:00:00 2001
From: oddcoder <ahmedsoliman@oddcoder.com>
Date: Mon, 24 Feb 2020 03:01:44 +0200
Subject: [PATCH] Add support for invoking ld.so via execve() and friends

Introduction:

The original implementation of `relibc_ld_so_start` assumes that
ld.so will always be invoked manually, as in "/lib/ld64.so ./a.out".
The problem lies in this snippet:
    if sp.argc < 2 {
      eprintln!("ld.so [executable] [arguments...]");
      unistd::_exit(1);
      loop {}
    }

As a result, on Linux, when a user runs "./a.out" directly, they
receive the message:
    ld.so [executable] [arguments...]

This patch makes use of the auxiliary vector (AUXV), specifically
AT_ENTRY. When ld.so is invoked manually, AT_ENTRY is the entry point of
ld.so itself. When `./a.out` is run directly, AT_ENTRY is instead the
entry point of `a.out`. This patch compares AT_ENTRY to the entry point
of ld.so: only if they are equal does it assume that argv[1] is the real
program and adjust the stack; otherwise it proceeds with the stack
unadjusted.
---
 src/header/sys_auxv/mod.rs |  28 ++++-
 src/ld_so/src/lib.rs       |   7 +-
 src/ld_so/start.rs         | 208 +++++++++++++++++++++++--------------
 3 files changed, 164 insertions(+), 79 deletions(-)

diff --git a/src/header/sys_auxv/mod.rs b/src/header/sys_auxv/mod.rs
index 3e191883f..184eba1a7 100644
--- a/src/header/sys_auxv/mod.rs
+++ b/src/header/sys_auxv/mod.rs
@@ -2,7 +2,33 @@
 
 use crate::platform::types::*;
 
-pub const AT_HWCAP: usize = 16;
+pub const AT_NULL: usize = 0; /* End of vector */
+pub const AT_IGNORE: usize = 1; /* Entry should be ignored */
+pub const AT_EXECFD: usize = 2; /* File descriptor of program */
+pub const AT_PHDR: usize = 3; /* Program headers for program */
+pub const AT_PHENT: usize = 4; /* Size of program header entry */
+pub const AT_PHNUM: usize = 5; /* Number of program headers */
+pub const AT_PAGESZ: usize = 6; /* System page size */
+pub const AT_BASE: usize = 7; /* Base address of interpreter */
+pub const AT_FLAGS: usize = 8; /* Flags */
+pub const AT_ENTRY: usize = 9; /* Entry point of program */
+pub const AT_NOTELF: usize = 10; /* Program is not ELF */
+pub const AT_UID: usize = 11; /* Real uid */
+pub const AT_EUID: usize = 12; /* Effective uid */
+pub const AT_GID: usize = 13; /* Real gid */
+pub const AT_EGID: usize = 14; /* Effective gid */
+pub const AT_CLKTCK: usize = 17; /* Frequency of times() */
+pub const AT_PLATFORM: usize = 15; /* String identifying platform.  */
+pub const AT_HWCAP: usize = 16; /* Machine-dependent hints about */
+pub const AT_FPUCW: usize = 18; /* Used FPU control word.  */
+pub const AT_DCACHEBSIZE: usize = 19; /* Data cache block size.  */
+pub const AT_ICACHEBSIZE: usize = 20; /* Instruction cache block size.  */
+pub const AT_UCACHEBSIZE: usize = 21; /* Unified cache block size.  */
+pub const AT_IGNOREPPC: usize = 22; /* Entry should be ignored.  */
+pub const AT_BASE_PLATFORM: usize = 24; /* String identifying real platforms.*/
+pub const AT_RANDOM: usize = 25; /* Address of 16 random bytes.  */
+pub const AT_HWCAP2: usize = 26; /* More machine-dependent hints about*/
+pub const AT_EXECFN: usize = 31; /* Filename of executable.  */
 
 #[no_mangle]
 pub extern "C" fn getauxval(_t: c_ulong) -> c_ulong {
diff --git a/src/ld_so/src/lib.rs b/src/ld_so/src/lib.rs
index e64534ccb..f26bc0af6 100644
--- a/src/ld_so/src/lib.rs
+++ b/src/ld_so/src/lib.rs
@@ -8,12 +8,17 @@
 pub unsafe extern "C" fn _start() {
     #[cfg(target_arch = "x86_64")]
     asm!("
+        # rsi = _start + 5
+        call next
+next:   pop rsi
+
         # Save original stack and align stack to 16 bytes
         mov rbp, rsp
         and rsp, 0xFFFFFFFFFFFFFFF0
 
-        # Call ld_so_start(stack)
+        # Call ld_so_start(stack, entry)
         mov rdi, rbp
+        sub rsi, 5
         call relibc_ld_so_start
 
         # Restore original stack, clear registers, and jump to new start function
diff --git a/src/ld_so/start.rs b/src/ld_so/start.rs
index 56501aad6..ec888ce0b 100644
--- a/src/ld_so/start.rs
+++ b/src/ld_so/start.rs
@@ -1,103 +1,157 @@
 // Start code adapted from https://gitlab.redox-os.org/redox-os/relibc/blob/master/src/start.rs
 
-use alloc::boxed::Box;
-
-use crate::{c_str::CStr, header::unistd, platform::types::c_char, start::Stack, sync::mutex::Mutex};
-
-use super::linker::Linker;
-use super::tcb::Tcb;
-
-#[no_mangle]
-pub extern "C" fn relibc_ld_so_start(sp: &'static mut Stack) -> usize {
-    if sp.argc < 2 {
-        eprintln!("ld.so [executable] [arguments...]");
-        unistd::_exit(1);
-        loop {}
-    }
-
-    // Some variables that will be overridden by environment and auxiliary vectors
-    let mut library_path = "/lib";
-    //let mut page_size = 4096;
-
-    // Pop the first argument (path to ld_so), and get the path of the program
-    let path_c = unsafe {
-        let mut argv = sp.argv() as *mut usize;
-
-        // Move arguments
-        loop {
-            let next_argv = argv.add(1);
-            let arg = *next_argv;
-            *argv = arg;
-            argv = next_argv;
-            if arg == 0 {
-                break;
+use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeMap, string::String, vec::Vec};
+
+use crate::{
+    c_str::CStr, header::unistd, platform::types::c_char, start::Stack, sync::mutex::Mutex,
+};
+
+use super::{linker::Linker, tcb::Tcb};
+use crate::header::sys_auxv::AT_ENTRY;
+
+unsafe fn get_argv(mut ptr: *const usize) -> (Vec<String>, *const usize) {
+    //traverse the stack and collect argument vector
+    let mut argv = Vec::new();
+    while *ptr != 0 {
+        let arg = *ptr;
+        match CStr::from_ptr(arg as *const c_char).to_str() {
+            Ok(arg_str) => argv.push(arg_str.to_owned()),
+            _ => {
+                eprintln!("ld.so: failed to parse argv[{}]", argv.len());
+                unistd::_exit(1);
+                loop {}
             }
+        }
+        ptr = ptr.add(1);
+    }
+    return (argv, ptr);
+}
 
-            if let Ok(arg_str) = CStr::from_ptr(arg as *const c_char).to_str() {
-                println!("  arg: '{}'", arg_str);
+unsafe fn get_env(mut ptr: *const usize) -> (BTreeMap<String, String>, *const usize) {
+    //traverse the stack and collect environment variables
+    let mut envs = BTreeMap::new();
+    while *ptr != 0 {
+        let env = *ptr;
+        if let Ok(arg_str) = CStr::from_ptr(env as *const c_char).to_str() {
+            let mut parts = arg_str.splitn(2, '=');
+            if let Some(key) = parts.next() {
+                if let Some(value) = parts.next() {
+                    envs.insert(key.to_owned(), value.to_owned());
+                }
             }
         }
+        ptr = ptr.add(1);
+    }
+    return (envs, ptr);
+}
 
-        // Move environment
-        loop {
-            let next_argv = argv.add(1);
-            let arg = *next_argv;
-            *argv = arg;
-            argv = next_argv;
-            if arg == 0 {
-                break;
-            }
+unsafe fn get_auxv(mut ptr: *const usize) -> BTreeMap<usize, usize> {
+    //traverse the stack and collect auxiliary vector (kind, value) pairs
+    let mut auxv = BTreeMap::new();
+    while *ptr != 0 {
+        let kind = *ptr;
+        ptr = ptr.add(1);
+        let value = *ptr;
+        ptr = ptr.add(1);
+        auxv.insert(kind, value);
+    }
+    return auxv;
+}
 
-            if let Ok(arg_str) = CStr::from_ptr(arg as *const c_char).to_str() {
-                println!("  env: '{}'", arg_str);
+unsafe fn adjust_stack(sp: &'static mut Stack) {
+    let mut argv = sp.argv() as *mut usize;
+
+    // Move arguments
+    loop {
+        let next_argv = argv.add(1);
+        let arg = *next_argv;
+        *argv = arg;
+        argv = next_argv;
+        if arg == 0 {
+            break;
+        }
+    }
 
-                let mut parts = arg_str.splitn(2, '=');
-                if let Some(key) = parts.next() {
-                    if let Some(value) = parts.next() {
-                        if let "LD_LIBRARY_PATH" = key {
-                            library_path = value
-                        }
+    // Move environment
+    loop {
+        let next_argv = argv.add(1);
+        let arg = *next_argv;
+        *argv = arg;
+        argv = next_argv;
+        if arg == 0 {
+            break;
+        }
+        if let Ok(arg_str) = CStr::from_ptr(arg as *const c_char).to_str() {
+            let mut parts = arg_str.splitn(2, '=');
+            if let Some(key) = parts.next() {
+                if let Some(value) = parts.next() {
+                    if let "LD_LIBRARY_PATH" = key {
+                        //library_path = value
                     }
                 }
             }
         }
+    }
 
-        // Move auxiliary vectors
-        loop {
-            let next_argv = argv.add(1);
-            let kind = *next_argv;
-            *argv = kind;
-            argv = next_argv;
+    // Move auxiliary vectors
+    loop {
+        let next_argv = argv.add(1);
+        let kind = *next_argv;
+        *argv = kind;
+        argv = next_argv;
+        let next_argv = argv.add(1);
+        let value = *next_argv;
+        *argv = value;
+        argv = next_argv;
+        if kind == 0 {
+            break;
+        }
+    }
 
-            let next_argv = argv.add(1);
-            let value = *next_argv;
-            *argv = value;
-            argv = next_argv;
+    sp.argc -= 1;
+}
+#[no_mangle]
+pub extern "C" fn relibc_ld_so_start(sp: &'static mut Stack, ld_entry: usize) -> usize {
+    // first we get the arguments, the environment, and the auxiliary vector
+    let (argv, envs, auxv) = unsafe {
+        let argv_start = sp.argv() as *mut usize;
+        let (argv, argv_end) = get_argv(argv_start);
+        let (envs, envs_end) = get_env(argv_end.add(1));
+        let auxv = get_auxv(envs_end.add(1));
+        (argv, envs, auxv)
+    };
 
-            if kind == 0 {
-                break;
-            }
+    let img_entry = *auxv.get(&AT_ENTRY).unwrap_or_else(|| {
+        eprintln!("failed to find AT_ENTRY");
+        unistd::_exit(1);
+        loop {}
+    });
 
-            println!("  aux: {}={:#x}", kind, value);
-            //match kind {
-            //    6 => page_size = value,
-            //    _ => (),
-            //}
-        }
+    // Some variables that will be overridden by environment and auxiliary vectors
+    let library_path = match envs.get("LD_LIBRARY_PATH") {
+        Some(lib_path) => lib_path,
+        None => "/lib",
+    };
 
-        sp.argc -= 1;
+    let path;
 
-        CStr::from_ptr(sp.argv0)
-    };
+    let is_manual = img_entry == ld_entry;
+    if is_manual {
+        // ld.so is run directly by the user and not via execve() or a similar system call
+        println!("argv: {:#?}", argv);
+        println!("envs: {:#?}", envs);
+        println!("auxv: {:#x?}", auxv);
 
-    let path = match path_c.to_str() {
-        Ok(ok) => ok,
-        Err(err) => {
-            eprintln!("ld.so: failed to parse path: {}", err);
+        if sp.argc < 2 {
+            eprintln!("ld.so [executable] [arguments...]");
             unistd::_exit(1);
             loop {}
         }
-    };
+        unsafe { adjust_stack(sp) };
+        path = &argv[1];
+    } else {
+        path = &argv[0];
+    }
 
     let mut linker = Linker::new(library_path);
     match linker.load(&path, &path) {
-- 
GitLab