/// Memcpy
///
/// Copy `n` bytes of memory from `src` to `dest` and return `dest`.
///
/// The bulk of the buffer is moved one machine word (`usize`) at a time and
/// the remaining `n % size_of::<usize>()` bytes are moved one by one. Word
/// accesses are explicitly unaligned, so no alignment is required of either
/// pointer.
///
/// # Safety
///
/// `src` must be valid for reads of `n` bytes, `dest` must be valid for
/// writes of `n` bytes, and the two regions must not overlap (use
/// [`memmove`] for overlapping regions).
#[no_mangle]
pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
    const WORD: usize = core::mem::size_of::<usize>();
    // Number of bytes covered by whole words.
    let bulk = n - n % WORD;
    let mut off: usize = 0;

    // Copy one word at a time.
    while off < bulk {
        // SAFETY: `off + WORD <= bulk <= n`, so both accesses stay inside the
        // regions the caller vouched for; unaligned accessors are used
        // because neither pointer is known to be word-aligned.
        unsafe {
            let word = core::ptr::read_unaligned(src.add(off) as *const usize);
            core::ptr::write_unaligned(dest.add(off) as *mut usize, word);
        }
        off += WORD;
    }

    // Copy the remaining bytes one at a time.
    while off < n {
        // SAFETY: `off < n`, inside both regions.
        unsafe {
            *dest.add(off) = *src.add(off);
        }
        off += 1;
    }

    dest
}

/// Memmove
///
/// Copy `n` bytes of memory from `src` to `dest` and return `dest`. The
/// memory areas may overlap.
///
/// When `src` lies below `dest` the copy runs from the highest address down
/// to the lowest; otherwise it runs upwards. In both directions every source
/// byte is read before a store can overwrite it. Whole machine words are used
/// for the bulk of the buffer and single bytes for the remainder.
///
/// # Safety
///
/// `src` must be valid for reads of `n` bytes and `dest` must be valid for
/// writes of `n` bytes.
#[no_mangle]
pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
    const WORD: usize = core::mem::size_of::<usize>();
    // Number of bytes covered by whole words; the tail is `bulk..n`.
    let bulk = n - n % WORD;

    if src < dest as *const u8 {
        // Backward copy. The tail bytes sit at the highest addresses, so they
        // must go first: storing the top word of `dest` may overwrite them in
        // `src` when the regions overlap.
        let mut off: usize = n;
        while off > bulk {
            off -= 1;
            // SAFETY: `off < n`, inside both regions.
            unsafe {
                *dest.add(off) = *src.add(off);
            }
        }

        // `off == bulk` here, a multiple of WORD, so the loop ends at 0.
        while off > 0 {
            off -= WORD;
            // SAFETY: `off + WORD <= bulk <= n`. The word is loaded in full
            // before the store, so overlap inside one word is harmless.
            unsafe {
                let word = core::ptr::read_unaligned(src.add(off) as *const usize);
                core::ptr::write_unaligned(dest.add(off) as *mut usize, word);
            }
        }
    } else {
        // Forward copy: words first, then the tail.
        let mut off: usize = 0;
        while off < bulk {
            // SAFETY: `off + WORD <= bulk <= n`. The word is loaded in full
            // before the store, so overlap inside one word is harmless.
            unsafe {
                let word = core::ptr::read_unaligned(src.add(off) as *const usize);
                core::ptr::write_unaligned(dest.add(off) as *mut usize, word);
            }
            off += WORD;
        }

        while off < n {
            // SAFETY: `off < n`, inside both regions.
            unsafe {
                *dest.add(off) = *src.add(off);
            }
            off += 1;
        }
    }

    dest
}

/// Memset
///
/// Fill the first `n` bytes of `dest` with the value `c` converted to a
/// byte, and return `dest`.
///
/// Only the low 8 bits of `c` are used. That byte is replicated into every
/// byte of a machine word, which is stored repeatedly for the bulk of the
/// buffer; the remaining bytes are stored one by one.
///
/// # Safety
///
/// `dest` must be valid for writes of `n` bytes.
#[no_mangle]
pub unsafe extern "C" fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 {
    const WORD: usize = core::mem::size_of::<usize>();
    // Truncate first: replicating the full `i32` would smear its upper bits
    // (or its sign extension) over the neighbouring bytes of the pattern.
    let byte = c as u8;
    // `usize::MAX / 0xFF` is 0x0101..01, so the product repeats `byte` in
    // every byte of the word and cannot overflow.
    let pattern = (byte as usize) * (usize::MAX / 0xFF);
    let bulk = n - n % WORD;
    let mut off: usize = 0;

    // Set one word at a time.
    while off < bulk {
        // SAFETY: `off + WORD <= bulk <= n`; `dest` need not be aligned.
        unsafe {
            core::ptr::write_unaligned(dest.add(off) as *mut usize, pattern);
        }
        off += WORD;
    }

    // Set the remaining bytes one at a time.
    while off < n {
        // SAFETY: `off < n`, inside the region.
        unsafe {
            *dest.add(off) = byte;
        }
        off += 1;
    }

    dest
}

/// Memcmp
///
/// Compare the first `n` bytes of two blocks of memory.
///
/// Returns zero when the blocks are equal; otherwise returns the difference
/// `s1[i] - s2[i]` (both taken as unsigned bytes) at the first differing
/// index `i`.
///
/// Equal prefixes are skipped one machine word at a time. As soon as two
/// words differ, the byte loop takes over from the start of that word to
/// locate the exact byte.
///
/// # Safety
///
/// `s1` and `s2` must each be valid for reads of `n` bytes.
#[no_mangle]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
    const WORD: usize = core::mem::size_of::<usize>();
    let bulk = n - n % WORD;
    let mut off: usize = 0;

    // Skip over equal words.
    while off < bulk {
        // SAFETY: `off + WORD <= bulk <= n`; neither pointer need be aligned.
        let (a, b) = unsafe {
            (
                core::ptr::read_unaligned(s1.add(off) as *const usize),
                core::ptr::read_unaligned(s2.add(off) as *const usize),
            )
        };
        if a != b {
            // The first mismatch is inside this word; find it bytewise below.
            break;
        }
        off += WORD;
    }

    // Compare byte by byte from the first word that differs (or the tail).
    while off < n {
        // SAFETY: `off < n`, inside both regions.
        let (a, b) = unsafe { (*s1.add(off), *s2.add(off)) };
        if a != b {
            return a as i32 - b as i32;
        }
        off += 1;
    }

    0
}