diff --git a/src/externs.rs b/src/externs.rs
index d92f8c44880b54ab1baf5aa7d9d7930234beece9..9db9ea3a256cd79046a949d254a05e97e8b81648 100644
--- a/src/externs.rs
+++ b/src/externs.rs
@@ -1,10 +1,50 @@
 /// Memcpy
 ///
 /// Copy N bytes of memory from one location to another.
+///
+/// When both pointers are 8-byte aligned, whole 8-byte groups are copied
+/// first; the remainder (or an unaligned copy) is done one byte at a time.
+#[cfg(target_pointer_width = "64")]
 #[no_mangle]
 pub unsafe extern fn memcpy(dest: *mut u8, src: *const u8,
                             n: usize) -> *mut u8 {
-    let mut i = 0;
+    let n_64 = if (dest as usize | src as usize) % 8 == 0 { n/8 } else { 0 };
+    let mut i: usize = 0;
+    // Copy 8 bytes at a time. Dereferencing a misaligned `*mut u64` is
+    // undefined behaviour, so `n_64` is 0 unless both pointers are aligned.
+    while i < n_64 {
+        *((dest as usize + i*8) as *mut u64) =
+            *((src as usize + i*8) as *const u64);
+        i += 1;
+    }
+
+    let mut i: usize = i*8;
+
+    // Copy 1 byte at a time
+    while i < n {
+        *((dest as usize + i) as *mut u8) = *((src as usize + i) as *const u8);
+        i += 1;
+    }
+
+    dest
+}
+
+// 32-bit version of the function above
+#[cfg(target_pointer_width = "32")]
+#[no_mangle]
+pub unsafe extern fn memcpy(dest: *mut u8, src: *const u8,
+                            n: usize) -> *mut u8 {
+    let n_32 = if (dest as usize | src as usize) % 4 == 0 { n/4 } else { 0 };
+    let mut i: usize = 0;
+    // Copy 4 bytes at a time. Dereferencing a misaligned `*mut u32` is
+    // undefined behaviour, so `n_32` is 0 unless both pointers are aligned.
+    while i < n_32 {
+        *((dest as usize + i*4) as *mut u32) =
+            *((src as usize + i*4) as *const u32);
+        i += 1;
+    }
+
+    let mut i: usize = i*4;
     while i < n {
         *((dest as usize + i) as *mut u8) = *((src as usize + i) as *const u8);
         i += 1;
@@ -16,19 +56,97 @@ pub unsafe extern fn memcpy(dest: *mut u8, src: *const u8,
 /// Memmove
 ///
 /// Copy N bytes of memory from src to dest. The memory areas may overlap.
+///
+/// When both pointers are 8-byte aligned, whole 8-byte groups are moved
+/// together; the remainder (or an unaligned move) is done byte-by-byte.
+#[cfg(target_pointer_width = "64")]
 #[no_mangle]
 pub unsafe extern fn memmove(dest: *mut u8, src: *const u8,
                              n: usize) -> *mut u8 {
     if src < dest as *const u8 {
-        let mut i = n;
+        let n_64 = if (dest as usize | src as usize) % 8 == 0 { n/8 } else { 0 };
+        let mut i: usize = n;
+
+        // dest is above src: copy backwards, highest addresses first. The
+        // trailing bytes must therefore be moved before the 8-byte groups.
+        while i != n_64*8 {
+            i -= 1;
+            *((dest as usize + i) as *mut u8) =
+                *((src as usize + i) as *const u8);
+        }
+
+        let mut i: usize = n_64;
+        // Copy 8 bytes at a time (only when both pointers are aligned)
         while i != 0 {
             i -= 1;
-            *((dest as usize + i) as *mut u8) = *((src as usize + i) as *const u8);
+            *((dest as usize + i*8) as *mut u64) =
+                *((src as usize + i*8) as *const u64);
         }
     } else {
-        let mut i = 0;
+        let n_64 = if (dest as usize | src as usize) % 8 == 0 { n/8 } else { 0 };
+        let mut i: usize = 0;
+
+        // Copy 8 bytes at a time (only when both pointers are aligned)
+        while i < n_64 {
+            *((dest as usize + i*8) as *mut u64) =
+                *((src as usize + i*8) as *const u64);
+            i += 1;
+        }
+
+        let mut i: usize = i*8;
+
+        // Copy 1 byte at a time
         while i < n {
-            *((dest as usize + i) as *mut u8) = *((src as usize + i) as *const u8);
+            *((dest as usize + i) as *mut u8) =
+                *((src as usize + i) as *const u8);
+            i += 1;
+        }
+    }
+
+    dest
+}
+
+// 32-bit version of the function above
+#[cfg(target_pointer_width = "32")]
+#[no_mangle]
+pub unsafe extern fn memmove(dest: *mut u8, src: *const u8,
+                             n: usize) -> *mut u8 {
+    if src < dest as *const u8 {
+        let n_32 = if (dest as usize | src as usize) % 4 == 0 { n/4 } else { 0 };
+        let mut i: usize = n;
+
+        // dest is above src: copy backwards, highest addresses first. The
+        // trailing bytes must therefore be moved before the 4-byte groups.
+        while i != n_32*4 {
+            i -= 1;
+            *((dest as usize + i) as *mut u8) =
+                *((src as usize + i) as *const u8);
+        }
+
+        let mut i: usize = n_32;
+        // Copy 4 bytes at a time (only when both pointers are aligned)
+        while i != 0 {
+            i -= 1;
+            *((dest as usize + i*4) as *mut u32) =
+                *((src as usize + i*4) as *const u32);
+        }
+    } else {
+        let n_32 = if (dest as usize | src as usize) % 4 == 0 { n/4 } else { 0 };
+        let mut i: usize = 0;
+
+        // Copy 4 bytes at a time (only when both pointers are aligned)
+        while i < n_32 {
+            *((dest as usize + i*4) as *mut u32) =
+                *((src as usize + i*4) as *const u32);
+            i += 1;
+        }
+
+        let mut i: usize = i*4;
+
+        // Copy 1 byte at a time
+        while i < n {
+            *((dest as usize + i) as *mut u8) =
+                *((src as usize + i) as *const u8);
             i += 1;
         }
     }
@@ -39,11 +157,57 @@ pub unsafe extern fn memmove(dest: *mut u8, src: *const u8,
 /// Memset
 ///
 /// Fill a block of memory with a specified value.
+///
+/// When dest is 8-byte aligned, whole 8-byte groups are set first; the
+/// remainder (or an unaligned fill) is done one byte at a time.
+#[cfg(target_pointer_width = "64")]
+#[no_mangle]
+pub unsafe extern fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 {
+    let c = c as u8 as u64; // Only the low byte of c is used, as in C
+    let c = (c << 56) | (c << 48) | (c << 40) | (c << 32)
+          | (c << 24) | (c << 16) | (c << 8)  | c;
+    let n_64 = if (dest as usize) % 8 == 0 { n/8 } else { 0 };
+    let mut i: usize = 0;
+
+    // Set 8 bytes at a time (n_64 is 0 when dest is not 8-byte aligned)
+    while i < n_64 {
+        *((dest as usize + i*8) as *mut u64) = c;
+        i += 1;
+    }
+
+    let c = c as u8;
+    let mut i: usize = i*8;
+
+    // Set 1 byte at a time
+    while i < n {
+        *((dest as usize + i) as *mut u8) = c;
+        i += 1;
+    }
+
+    dest
+}
+
+// 32-bit version of the function above
+#[cfg(target_pointer_width = "32")]
 #[no_mangle]
 pub unsafe extern fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 {
-    let mut i = 0;
+    let c = c as u8 as u32; // Only the low byte of c is used, as in C
+    let c = (c << 24) | (c << 16) | (c << 8)  | c;
+    let n_32 = if (dest as usize) % 4 == 0 { n/4 } else { 0 };
+    let mut i: usize = 0;
+
+    // Set 4 bytes at a time (n_32 is 0 when dest is not 4-byte aligned)
+    while i < n_32 {
+        *((dest as usize + i*4) as *mut u32) = c;
+        i += 1;
+    }
+
+    let c = c as u8;
+    let mut i: usize = i*4;
+
+    // Set 1 byte at a time
     while i < n {
-        *((dest as usize + i) as *mut u8) = c as u8;
+        *((dest as usize + i) as *mut u8) = c;
         i += 1;
     }
 
@@ -53,15 +217,80 @@ pub unsafe extern fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8 {
 /// Memcmp
 ///
 /// Compare two blocks of memory.
+///
+/// When both pointers are 8-byte aligned, 8-byte groups are compared first
+/// and a differing group is re-scanned byte-by-byte; the rest goes bytewise.
+#[cfg(target_pointer_width = "64")]
 #[no_mangle]
 pub unsafe extern fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
-    let mut i = 0;
+    let n_64 = if (s1 as usize | s2 as usize) % 8 == 0 { n/8 } else { 0 };
+    let mut i: usize = 0;
+    // Compare 8 bytes at a time (n_64 is 0 unless both pointers are aligned)
+    while i < n_64 {
+        let a = *((s1 as usize + i*8) as *const u64);
+        let b = *((s2 as usize + i*8) as *const u64);
+        if a != b {
+            let mut i: usize = i*8;
+            let n: usize = i + 8;
+            // Find the one byte that is not equal
+            while i < n {
+                let a = *((s1 as usize + i) as *const u8);
+                let b = *((s2 as usize + i) as *const u8);
+                if a != b {
+                    return a as i32 - b as i32;
+                }
+                i += 1;
+            }
+        }
+        i += 1;
+    }
+
+    let mut i: usize = i*8;
+
+    while i < n {
+        let a = *((s1 as usize + i) as *const u8);
+        let b = *((s2 as usize + i) as *const u8);
+        if a != b {
+            return a as i32 - b as i32;
+        }
+        i += 1;
+    }
+
+    0
+}
+
+#[cfg(target_pointer_width = "32")]
+#[no_mangle]
+pub unsafe extern fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+    let n_32 = if (s1 as usize | s2 as usize) % 4 == 0 { n/4 } else { 0 };
+    let mut i: usize = 0;
+    // Compare 4 bytes at a time (n_32 is 0 unless both pointers are aligned)
+    while i < n_32 {
+        let a = *((s1 as usize + i*4) as *const u32);
+        let b = *((s2 as usize + i*4) as *const u32);
+        if a != b {
+            let mut i: usize = i*4;
+            let n: usize = i + 4;
+            // Find the one byte that is not equal
+            while i < n {
+                let a = *((s1 as usize + i) as *const u8);
+                let b = *((s2 as usize + i) as *const u8);
+                if a != b {
+                    return a as i32 - b as i32;
+                }
+                i += 1;
+            }
+        }
+        i += 1;
+    }
+
+    let mut i: usize = i*4;
 
     while i < n {
         let a = *((s1 as usize + i) as *const u8);
         let b = *((s2 as usize + i) as *const u8);
         if a != b {
-            return a as i32 - b as i32
+            return a as i32 - b as i32;
         }
         i += 1;
     }