Commit ecee17f1 authored by Tom Almeida's avatar Tom Almeida

Merge branch 'fix_chunking' into 'master'

Fix chunking

Closes #5

See merge request !3
parents e018b387 d9305643
Pipeline #7848 passed with stage
in 57 seconds
......@@ -8,3 +8,6 @@ documentation = "https://docs.rs/seahash"
license = "MIT"
keywords = ["hash", "hashing", "checksum", "checksumming", "portable"]
exclude = ["target", "Cargo.lock"]
[dev-dependencies]
quickcheck = "0.9.2"
......@@ -67,7 +67,7 @@ impl SeaHasher {
self.ntail = 0;
self.tail = 0;
// We've done the existing tail, now just do the rest
// We've done the existing tail, now just do the rest in chunks of 4 x u64.
ptr = ptr.offset(copied as isize);
let end_ptr = ptr.offset((bytes.len()-copied) as isize & !0x1F);
while end_ptr > ptr {
......@@ -77,22 +77,29 @@ impl SeaHasher {
self.state.3 = helper::diffuse(self.state.3 ^ helper::read_u64(ptr.offset(24)));
ptr = ptr.offset(32);
self.written += 32;
}
let mut excessive = bytes.len() + bytes.as_ptr() as usize - ptr as usize;
match excessive {
0 => {},
0 => {
// input was a multiple of 4 x u64 bytes long; no new tail bytes.
},
1..=7 => {
self.tail = helper::read_int(slice::from_raw_parts(ptr as *const u8, excessive));
self.ntail = excessive;
// self.written does not need to be updated as we only gathered self.tail
// bytes after larger chunks.
},
8 => {
self.push(helper::read_u64(ptr));
// self.written is updated by self.push
},
9..=15 => {
self.push(helper::read_u64(ptr));
excessive -= 8;
self.tail = helper::read_int(slice::from_raw_parts(ptr, excessive));
self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));
self.ntail = excessive;
// self.written is updated by self.push
},
16 => {
let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
......
extern crate seahash;
use seahash::SeaHasher as H;
use std::hash::Hasher;
#[test]
fn hash_chunking_vs_not() {
    // Regression test, originally from
    // https://gitlab.redox-os.org/redox-os/seahash/issues/5:
    // feeding the same bytes through several write() calls must produce the
    // same digest as a single write() of the concatenation.
    let parts: [&[u8]; 3] = [
        b"This hashing algorithm was extracted from the Rustc compiler.",
        b" This is the same hashing algoirthm used for some internal operations in FireFox.",
        b" The strength of this algorithm is in hashing 8 bytes at a time on 64-bit platforms, where the FNV algorithm works on one byte at a time.",
    ];

    // Hash the three fragments with separate write() calls.
    let mut chunked = H::default();
    for part in &parts {
        chunked.write(part);
    }
    let hash1 = chunked.finish();

    // Hash the concatenated buffer in a single write().
    let joined = parts.concat();
    let mut whole = H::default();
    whole.write(&joined);
    let hash2 = whole.finish();

    // Also compare against the reference implementation and the one-shot
    // buffer hashing convenience function.
    let reference = seahash::reference::hash(&joined);
    let buffer = seahash::hash(&joined);

    println!("hash1: {:016x}", hash1);
    println!("hash2: {:016x}", hash2);
    println!("ref : {:016x}", reference);
    println!("buf : {:016x}", buffer);

    assert_eq!(hash1, hash2);
    assert_eq!(hash1, reference);
    assert_eq!(hash1, buffer);
}
#[test]
fn test_different_chunk_sizes() {
    // Assemble the full message from three fragments.
    let v = [
        b"This hashing algorithm was extracted from the Rustc compiler." as &[u8],
        b" This is the same hashing algoirthm used for some internal operations in FireFox.",
        b" The strength of this algorithm is in hashing 8 bytes at a time on 64-bit platforms, where the FNV algorithm works on one byte at a time.",
    ]
    .concat();

    // Baseline digest: hash everything with a single write().
    let expected = {
        let mut hasher = H::default();
        hasher.write(&v);
        hasher.finish()
    };

    // Re-hash the same bytes split at every possible chunk length; every
    // splitting must agree with the single-write baseline.
    for chunk_len in 1..v.len() {
        let mut hasher = H::default();
        v.chunks(chunk_len).for_each(|w| hasher.write(w));
        assert_eq!(expected, hasher.finish(), "failed with chunk_len={}", chunk_len);
    }
}
extern crate seahash;
#[macro_use]
extern crate quickcheck;
use quickcheck::TestResult;
use seahash::SeaHasher;
use seahash::hash;
use seahash::reference::hash as reference;
use std::hash::Hasher;
use std::num::{NonZeroUsize, NonZeroU8};
quickcheck! {
    fn chunked_matches_buffered(xs: Vec<u8>, chunk_size: NonZeroUsize, times: NonZeroU8, additional: u8) -> TestResult {
        // The vecs produced by quickcheck are perhaps a bit small by default,
        // so repeat the input `times` times; `additional` adds some noise so
        // we do not only exercise nice even lengths.
        let target_size = xs.len() * times.get() as usize + additional as usize;

        // Skip degenerate (empty) and excessively large inputs.
        if xs.is_empty() || target_size > 10_000_000 {
            return TestResult::discard();
        }

        // Materialize the inflated input by cycling the generated bytes.
        let data: Vec<u8> = xs.iter().copied().cycle().take(target_size).collect();

        // Digest from a single write() of the whole buffer.
        let mut whole = SeaHasher::default();
        whole.write(&data);
        let whole = whole.finish();

        // Digest from the same bytes written in fixed-size chunks.
        let mut chunked = SeaHasher::default();
        data.chunks(chunk_size.get()).for_each(|c| chunked.write(c));
        let chunked = chunked.finish();

        // All four ways of hashing the bytes must agree: chunked, buffered,
        // the one-shot convenience function, and the reference implementation.
        TestResult::from_bool(
            whole == chunked && whole == hash(&data) && whole == reference(&data),
        )
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment