Commit b82dcef0 authored by Tom Almeida's avatar Tom Almeida
Browse files

Merge branch 'fix_chunking' into 'master'

stream: Fix chunking sensitivity.

Closes #1

See merge request !2
parents c1cf7cff 703e3f06
Pipeline #6924 passed with stage
in 18 seconds
......@@ -2,6 +2,9 @@
extern crate test;
extern crate seahash;
extern crate core;
use core::hash::Hasher;
#[bench]
fn gigabyte(b: &mut test::Bencher) {
......@@ -17,3 +20,17 @@ fn gigabyte(b: &mut test::Bencher) {
x
})
}
/// Benchmarks the streaming hasher by feeding it 250 000 writes of a 4096-byte buffer
/// (about one gigabyte in total) and finishing the hash.
#[bench]
fn gigabyte_stream(b: &mut test::Bencher) {
    b.iter(|| {
        let mut buf = [15u8; 4096];
        let mut hasher = seahash::SeaHasher::default();
        for _ in 0..250_000 {
            Hasher::write(&mut hasher, &buf);
            // Step the first byte so consecutive writes differ. The previous
            // `buf[0] += buf[0].wrapping_add(1)` mapped x to 2x + 1, which reaches 255 after
            // four iterations and then stays there (255 + 0), so the input stopped changing.
            buf[0] = buf[0].wrapping_add(1);
        }
        hasher.finish()
    })
}
......@@ -69,7 +69,7 @@ impl State {
// Handle the excessive bytes.
match excessive {
0 => {},
1...7 => {
1..=7 => {
// 1 or more excessive.
// Write the last excessive bytes (<8 bytes).
......@@ -87,7 +87,7 @@ impl State {
// Diffuse.
a = helper::diffuse(a);
},
9...15 => {
9..=15 => {
// More than 8 bytes excessive.
// Mix in the partial block.
......@@ -106,14 +106,10 @@ impl State {
// 16 bytes excessive.
// Mix in the partial block.
a ^= helper::read_u64(ptr);
b ^= helper::read_u64(ptr.offset(8));
// Diffuse.
a = helper::diffuse(a);
b = helper::diffuse(b);
a = helper::diffuse(a ^ helper::read_u64(ptr));
b = helper::diffuse(b ^ helper::read_u64(ptr.offset(8)));
},
17...23 => {
17..=23 => {
// 16 bytes or more excessive.
// Mix in the partial block.
......
......@@ -73,7 +73,7 @@ pub unsafe fn read_u64(ptr: *const u8) -> u64 {
///
/// This is a bijective function emitting chaotic behavior. Such functions are used as building
/// blocks for hash functions.
pub fn diffuse(mut x: u64) -> u64 {
pub const fn diffuse(mut x: u64) -> u64 {
// These are derived from the PCG RNG's round. Thanks to @Veedrac for proposing this. The basic
// idea is that we use dynamic shifts, which are determined by the input itself. The shift is
// chosen by the higher bits, which means that changing those flips the lower bits, which
......@@ -89,7 +89,7 @@ pub fn diffuse(mut x: u64) -> u64 {
}
/// Reverse the `diffuse` function.
pub fn undiffuse(mut x: u64) -> u64 {
pub const fn undiffuse(mut x: u64) -> u64 {
// 0x2f72b4215a3d8caf is the modular multiplicative inverse of the constant used in `diffuse`.
x = x.wrapping_mul(0x2f72b4215a3d8caf);
......
use core::hash::Hasher;
use core::slice;
use {hash_seeded, helper};
use helper;
/// The streaming version of the algorithm.
#[derive(Clone, Copy)]
pub struct SeaHasher {
/// The state of the hasher.
state: u64,
/// The first key.
k1: u64,
/// The second key.
k2: u64,
/// The third key.
k3: u64,
/// The fourth key.
k4: u64,
state: (u64, u64, u64, u64),
/// The number of bytes we have written in total
written: u64,
/// Our tail
tail: u64,
/// The number of bytes in the tail
ntail: usize,
}
impl Default for SeaHasher {
fn default() -> SeaHasher {
SeaHasher::with_seeds(0xe7b0c93ca8525013, 0x011d02b854ae8182, 0x7bcc5cf9c39cec76, 0xfa336285d102d083)
SeaHasher::with_seeds(0x16f11fe89b0d677c, 0xb480a793d8e6c86c, 0x6fe2e5aaf078ebc9, 0x14f994a4c5259381)
}
}
......@@ -34,90 +33,226 @@ impl SeaHasher {
/// For maximum quality, these seeds should be chosen at random.
pub fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> SeaHasher {
SeaHasher {
state: k1 ^ k3,
k1: k1,
k2: k2,
k3: k3,
k4: k4,
state: (k1, k2, k3, k4),
written: 0,
tail: 0,
ntail: 0,
}
}
/// Write some integer in.
///
/// This applies XEX key whitening with the keys given as argument.
fn write(&mut self, n: u64, k1: u64, k2: u64) {
self.state ^= n ^ k1;
self.state = helper::diffuse(self.state) ^ k2;
/// Absorbs one full 64-bit word into the state.
///
/// The word is XORed into the first lane and diffused. The remaining lanes each move down one
/// position, the diffused value becomes the last lane, and the byte counter advances by eight.
#[inline(always)]
fn push(&mut self, x: u64) {
    let (head, second, third, fourth) = self.state;
    self.state = (second, third, fourth, helper::diffuse(head ^ x));
    self.written += 8;
}
/// Feeds `bytes` into the hasher, buffering any trailing partial word.
///
/// The input is consumed in three phases:
///
/// 1. The pending tail (`tail`/`ntail`) is topped up; once it holds 8 bytes it is pushed as a
///    full word. If the input is too short to complete it, the bytes are only buffered.
/// 2. Whole 32-byte blocks are mixed into the four state lanes in place.
/// 3. The remaining 0..=31 bytes are absorbed word by word, and the final 0..=7 bytes become
///    the new tail.
///
/// `written` counts every byte that has been mixed into the state (buffered tail bytes are
/// excluded), because `finish` folds `written + ntail` into the digest as the total length.
#[inline(always)]
fn push_bytes(&mut self, bytes: &[u8]) {
    // How many input bytes go towards completing the pending tail word.
    let copied = core::cmp::min(8 - self.ntail, bytes.len());
    // SAFETY: `copied <= 8 - ntail`, so the copy stays inside the 8-byte `this` array, and
    // `copied <= bytes.len()`, so it stays inside `bytes`. Every later read is at an offset
    // below `bytes.len()`: the bulk loop stops at a multiple of 32 not exceeding the remaining
    // length, and each match arm reads exactly `excessive` further bytes. This relies on
    // `ntail < 8`, which every assignment in this function upholds.
    unsafe {
        let mut this = self.tail.to_le_bytes();
        let mut ptr = bytes.as_ptr();
        // Derive the destination from the whole array: a pointer made from `&mut this[i]`
        // only covers that single byte, yet up to 8 bytes are written here.
        ptr.copy_to_nonoverlapping(this.as_mut_ptr().add(self.ntail), copied);
        // The sum is at most 8 by construction of `copied`.
        if copied + self.ntail != 8 {
            // Still not a full word: keep buffering.
            self.ntail += copied;
            self.tail = u64::from_le_bytes(this);
        } else {
            // `push` accounts for these 8 bytes in `written`.
            self.push(u64::from_le_bytes(this));
            self.ntail = 0;
            self.tail = 0;

            // The old tail is done; bulk-process the rest in 32-byte blocks.
            ptr = ptr.offset(copied as isize);
            let end_ptr = ptr.offset((bytes.len() - copied) as isize & !0x1F);
            while end_ptr > ptr {
                self.state.0 = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                self.state.1 = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                self.state.2 = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                self.state.3 = helper::diffuse(self.state.3 ^ helper::read_u64(ptr.offset(24)));
                ptr = ptr.offset(32);
                // These bytes are part of the hashed length too; without this the digest
                // depends on how the input was split across `write` calls.
                self.written += 32;
            }

            // Bytes left after the bulk loop (0..=31). Subtracting the consumed count avoids
            // the `len + address` intermediate, which could overflow.
            let mut excessive = bytes.len() - (ptr as usize - bytes.as_ptr() as usize);
            match excessive {
                0 => {},
                1..=7 => {
                    // Only a partial word: buffer it; `written` is untouched.
                    self.tail = helper::read_int(slice::from_raw_parts(ptr, excessive));
                    self.ntail = excessive;
                },
                8 => {
                    self.push(helper::read_u64(ptr));
                },
                9..=15 => {
                    self.push(helper::read_u64(ptr));
                    excessive -= 8;
                    // The tail starts after the word just pushed, not at `ptr`.
                    self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(8), excessive));
                    self.ntail = excessive;
                },
                16 => {
                    let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                    let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                    // Rotate by two lanes, which matches two consecutive `push` calls.
                    self.state.0 = self.state.2;
                    self.state.1 = self.state.3;
                    self.state.2 = a;
                    self.state.3 = b;
                    self.written += 16;
                },
                17..=23 => {
                    let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                    let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                    // Rotate by two lanes.
                    self.state.0 = self.state.2;
                    self.state.1 = self.state.3;
                    self.state.2 = a;
                    self.state.3 = b;
                    excessive -= 16;
                    self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(16), excessive));
                    self.ntail = excessive;
                    self.written += 16;
                },
                24 => {
                    let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                    let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                    let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                    // Rotate by three lanes.
                    self.state.0 = self.state.3;
                    self.state.1 = a;
                    self.state.2 = b;
                    self.state.3 = c;
                    self.written += 24;
                },
                _ => {
                    // 25..=31 bytes: three full words plus a partial tail.
                    let a = helper::diffuse(self.state.0 ^ helper::read_u64(ptr));
                    let b = helper::diffuse(self.state.1 ^ helper::read_u64(ptr.offset(8)));
                    let c = helper::diffuse(self.state.2 ^ helper::read_u64(ptr.offset(16)));
                    self.state.0 = self.state.3;
                    self.state.1 = a;
                    self.state.2 = b;
                    self.state.3 = c;
                    excessive -= 24;
                    self.tail = helper::read_int(slice::from_raw_parts(ptr.offset(24), excessive));
                    self.ntail = excessive;
                    self.written += 24;
                }
            }
        }
    }
}
}
impl Hasher for SeaHasher {
fn finish(&self) -> u64 {
helper::diffuse(self.state ^ self.k3) ^ self.k4
let a = if self.ntail > 0 {
let tail = helper::read_int(&self.tail.to_le_bytes()[..self.ntail]);
helper::diffuse(self.state.0 ^ tail)
} else {
self.state.0
};
helper::diffuse(a ^ self.state.1 ^ self.state.2 ^ self.state.3 ^ self.written + self.ntail as u64)
}
fn write(&mut self, bytes: &[u8]) {
self.state ^= hash_seeded(bytes, self.k1, self.k2, self.k3, self.k4);
self.state = helper::diffuse(self.state);
self.push_bytes(bytes)
}
fn write_u64(&mut self, n: u64) {
let k1 = self.k1;
let k2 = self.k2;
self.write(n, k1, k2)
self.write(&n.to_le_bytes())
}
fn write_u8(&mut self, n: u8) {
let k1 = self.k1;
let k3 = self.k3;
self.write(n as u64, k1, k3)
self.write(&n.to_le_bytes())
}
fn write_u16(&mut self, n: u16) {
let k1 = self.k1;
let k2 = self.k2;
self.write(n as u64, k2, k1)
self.write(&n.to_le_bytes())
}
fn write_u32(&mut self, n: u32) {
let k2 = self.k2;
let k3 = self.k3;
self.write(n as u64, k2, k3)
self.write(&n.to_le_bytes())
}
fn write_usize(&mut self, n: usize) {
let k2 = self.k2;
let k3 = self.k3;
self.write(n as u64, k3, k2)
self.write(&n.to_le_bytes())
}
fn write_i64(&mut self, n: i64) {
let k1 = self.k1;
let k2 = self.k2;
self.write(n as u64, !k1, !k2)
self.write(&n.to_le_bytes())
}
fn write_i8(&mut self, n: i8) {
let k1 = self.k1;
let k3 = self.k3;
self.write(n as u64, !k1, !k3)
self.write(&n.to_le_bytes())
}
fn write_i16(&mut self, n: i16) {
let k1 = self.k1;
let k2 = self.k2;
self.write(n as u64, !k2, !k1)
self.write(&n.to_le_bytes())
}
fn write_i32(&mut self, n: i32) {
let k2 = self.k2;
let k3 = self.k3;
self.write(n as u64, !k2, !k3)
self.write(&n.to_le_bytes())
}
fn write_isize(&mut self, n: isize) {
let k2 = self.k2;
let k3 = self.k3;
self.write(n as u64, !k3, !k2)
self.write(&n.to_le_bytes())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash_seeded;
    use core::hash::Hasher;

    /// Sixteen-byte fixture: eight `0xFF` bytes followed by eight zero bytes.
    const FIXTURE: [u8; 16] = [
        0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF,
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,
    ];

    /// Runs `feed` against a default-seeded hasher and returns the finished digest.
    fn digest_of<F: FnOnce(&mut SeaHasher)>(feed: F) -> u64 {
        let mut hasher = SeaHasher::default();
        feed(&mut hasher);
        hasher.finish()
    }

    /// The digest must not depend on how the same bytes are split across writes.
    #[test]
    fn chunked_equiv() {
        // Everything in a single write.
        let whole = digest_of(|h| Hasher::write(h, &FIXTURE));

        // Split on a word boundary.
        let halves = digest_of(|h| {
            Hasher::write(h, &FIXTURE[..8]);
            Hasher::write(h, &FIXTURE[8..]);
        });

        // Split in the middle of a word.
        let uneven = digest_of(|h| {
            Hasher::write(h, &FIXTURE[..3]);
            Hasher::write(h, &FIXTURE[3..]);
        });

        // The same bytes supplied through the typed integer writers.
        let typed = digest_of(|h| {
            Hasher::write_u16(h, 0xffff);
            Hasher::write_u16(h, 0xffff);
            Hasher::write_u32(h, 0xffffffff);
            Hasher::write_u64(h, 0);
        });

        assert_eq!(whole, halves);
        assert_eq!(whole, uneven);
        assert_eq!(whole, typed);
    }

    /// The streaming hasher must agree with the one-shot `hash_seeded` for the same seeds.
    #[test]
    fn match_optimized() {
        let (k1, k2, k3, k4) = (
            0xe7b0c93ca8525013,
            0x011d02b854ae8182,
            0x7bcc5cf9c39cec76,
            0xfa336285d102d083,
        );

        let mut streaming = SeaHasher::with_seeds(k1, k2, k3, k4);
        Hasher::write(&mut streaming, &FIXTURE);
        let stream_hash = streaming.finish();

        let buffer_hash = hash_seeded(&FIXTURE, k1, k2, k3, k4);
        assert_eq!(buffer_hash, stream_hash)
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment