xxhash-rust
xxhash-rust copied to clipboard
Performance compared with the other (twox_hash) implementation?
Hi, I have the following test ([1]) to benchmark hash functions. I am comparing SipHasher, twox_hash, and xxh3_64 (from this git repo). I consistently get sub-par performance from this repo's xxh3_64. [0] shows the features enabled by default in cargo and the results of this test. What am I doing wrong?
[0]
For 127 datalen and 100000000 iterations:
sip_hasher 2822ms
xxhash3-rust oneshot 6400ms
xxhash3-rust streaming 9643ms
twoxhash oneshot 452ms
For 129 datalen and 100000000 iterations:
sip_hasher 2853ms
xxhash3-rust oneshot 7387ms
xxhash3-rust streaming 9769ms
twoxhash oneshot 452ms
For 255 datalen and 100000000 iterations:
sip_hasher 4899ms
xxhash3-rust oneshot 14050ms
xxhash3-rust streaming 12819ms
twoxhash oneshot 5757ms
For 511 datalen and 100000000 iterations:
sip_hasher 9376ms
xxhash3-rust oneshot 19228ms
xxhash3-rust streaming 18570ms
twoxhash oneshot 9335ms
======
% rustc --print=cfg
debug_assertions
panic="unwind"
target_abi=""
target_arch="x86_64"
target_endian="little"
target_env="gnu"
target_family="unix"
target_feature="fxsr"
target_feature="sse"
target_feature="sse2"
target_has_atomic="16"
target_has_atomic="32"
target_has_atomic="64"
target_has_atomic="8"
target_has_atomic="ptr"
target_os="linux"
target_pointer_width="64"
target_vendor="unknown"
unix
[1]
/// Benchmark driver: builds two batches of random byte buffers (127 bytes and
/// 129 bytes per buffer) and hands each batch to `test_hash_speed`.
#[test]
fn test_hash_speeds() {
    const ROUNDS: usize = 100000000;
    const BATCH: usize = 10;

    // Produces `BATCH` buffers, each filled with `len` random bytes.
    let random_batch = |len: usize| -> Vec<Vec<u8>> {
        (0..BATCH)
            .map(|_| (0..len).map(|_| rand::random::<u8>()).collect())
            .collect()
    };

    // < 128 bytes
    test_hash_speed(random_batch(127), ROUNDS);

    // > 128 bytes
    test_hash_speed(random_batch(129), ROUNDS);
}
/// Times four hashing strategies over the same inputs and prints the elapsed
/// wall-clock time of each, in milliseconds.
///
/// Strategies measured, in order: `SipHasher` (streaming), `xxh3_64_with_seed`
/// (one-shot), `Xxh3` (streaming) and `twox_hash::XxHash3_64::oneshot_with_seed`.
///
/// * `data`  - sample buffers; round `i` hashes `data[i % data.len()]`. The
///   reported "datalen" is the length of the first buffer.
/// * `total` - number of hashing rounds performed per strategy.
///
/// # Panics
///
/// Panics if `data` is empty.
fn test_hash_speed(data: Vec<Vec<u8>>, total: usize) {
    use std::hint::black_box;
    // `Instant` is monotonic; `SystemTime` can jump backwards, which would make
    // `elapsed()` fail and the measurement meaningless.
    use std::time::Instant;

    assert!(!data.is_empty(), "test_hash_speed needs at least one sample");

    let seed = 12897;
    let seed2 = 13297;
    let mut xxh3_hasher: Xxh3 = Xxh3::with_seed(seed);

    let data_len = data[0].len();
    let samples: Vec<&[u8]> = data.iter().map(Vec::as_slice).collect();

    // Every hash is folded into `a`, and `a` is passed to `black_box` at the
    // end, so the optimizer cannot treat the hashing work as dead code.
    let mut a: u64 = 0;

    // siphasher
    let start = Instant::now();
    for i in 0..total {
        // A fresh hasher per round: otherwise each `write` would extend one
        // ever-growing stream instead of hashing a single sample.
        let mut sip_hasher = std::hash::SipHasher::new_with_keys(seed, seed2);
        sip_hasher.write(black_box(samples[i % samples.len()]));
        a = a.wrapping_add(sip_hasher.finish());
    }
    let siphasher_then = start.elapsed().as_millis();

    // xxh3_64 oneshot
    let start = Instant::now();
    for i in 0..total {
        a = a.wrapping_add(xxh3_64_with_seed(black_box(samples[i % samples.len()]), seed));
    }
    let xxh3_64_oneshot_then = start.elapsed().as_millis();

    // xxh3_64 streaming
    let start = Instant::now();
    for i in 0..total {
        // Clear the state left by the previous round so that exactly one
        // sample is hashed, matching what the one-shot variants measure.
        xxh3_hasher.reset();
        xxh3_hasher.write(black_box(samples[i % samples.len()]));
        a = a.wrapping_add(xxh3_hasher.finish());
    }
    let xxh3_64_streaming_then = start.elapsed().as_millis();

    // twox_hash oneshot
    let start = Instant::now();
    for i in 0..total {
        a = a.wrapping_add(twox_hash::XxHash3_64::oneshot_with_seed(
            seed,
            black_box(samples[i % samples.len()]),
        ));
    }
    let twox_oneshot_then = start.elapsed().as_millis();

    black_box(a);

    println!("For {} datalen and {} iterations: \n sip_hasher {} \n xxhash3-rust oneshot {} \n xxhash3-rust streaming {} \n twoxhash oneshot {}", data_len, total, siphasher_then, xxh3_64_oneshot_then, xxh3_64_streaming_then, twox_oneshot_then);
}