xxhash-rust icon indicating copy to clipboard operation
xxhash-rust copied to clipboard

Performance compared with the other (twox-hash) implementation?

Open parthpatel opened this issue 1 year ago • 8 comments

Hi, I have the following test ([1]) to benchmark hash functions. I am comparing SipHasher, twox_hash, and xxh3_64 (from this git repo), and xxh3_64 from this repo always shows sub-par performance. [0] shows the results of this test, followed by the default target configuration reported by `rustc --print=cfg`. What am I doing wrong?

[0]

For 127 datalen and 100000000 iterations: 
 sip_hasher 2822 
 xxhash3-rust oneshot 6400 
 xxhash3-rust streaming 9643 
 twoxhash oneshot 452
For 129 datalen and 100000000 iterations: 
 sip_hasher 2853 
 xxhash3-rust oneshot 7387 
 xxhash3-rust streaming 9769 
 twoxhash oneshot 452
For 255 datalen and 100000000 iterations: 
 sip_hasher 4899ms 
 xxhash3-rust oneshot 14050ms 
 xxhash3-rust streaming 12819ms 
 twoxhash oneshot 5757ms 
For 511 datalen and 100000000 iterations: 
 sip_hasher 9376ms 
 xxhash3-rust oneshot 19228ms 
 xxhash3-rust streaming 18570ms 
 twoxhash oneshot 9335ms 

======
 % rustc --print=cfg                                                                             
debug_assertions
panic="unwind"
target_abi=""
target_arch="x86_64"
target_endian="little"
target_env="gnu"
target_family="unix"
target_feature="fxsr"
target_feature="sse"
target_feature="sse2"
target_has_atomic="16"
target_has_atomic="32"
target_has_atomic="64"
target_has_atomic="8"
target_has_atomic="ptr"
target_os="linux"
target_pointer_width="64"
target_vendor="unknown"
unix

[1]


    /// Runs the hash-speed comparison on two batches of random samples: one batch of
    /// 127-byte inputs and one batch of 129-byte inputs, i.e. either side of 128 bytes.
    ///
    /// Each batch holds ten samples, and every batch is hashed for the same number of
    /// repetitions.
    #[test]
    fn test_hash_speeds() {
        const SAMPLES_PER_BATCH: usize = 10;
        let repetitions = 100_000_000;

        // < 128 bytes: every sample is 127 random bytes.
        let samples127: Vec<Vec<u8>> = (0..SAMPLES_PER_BATCH)
            .map(|_| (0..127).map(|_| rand::random()).collect())
            .collect();
        test_hash_speed(samples127, repetitions);

        // > 128 bytes: every sample is 129 random bytes.
        let samples129: Vec<Vec<u8>> = (0..SAMPLES_PER_BATCH)
            .map(|_| (0..129).map(|_| rand::random()).collect())
            .collect();
        test_hash_speed(samples129, repetitions);
    }

    fn test_hash_speed(data: Vec<Vec<u8>>, total: usize) {
        let seed = 12897;
        let seed2 = 13297;
        let mut xxh3_hasher: Xxh3 = Xxh3::with_seed(seed);
        let mut sip_hasher = std::hash::SipHasher::new_with_keys(seed, seed2);

        let data_len = data[0].len();
        let mut new_data: Vec<&[u8]> = Vec::new();
        data.iter().for_each(|x| {
            new_data.push(x.as_slice());
        });
        
        let mut a: u64 = 0;

        use std::time::SystemTime;
        // siphasher
        let mut now = SystemTime::now();
        for _i in 1..total {
            sip_hasher.write(new_data[_i % new_data.len()]);
            a = a.wrapping_add(sip_hasher.finish());
        }
        let siphasher_then = now.elapsed().unwrap().as_millis();

        // xxh3_64 oneshot
        now = SystemTime::now();

        for _i in 1..total {
            a = a.wrapping_add(xxh3_64_with_seed(new_data[_i % new_data.len()], seed));
        }
        let xxh3_64_oneshot_then = now.elapsed().unwrap().as_millis();

        // xxh3_64 streaming
        now = SystemTime::now();
        for _i in 1..total {
            xxh3_hasher.write(new_data[_i % new_data.len()]);
            a = a.wrapping_add(xxh3_hasher.finish());
        }
        let xxh3_64_streaming_then = now.elapsed().unwrap().as_millis();

        // twox_hash oneshot
        now = SystemTime::now();
        for _i in 1..total {
            a = a.wrapping_add(twox_hash::XxHash3_64::oneshot_with_seed(
                seed,
                new_data[_i % new_data.len()],
            ));
        }
        let twox_oneshot_then = now.elapsed().unwrap().as_millis();

        println!("For {} datalen and {} iterations: \n sip_hasher {} \n xxhash3-rust oneshot {} \n xxhash3-rust streaming {} \n twoxhash oneshot {}", data_len, total, siphasher_then, xxh3_64_oneshot_then, xxh3_64_streaming_then, twox_oneshot_then);
    }

parthpatel avatar Dec 18 '24 18:12 parthpatel