PicoSHA2 icon indicating copy to clipboard operation
PicoSHA2 copied to clipboard

different hashes computed

Open skysley opened this issue 5 years ago • 4 comments

Running this program

#include "picosha2.h"

#include <fstream>
#include <iostream>
#include <cstring>


// Opens `fileName` in binary mode, hashes the stream with picosha2::hash256
// into hashVec, then passes hashVec to picosha2::hash256_hex_string and
// returns the resulting hex string.
//
// NOTE: hash256_hex_string is functionally hash256() + bytes_to_hex_string(),
// so the second call hashes the digest bytes a second time. The value
// returned here is therefore not the SHA-256 of the file contents;
// bytes_to_hex_string is the conversion-only call.
std::string hash1(std::string fileName)
{
    std::ifstream inputFile(fileName, std::ios::binary);
    // NOTE(review): the digest buffer uses plain char; unsigned char is
    // needed here as well for a correct result.
    std::vector<char> hashVec(picosha2::k_digest_size);
    std::string hexHash;    

    // First hash: digest of the file stream, written into hashVec.
    picosha2::hash256(inputFile, hashVec.begin(), hashVec.end());
    // Second hash: digest of hashVec itself, rendered as hex.
    picosha2::hash256_hex_string(hashVec, hexHash);
    inputFile.close();
    
    return hexHash;
}

// Reads all of `fileName` into a vector via stream-buffer iterators, hashes
// that vector with picosha2::hash256 into hashVec, then passes hashVec to
// picosha2::hash256_hex_string and returns the resulting hex string.
//
// NOTE: hash256_hex_string is functionally hash256() + bytes_to_hex_string(),
// so the second call hashes the digest bytes a second time. The value
// returned here is therefore not the SHA-256 of the file contents;
// bytes_to_hex_string is the conversion-only call.
std::string hash2(std::string fileName)
{
    std::ifstream inputFile(fileName, std::ios::binary);
    // A default-constructed istreambuf_iterator is the end-of-stream marker.
    std::istreambuf_iterator<char> start(inputFile);
    std::istreambuf_iterator<char> end;
    std::vector<char> data(start, end);
    // NOTE(review): the digest buffer uses plain char; unsigned char is
    // needed here as well for a correct result.
    std::vector<char> hashVec(picosha2::k_digest_size);
    std::string hexHash;    
    
    // First hash: digest of the file bytes, written into hashVec.
    picosha2::hash256(data.begin(), data.end(), hashVec);
    // Second hash: digest of hashVec itself, rendered as hex.
    picosha2::hash256_hex_string(hashVec, hexHash);
    inputFile.close();
    
    return hexHash;
}

// Loads the whole file into memory, feeds that buffer to an incremental
// hasher in a single process() call, and returns the digest as a hex string.
std::string hash3(std::string fileName)
{
    std::ifstream source(fileName, std::ios::binary);

    // Pull the file in one byte at a time through stream-buffer iterators;
    // a default-constructed iterator marks end-of-stream.
    std::vector<char> bytes;
    const std::istreambuf_iterator<char> eof;
    std::istreambuf_iterator<char> cursor(source);
    while (cursor != eof) {
        bytes.push_back(*cursor);
        ++cursor;
    }
    source.close();

    // Vector iterators are random-access, which is what process() expects.
    picosha2::hash256_one_by_one hasher;
    hasher.process(bytes.begin(), bytes.end());
    hasher.finish();

    std::string hex;
    picosha2::get_hash_hex_string(hasher, hex);
    return hex;
}

// Opens `fileName` in binary mode and hands a pair of
// std::istreambuf_iterator<char> directly to hash256_one_by_one::process(),
// then returns the hasher's digest as a hex string.
//
// NOTE: hash256_one_by_one::process() can take only random-access iterators,
// and std::istreambuf_iterator is not a random-access iterator, so the
// digest produced by this function is wrong.
std::string hash4(std::string fileName)
{
    picosha2::hash256_one_by_one hasher;
    std::ifstream inputFile(fileName, std::ios::binary);

    // The second argument is a default-constructed (end-of-stream) iterator.
    hasher.process(std::istreambuf_iterator<char>(inputFile),
                   std::istreambuf_iterator<char>());

    hasher.finish();
    
    std::string hexHash;
    picosha2::get_hash_hex_string(hasher, hexHash);
    inputFile.close();
    
    
    return hexHash;
}

// Hashes a fixed in-memory sentence with picosha2::hash256 into hashVec,
// then passes hashVec to picosha2::hash256_hex_string and returns the
// resulting hex string.
//
// NOTE: hash256_hex_string is functionally hash256() + bytes_to_hex_string(),
// so the second call hashes the digest bytes a second time. The value
// returned here is therefore not the SHA-256 of the sentence;
// bytes_to_hex_string is the conversion-only call.
std::string hash5()
{
    char text[] = "The quick brown fox jumps over the lazy dog";
    // NOTE(review): the digest buffer uses plain char; unsigned char is
    // needed here as well for a correct result.
    std::vector<char> hashVec(picosha2::k_digest_size);
    std::string hexHash;    

    // The range [&text[0], &text[strlen(text)]) excludes the terminating NUL.
    picosha2::hash256(&text[0], &text[strlen(text)], hashVec);
    // Second hash: digest of hashVec itself, rendered as hex.
    picosha2::hash256_hex_string(hashVec, hexHash);
    
    return hexHash;    
}

// Hashes a fixed sentence with the one-call string helper, following the
// example in the library's README.md, and returns the hex digest.
std::string hash6()
{
    std::string message("The quick brown fox jumps over the lazy dog");

    // hash256_hex_string both hashes `message` and hex-encodes the digest.
    std::string hexDigest;
    picosha2::hash256_hex_string(message, hexDigest);
    return hexDigest;
}

// Runs every hashing variant against "test.txt" (or the built-in sentence)
// and prints one labelled hex digest per line.
int main(int, char**)
{
    const std::string filename = "test.txt";

    // File-based variants.
    const std::string fromStream = hash1(filename);
    std::cout << "hash1: " << fromStream << std::endl;

    const std::string fromVector = hash2(filename);
    std::cout << "hash2: " << fromVector << std::endl;

    const std::string fromBufferedHasher = hash3(filename);
    std::cout << "hash3: " << fromBufferedHasher << std::endl;

    const std::string fromIteratorHasher = hash4(filename);
    std::cout << "hash4: " << fromIteratorHasher << std::endl;

    // In-memory variants.
    const std::string fromCharArray = hash5();
    std::cout << "hash5: " << fromCharArray << std::endl;

    const std::string fromString = hash6();
    std::cout << "hash6: " << fromString << std::endl;

    return 0;
}

on a file that contains "The quick brown fox jumps over the lazy dog" (without a trailing newline!) results in several different hashes:

g++ -std=c++11 test_hashes.cpp -o test_hashes && ./test_hashes

hash1: 6d37795021e544d82b41850edf7aabab9a0ebe274e54a519840c4666f35b3937
hash2: 6d37795021e544d82b41850edf7aabab9a0ebe274e54a519840c4666f35b3937
hash3: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash4: dc28302600b7b0f5f2c8f20e3e5ca99b705337d6712640d607eff1237d0b8c72
hash5: 6d37795021e544d82b41850edf7aabab9a0ebe274e54a519840c4666f35b3937
hash6: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592

skysley avatar Oct 26 '18 13:10 skysley

Hi @skysley. Sorry for my late reply.

hash1, hash2 and hash5 share the same issue. hash256_hex_string itself calculates a hash, so picosha2::hash256_hex_string(hashVec, hexHash); calculates the hash of hashVec, which already holds a digest. That is why the result is wrong. You can use bytes_to_hex_string here instead.

hash4 is my fault. hash256_one_by_one::process can take only random-access iterators, and istreambuf_iterator is not a random-access iterator. I should have documented that fact. I apologize.

hash3 and hash6 are correct.

okdshin avatar Nov 13 '18 13:11 okdshin

Thank you for the reply. I am sorry, I did not get what is wrong with my usage of hash256_hex_string. Could you please explain?

Nevertheless, I've already switched to Crypto++.

skysley avatar Nov 13 '18 13:11 skysley

hash256_hex_string() is functionally equivalent to hash256() + bytes_to_hex_string(). So calling hash256_hex_string() after hash256() means calculating the hash twice.

You can modify it like this:

picosha2::hash256(inputFile, hashVec.begin(), hashVec.end());
//picosha2::hash256_hex_string(hashVec, hexHash);
picosha2::bytes_to_hex_string(hashVec.begin(), hashVec.end(), hexHash);

If you would like to simply add a SHA-256 function to your software, you can use PicoSHA2, which consists of a single header file. Crypto++ is a good library, though.

okdshin avatar Nov 14 '18 02:11 okdshin

Okay, thanks for the clarifications. In addition to your comments, one also needs to use unsigned char instead of char for hash1, hash2, and hash5.

Additionally, if I change hash256_one_by_one::process like this, all hashes are computed correctly:

//add_to_data_length(static_cast<word_t>(std::distance(first, last)));
//std::copy(first, last, std::back_inserter(buffer_));
size_t old_size = buffer_.size();
std::copy(first, last, std::back_inserter(buffer_));
add_to_data_length(static_cast<word_t>(buffer_.size() - old_size));

However, for large files (iterators) it would be nice if not all of the data were loaded into buffer_ at once.

Updated test program:

#include "picosha2.h"

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>


// Hashes the file through the std::ifstream overload of picosha2::hash256
// and returns the raw digest converted to a hex string.
std::string hash1(std::string fileName)
{
    std::vector<unsigned char> digest(picosha2::k_digest_size);

    std::ifstream source(fileName, std::ios::binary);
    picosha2::hash256(source, digest.begin(), digest.end());
    source.close();

    // Conversion only: the digest bytes are hex-encoded, not hashed again.
    std::string hex;
    picosha2::bytes_to_hex_string(digest.begin(), digest.end(), hex);
    return hex;
}

// Reads the whole file into memory, hashes that buffer with the
// iterator-range overload of picosha2::hash256, and returns the digest as a
// hex string.
std::string hash2(std::string fileName)
{
    std::ifstream source(fileName, std::ios::binary);

    // The extra parentheses keep this from parsing as a function declaration;
    // the default-constructed iterator marks end-of-stream.
    std::vector<unsigned char> contents((std::istreambuf_iterator<char>(source)),
                                        std::istreambuf_iterator<char>());
    source.close();

    std::vector<unsigned char> digest(picosha2::k_digest_size);
    picosha2::hash256(contents.begin(), contents.end(), digest);

    // Conversion only: the digest bytes are hex-encoded, not hashed again.
    std::string hex;
    picosha2::bytes_to_hex_string(digest.begin(), digest.end(), hex);
    return hex;
}

// Buffers the entire file in a vector, pushes it through an incremental
// hasher in one process() call, and returns the digest as a hex string.
std::string hash3(std::string fileName)
{
    std::ifstream source(fileName, std::ios::binary);

    // Range-construct the buffer straight from the stream; the extra
    // parentheses avoid the "most vexing parse".
    std::vector<char> bytes((std::istreambuf_iterator<char>(source)),
                            std::istreambuf_iterator<char>());
    source.close();

    // Vector iterators are random-access, which is what process() expects.
    picosha2::hash256_one_by_one hasher;
    hasher.process(bytes.begin(), bytes.end());
    hasher.finish();

    std::string hex;
    picosha2::get_hash_hex_string(hasher, hex);
    return hex;
}

// Streams `fileName` through an incremental SHA-256 hasher in fixed-size
// chunks and returns the digest as a hex string.
//
// Each chunk is read into a small vector and passed to process() as a pair
// of vector iterators. Those are random-access iterators, which is what
// hash256_one_by_one::process() supports; handing it
// std::istreambuf_iterator directly only gives the right digest with a
// modified process(). Reading in chunks also means the file is never held
// in memory all at once by this function.
//
// If the file cannot be opened, nothing is read and the digest of empty
// input is returned.
std::string hash4(std::string fileName)
{
    picosha2::hash256_one_by_one hasher;
    std::ifstream inputFile(fileName, std::ios::binary);

    // 4096 is a multiple of the 64-byte SHA-256 block size.
    std::vector<char> chunk(4096);
    while (inputFile) {
        inputFile.read(chunk.data(), static_cast<std::streamsize>(chunk.size()));
        // read() sets failbit on a short final read, so trust gcount() for
        // how many bytes actually arrived.
        const std::streamsize bytesRead = inputFile.gcount();
        if (bytesRead > 0) {
            hasher.process(chunk.begin(), chunk.begin() + bytesRead);
        }
    }
    hasher.finish();

    std::string hexHash;
    picosha2::get_hash_hex_string(hasher, hexHash);
    return hexHash;
}

// Hashes a fixed in-memory sentence through the pointer-range overload of
// picosha2::hash256 and returns the digest as a hex string.
std::string hash5()
{
    char sentence[] = "The quick brown fox jumps over the lazy dog";

    // Half-open range over the characters, stopping before the NUL terminator.
    char* first = sentence;
    char* last = sentence + std::strlen(sentence);

    std::vector<unsigned char> digest(picosha2::k_digest_size);
    picosha2::hash256(first, last, digest);

    // Conversion only: the digest bytes are hex-encoded, not hashed again.
    std::string hex;
    picosha2::bytes_to_hex_string(digest.begin(), digest.end(), hex);
    return hex;
}

// Hashes a fixed sentence with the one-call string helper, following the
// example in the library's README.md, and returns the hex digest.
std::string hash6()
{
    std::string message("The quick brown fox jumps over the lazy dog");

    // hash256_hex_string both hashes `message` and hex-encodes the digest.
    std::string hexDigest;
    picosha2::hash256_hex_string(message, hexDigest);
    return hexDigest;
}

// Runs every hashing variant against "test.txt" (or the built-in sentence)
// and prints one labelled hex digest per line.
int main(int, char**)
{
    const std::string filename = "test.txt";

    // File-based variants.
    const std::string fromStream = hash1(filename);
    std::cout << "hash1: " << fromStream << std::endl;

    const std::string fromVector = hash2(filename);
    std::cout << "hash2: " << fromVector << std::endl;

    const std::string fromBufferedHasher = hash3(filename);
    std::cout << "hash3: " << fromBufferedHasher << std::endl;

    const std::string fromChunkedHasher = hash4(filename);
    std::cout << "hash4: " << fromChunkedHasher << std::endl;

    // In-memory variants.
    const std::string fromCharArray = hash5();
    std::cout << "hash5: " << fromCharArray << std::endl;

    const std::string fromString = hash6();
    std::cout << "hash6: " << fromString << std::endl;

    return 0;
}

Program output:

hash1: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash2: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash3: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash4: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash5: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592
hash6: d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592

skysley avatar Nov 14 '18 08:11 skysley