js-xxhash icon indicating copy to clipboard operation
js-xxhash copied to clipboard

Add version of xxHash64 that uses native BigInt

Open Daniel15 opened this issue 4 years ago • 0 comments

Adds a new file, xxhash64-bigint.js, that uses native JavaScript BigInts rather than the cuint library. I haven't added it to your build scripts or anything like that yet, as I'm not quite sure how it should be packaged. Probably the best thing is to have a separate npm package (eg. xxhashjs-bigint) that doesn't depend on the cunit package at all. It could also be added to the existing package, but that means the cunit package would still be pulled in even when not needed.

I've been testing with scripts similar to this:

debugger;
const New64 = require('./lib/xxhash64-bigint');
const Old64 = require('./lib/xxhash64');

const input = 'hello world AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';

console.log('NEW: ', New64(input, 0));
console.log('OLD: ', Old64(input, 0).toString());

Which results in output like:

NEW:  769387546530018441n
OLD:  769387546530018441

Since it's a new file, the easiest way to view the changes is probably to manually compare the original xxhash64.js (after the patch in #23) with the new file. A diff is below:

--- C:/src/js-xxhash/lib/xxhash64-orig.js	Sat Jul 27 17:20:28 2019
+++ C:/src/js-xxhash/lib/xxhash64-bigint.js	Sat Jul 27 17:21:21 2019
@@ -2,18 +2,20 @@
 xxHash64 implementation in pure Javascript
 
 Copyright (C) 2016, Pierre Curto
+Copyright (C) 2019, Daniel Lo Nigro <d.sb>
 MIT license
 */
-var UINT64 = require('cuint').UINT64
 
 /*
  * Constants
  */
-var PRIME64_1 = UINT64( '11400714785074694791' )
-var PRIME64_2 = UINT64( '14029467366897019727' )
-var PRIME64_3 = UINT64(  '1609587929392839161' )
-var PRIME64_4 = UINT64(  '9650029242287828579' )
-var PRIME64_5 = UINT64(  '2870177450012600261' )
+var PRIME64_1 = BigInt( '11400714785074694791' )
+var PRIME64_2 = BigInt( '14029467366897019727' )
+var PRIME64_3 = BigInt(  '1609587929392839161' )
+var PRIME64_4 = BigInt(  '9650029242287828579' )
+var PRIME64_5 = BigInt(  '2870177450012600261' )
+var BITS = 64n
+var BITMASK = 2n ** BITS - 1n
 
 /**
 * Convert string to proper UTF-8 array
@@ -53,6 +55,64 @@
 }
 
 /**
+ * Converts bits to a BigInt.
+ * @param {Number} first low bits (8)
+ * @param {Number} second low bits (8)
+ * @param {Number} first high bits (8)
+ * @param {Number} second high bits (8)
+ */
+function bitsToBigInt(a00, a16, a32, a48) {
+	return (
+		BigInt(a00) | 
+		(BigInt(a16) << 16n) |
+		(BigInt(a32) << 32n) |
+		(BigInt(a48) << 48n)
+	)
+}
+
+/**
+ * Converts a chunk of memory (either an ArrayBuffer or a Node.js Buffer)
+ * to a BigInt representing a 64-bit integer.
+ * @param {ArrayBuffer|Buffer} The buffer
+ * @param {number} offset
+ * @return BigInt
+ */
+function memoryToBigInt(memory, offset) {
+	return (
+		BigInt(memory[offset]) | 
+		(BigInt(memory[offset+1]) << 8n) |
+		(BigInt(memory[offset+2]) << 16n) |
+		(BigInt(memory[offset+3]) << 24n) |
+		(BigInt(memory[offset+4]) << 32n) |
+		(BigInt(memory[offset+5]) << 40n) |
+		(BigInt(memory[offset+6]) << 48n) | 
+		(BigInt(memory[offset+7]) << 56n)
+	)
+}
+
+/**
+ * Performs a left bitwise rotation on the given unsigned 64-bit integer.
+ * @param {BigInt} number to rotate
+ * @param {BigInt} number of bits to rotate by
+ * @return BigInt
+ */
+function rotateLeft(value, rotation) {
+	return (
+		((value << rotation) & BITMASK) |
+		(value >> (BITS - rotation))
+	);
+}
+
+/**
+ * Truncate a BigInt to a 64-bit unsigned integer.
+ * @param {BigInt} number to truncate
+ * @return BigInt
+ */
+function truncate(value) {
+	return BigInt.asUintN(64, value);
+}
+
+/**
  * XXH64 object used as a constructor or a function
  * @constructor
  * or
@@ -79,11 +139,11 @@
  * @return ThisExpression
  */
  function init (seed) {
-	this.seed = seed instanceof UINT64 ? seed.clone() : UINT64(seed)
-	this.v1 = this.seed.clone().add(PRIME64_1).add(PRIME64_2)
-	this.v2 = this.seed.clone().add(PRIME64_2)
-	this.v3 = this.seed.clone()
-	this.v4 = this.seed.clone().subtract(PRIME64_1)
+	this.seed = BigInt.asUintN(32, BigInt(seed))
+	this.v1 = truncate(this.seed + PRIME64_1 + PRIME64_2)
+	this.v2 = truncate(this.seed + PRIME64_2)
+	this.v3 = this.seed
+	this.v4 = truncate(this.seed - PRIME64_1)
 	this.total_len = 0
 	this.memsize = 0
 	this.memory = null
@@ -154,37 +214,17 @@
 
 		var p64 = 0
 		var other
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v1.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v1 = truncate(rotateLeft(truncate(this.v1 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v2.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v2 = truncate(rotateLeft(truncate(this.v2 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v3.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v3 = truncate(rotateLeft(truncate(this.v3 + other * PRIME64_2), 31n) * PRIME64_1);
 		p64 += 8
-		other = UINT64(
-				(this.memory[p64+1] << 8) | this.memory[p64]
-			,	(this.memory[p64+3] << 8) | this.memory[p64+2]
-			,	(this.memory[p64+5] << 8) | this.memory[p64+4]
-			,	(this.memory[p64+7] << 8) | this.memory[p64+6]
-			)
-		this.v4.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+		other = memoryToBigInt(this.memory, p64)
+		this.v4 = truncate(rotateLeft(truncate(this.v4 + other * PRIME64_2), 31n) * PRIME64_1);
 
 		p += 32 - this.memsize
 		this.memsize = 0
@@ -197,37 +237,17 @@
 		do
 		{
 			var other
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v1.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v1 = truncate(rotateLeft(truncate(this.v1 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v2.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v2 = truncate(rotateLeft(truncate(this.v2 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v3.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v3 = truncate(rotateLeft(truncate(this.v3 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
-			other = UINT64(
-					(input[p+1] << 8) | input[p]
-				,	(input[p+3] << 8) | input[p+2]
-				,	(input[p+5] << 8) | input[p+4]
-				,	(input[p+7] << 8) | input[p+6]
-				)
-			this.v4.add( other.multiply(PRIME64_2) ).rotl(31).multiply(PRIME64_1);
+			other = memoryToBigInt(input, p)
+			this.v4 = truncate(rotateLeft(truncate(this.v4 + other * PRIME64_2), 31n) * PRIME64_1);
 			p += 8
 		} while (p <= limit)
 	}
@@ -257,83 +277,68 @@
 	var p = 0
 	var bEnd = this.memsize
 	var h64, h
-	var u = new UINT64
+	var u
 
 	if (this.total_len >= 32)
 	{
-		h64 = this.v1.clone().rotl(1)
-		h64.add( this.v2.clone().rotl(7) )
-		h64.add( this.v3.clone().rotl(12) )
-		h64.add( this.v4.clone().rotl(18) )
-
-		h64.xor( this.v1.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = rotateLeft(this.v1, 1n) + 
+			rotateLeft(this.v2, 7n) + 
+			rotateLeft(this.v3, 12n) + 
+			rotateLeft(this.v4, 18n)
+
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v1 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v2.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v2 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v3.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v3 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 
-		h64.xor( this.v4.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1) )
-		h64.multiply(PRIME64_1).add(PRIME64_4)
+		h64 = truncate(h64 ^ (rotateLeft(truncate(this.v4 * PRIME64_2), 31n) * PRIME64_1))
+		h64 = truncate(h64 * PRIME64_1 + PRIME64_4)
 	}
 	else
 	{
-		h64  = this.seed.clone().add( PRIME64_5 )
+		h64  = truncate(this.seed + PRIME64_5)
 	}
 
-	h64.add( u.fromNumber(this.total_len) )
+	h64 += BigInt(this.total_len)
 
 	while (p <= bEnd - 8)
 	{
-		u.fromBits(
-			(input[p+1] << 8) | input[p]
-		,	(input[p+3] << 8) | input[p+2]
-		,	(input[p+5] << 8) | input[p+4]
-		,	(input[p+7] << 8) | input[p+6]
-			)
-		u.multiply(PRIME64_2).rotl(31).multiply(PRIME64_1)
-		h64
-			.xor(u)
-			.rotl(27)
-			.multiply( PRIME64_1 )
-			.add( PRIME64_4 )
+		u = memoryToBigInt(input, p)
+		u = truncate(rotateLeft(truncate(u * PRIME64_2), 31n) * PRIME64_1)
+		
+		h64 = truncate((rotateLeft(h64 ^ u, 27n) * PRIME64_1) + PRIME64_4)
 		p += 8
 	}
 
 	if (p + 4 <= bEnd) {
-		u.fromBits(
+		u = bitsToBigInt(
 			(input[p+1] << 8) | input[p]
 		,	(input[p+3] << 8) | input[p+2]
 		,	0
 		,	0
 		)
-		h64
-			.xor( u.multiply(PRIME64_1) )
-			.rotl(23)
-			.multiply( PRIME64_2 )
-			.add( PRIME64_3 )
+		h64 = truncate((rotateLeft(h64 ^ truncate((u * PRIME64_1)), 23n) * PRIME64_2) + PRIME64_3)
 		p += 4
 	}
 
 	while (p < bEnd)
 	{
-		u.fromBits( input[p++], 0, 0, 0 )
-		h64
-			.xor( u.multiply(PRIME64_5) )
-			.rotl(11)
-			.multiply(PRIME64_1)
+		u = bitsToBigInt( input[p++], 0, 0, 0 )
+		h64 = truncate(rotateLeft(h64 ^ truncate(u * PRIME64_5), 11n) * PRIME64_1)
 	}
 
-	h = h64.clone().shiftRight(33)
-	h64.xor(h).multiply(PRIME64_2)
+	h = truncate(h64 >> 33n)
+	h64 = truncate((h64 ^ h) * PRIME64_2)
 
-	h = h64.clone().shiftRight(29)
-	h64.xor(h).multiply(PRIME64_3)
+	h = truncate(h64 >> 29n)
+	h64 = truncate((h64 ^ h) * PRIME64_3)
 
-	h = h64.clone().shiftRight(32)
-	h64.xor(h)
+	h = truncate(h64 >> 32n)
+	h64 = truncate(h64 ^ h)
 
 	// Reset the state
 	this.init( this.seed )

Closes #22

Daniel15 avatar Jul 28 '19 00:07 Daniel15