minizip-asm.js icon indicating copy to clipboard operation
minizip-asm.js copied to clipboard

Not minified minizip-asm.js

Open username1565 opened this issue 6 years ago • 2 comments

Do you have the full, non-minified minizip-asm.js source in plain JavaScript, rather than only the minified minizip-asm.min.js?

I want to add Unicode support for filenames, but the minified file is very hard to read.

username1565 avatar Nov 21 '19 22:11 username1565

Let's fix UTF-16 and Unicode paths and filenames together! They currently work incorrectly, because the Uint8Array does not store UTF-16 characters correctly.

Test string: mz.append("haha/тест.txt", text_bytes, {password: "~~~"}); //incorrect filename in zip.

That's all.


TL;DR: Next to ./lib/minizip-asm.min.js, in the same folder, I created a new file, browser_test.html, with the following code:

<!DOCTYPE html>
<html>
<!-- This file works locally in a browser, without Node.js, require() or Buffer(). -->
<head>
<meta charset="utf-8">
</head>
<body>

	<!-- Hidden until the script below has built the archive and filled in href/download. -->
	<a id="download_encrypted_zip" href="" download="" style="display:none;">Download encrypted zip (password "~~~")</a>

<script src="minizip-asm.min.js"></script>
<script>

//	Test: create a zip file encrypted with the password "~~~".
var text = "Abc~~~"; 		//text, inside ./haha/тест.txt
//	One byte per character; sufficient here because the text is pure ASCII.
var text_bytes = text.split('').map(function(c) { return c.charCodeAt(0); });

var mz = new Minizip();
mz.append("haha/тест.txt", text_bytes, {password: "~~~"});		//add this to zip-archive, in the folder "haha", and encrypt by password "~~~"

//	Look the link up explicitly instead of relying on the implicit global created from its id.
var download_link = document.getElementById("download_encrypted_zip");

var blob = new Blob([mz.zip()]);						//create blob with zip-archive
var url = window.URL.createObjectURL(blob);				//create url-object
download_link.href = url;								//add url-object in the href of "a"-element
download_link.download = "abc.zip";						//append filename
//	The value must not end with ';' - an invalid display value is silently ignored.
download_link.style.display = "inline-block";			//show "a"-element

</script>
</body>
</html>

After this, I opened this file, browser_test.html, in a browser tab, and I saw the download link. The zip archive was downloaded successfully. I see the folder /haha/, but inside it there is a .txt file with an incorrect filename: TВ¦¦TБTВ.txt

What happened? The path was specified as "haha/тест.txt". Maybe the encoding is incorrect? Yes.


I went to the source code of minizip-asm.min.js and searched for ".append" inside it. At line 14, I found the following function:



// Minified `append` method, quoted verbatim from minizip-asm.min.js; e = entry path, r = entry data, t = options.
this.append=function(e,r,t){var n=0;if(buf||len){if(!buf||!len)throw new Error("Can't get buffer length")}else n=1;if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");r=Buffer.from(r);var i={password:null,compressLevel:5};for(var o in t)i[o]=t[o];t=i,Module.HEAPU8.fill(0,reterr,reterr+BYTE),Module.HEAPU8.fill(0,newLen,newLen+SIZE_T),Module.HEAPU8.fill(0,filename,filename+SIZE_FILENAME),Module.HEAPU8.set(e,filename);var a=null;if(t.password){var u=Buffer.from(t.password);a=Module._malloc(u.length+BYTE),Module.HEAPU8.fill(0,a,a+u.length+BYTE),Module.HEAPU8.set(u,a)}var f=r.length,s=Module._malloc(f);Module.HEAPU8.set(r,s);var c=t.compressLevel,l=Module.ccall("append","number",["number","number","number","number","number","number","number","number","number","number"],[n,buf,len,newLen,filename,a,s,f,c,reterr]);if(Module.HEAPU8[reterr])throw new Error(Module.Pointer_stringify(l));buf=l,len=Buffer.from(Module.HEAPU8.subarray(newLen,newLen+SIZE_T)).readUInt32LE(),a&&Module._free(a),Module._free(s)},


I reformatted it with tabs to see what this function does:


// Adds one entry to the archive held in the outer `buf`/`len` state by calling the native "append" routine.
//   e - entry path; a string at call time, replaced below by the Buffer of its bytes
//   r - entry contents; anything Buffer.from accepts (e.g. an array of byte values)
//   t - optional settings object; the keys read here are `password` and `compressLevel`
// Throws Error when only one of buf/len is set, when the encoded path is too long,
// or when the native call flags a failure through the `reterr` byte.
this.append=function(e,r,t){
	// n ends up 1 only when buf and len are both falsy; it is passed as the first native argument
	// (presumably a "start a new archive" flag - TODO confirm against the C source).
	var n=0;
	if(buf||len){
		if(!buf||!len)throw new Error("Can't get buffer length")
	}else n=1;
	// From here on `e` is the path as bytes, so the length check counts bytes, not characters.
	if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");
	r=Buffer.from(r);
	// Defaults, overridden by whatever keys the caller supplied in t.
	var i={password:null,compressLevel:5};
	for(var o in t)i[o]=t[o];
	// Zero the error byte, the new-length slot and the whole filename slot, then copy the path bytes in.
	// Because e.length < SIZE_FILENAME, at least one zero byte remains after the path.
	t=i,
	Module.HEAPU8.fill(0,reterr,reterr+BYTE),
	Module.HEAPU8.fill(0,newLen,newLen+SIZE_T),
	Module.HEAPU8.fill(0,filename,filename+SIZE_FILENAME),
	Module.HEAPU8.set(e,filename);
	// `a` stays null (passed as-is to the native call) when no password is given.
	var a=null;
	if(t.password){
		var u=Buffer.from(t.password);
		// Allocate one extra BYTE and zero everything, so the password bytes are followed by a zero.
		a=Module._malloc(u.length+BYTE),
		Module.HEAPU8.fill(0,a,a+u.length+BYTE),
		Module.HEAPU8.set(u,a)
	}
	// Copy the entry contents into a freshly allocated heap block of exactly r.length bytes.
	var f=r.length,
	s=Module._malloc(f);
	Module.HEAPU8.set(r,s);
	var c=t.compressLevel,
	l=Module.ccall(
		"append",
		"number",
		["number","number","number","number","number","number","number","number","number","number"],
		[n,buf,len,newLen,filename,a,s,f,c,reterr]
	);
	// A non-zero error byte means the returned number is used as a pointer to the error text.
	// NOTE(review): this throw happens before the _free calls below, so `a` and `s` are not released on failure.
	if(Module.HEAPU8[reterr])throw new Error(Module.Pointer_stringify(l));
	// On success the return value becomes the new archive pointer and the length is read
	// back from the newLen slot as a little-endian unsigned 32-bit integer.
	buf=l,
	len=Buffer.from(Module.HEAPU8.subarray(newLen,newLen+SIZE_T)).readUInt32LE(),
	a&&Module._free(a),
	Module._free(s)
},

Then, I added one line, at beginning of this function:

this.append=function(e,r,t){
	console.log(".append(): e =", e, "r =", r, "t =", t);
	//... blah-blah-blah other code...
}

After reloading the page browser_test.html, I see in console.log of the browser the following info:

.append(): e = haha/тест.txt r = [65, 98, 99, 126, 126, 126] t = Object {password: "~~~"}

That means the path goes into this function as a string.

What then?

if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");
e=Buffer.from(e)

here, this string is modified...

how to?

	// Debugging aid: log the path before and after Buffer.from() and try to turn the bytes back into a string.
	console.log("e before: ", e);
	// Keep the original string so it can be compared with the re-assembled one below.
	var saved_pathway = e;
	if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");
	console.log("e after: ", e, "this is Uint8Array");
	// Copy the bytes into a plain array; the catch branch is a fallback for when Array.from throws.
	try{
		var bytearray = Array.from(e);
	}catch(ex){
		var bytearray = [].slice.call(e);
	}
	// String.fromCharCode turns every byte into its own character, so multi-byte sequences
	// (the recorded output shows 209, 130 for the first Cyrillic letter) are not recombined;
	// that is why the comparison below logs false.
	// NOTE(review): the logged bytes look like UTF-8 - decoding them as UTF-8 should restore the string; confirm.
	console.log(
			"e as array: ", bytearray,
	"\n"+	"e as string from this bytearray: ", String.fromCharCode.apply(null, bytearray),
	"\n"+	"is original string?", (String.fromCharCode.apply(null, bytearray) === saved_pathway)
	);

/*
.append(): e = haha/тест.txt r = [65, 98, 99, 126, 126, 126] t = Object {password: "~~~"} minizip-asm.min.js:18
e before:  haha/тест.txt minizip-asm.min.js:25
e after:  
[104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116, _isBuffer: true, swap16: function, swap32: function, swap64: function, toString: function…]
 this is Uint8Array minizip-asm.min.js:28
e as array:  [104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116] 
e as string from this bytearray:  haha/тест.txt 
is original string? false minizip-asm.min.js:34
*/

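As you can see, the path goes into a Buffer as a Uint8Array, and this byte array cannot be converted back to the source path string with String.fromCharCode. (The bytes are the UTF-8 encoding of the path, so mapping each byte to a character on its own does not restore the Cyrillic letters.)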


I then tried encoding this string to an ArrayBuffer and decoding it back, using the following JavaScript functions:

// source: http://stackoverflow.com/a/11058858
/**
 * Packs a string into an ArrayBuffer, one 16-bit slot per UTF-16 code unit.
 *
 * @param {string} str - Text to pack.
 * @returns {ArrayBuffer} Buffer of str.length * 2 bytes; the byte order inside
 *   each slot is whatever Uint16Array uses on the running platform.
 */
function str2ab(str) {
  // A Uint16Array of str.length elements owns a buffer of str.length * 2 bytes.
  const codeUnits = new Uint16Array(str.length);
  let index = codeUnits.length;
  while (index > 0) {
    index -= 1;
    codeUnits[index] = str.charCodeAt(index);
  }
  return codeUnits.buffer;
}

// source: http://stackoverflow.com/a/11058858
/**
 * Decodes a buffer of 16-bit UTF-16 code units (as produced by str2ab) into a string.
 *
 * The code units are converted in fixed-size chunks: passing a very large array to
 * String.fromCharCode.apply in one call exceeds the engine's argument limit and
 * throws a RangeError.
 *
 * @param {ArrayBuffer} buf - Buffer whose byte length is a multiple of 2.
 * @returns {string} The decoded text ("" for an empty buffer).
 * @throws {RangeError} From the Uint16Array constructor when the byte length is odd.
 */
function ab2str(buf) {
  const CHUNK_SIZE = 0x8000; // comfortably below every engine's argument limit
  const codeUnits = new Uint16Array(buf);
  const pieces = [];
  for (let start = 0; start < codeUnits.length; start += CHUNK_SIZE) {
    // subarray() is a view, so chunking copies nothing.
    pieces.push(String.fromCharCode.apply(null, codeUnits.subarray(start, start + CHUNK_SIZE)));
  }
  return pieces.join("");
}

// Round-trip demo: encode the path with str2ab, decode it with ab2str, and log every step.
// The Uint8Array view exposes the raw bytes of the 16-bit slots; in the output recorded
// below, the low byte of each character comes first (e.g. 66, 4 for "т").
var filepath = "haha/тест.txt";
var filepath_as_ArrayBuffer = str2ab(filepath);
var filepath_from_ArrayBuffer = ab2str(filepath_as_ArrayBuffer);

console.log(
		"filepath", filepath,
"\n"+	"filepath_as_ArrayBuffer", filepath_as_ArrayBuffer,
"\n"+	"new Uint8Array(filepath_as_ArrayBuffer)", new Uint8Array(filepath_as_ArrayBuffer),
"\n"+	"filepath_from_ArrayBuffer", filepath_from_ArrayBuffer,
"\n"+	"(filepath_from_ArrayBuffer === filepath)", (filepath_from_ArrayBuffer === filepath)
);
/*
filepath haha/тест.txt 
filepath_as_ArrayBuffer ArrayBuffer {} 
new Uint8Array(filepath_as_ArrayBuffer) [104, 0, 97, 0, 104, 0, 97, 0, 47, 0, 66, 4, 53, 4, 65, 4, 66, 4, 46, 0, 116, 0, 120, 0, 116, 0] 
filepath_from_ArrayBuffer haha/тест.txt 
(filepath_from_ArrayBuffer === filepath) true 
*/

I also tried encoding the string to a byte array and decoding it back, using the next two functions:

// https://codereview.stackexchange.com/a/3589/75693
/**
 * Splits every UTF-16 code unit of a string into two bytes, high byte first.
 *
 * @param {string} str - Text to convert.
 * @returns {number[]} Plain array of str.length * 2 byte values (0-255).
 */
function stringToBytes(str) {
    return Array.from({ length: str.length * 2 }, function (_, position) {
        // Even positions carry the high byte, odd positions the low byte.
        var codeUnit = str.charCodeAt(position >> 1);
        return position % 2 === 0 ? codeUnit >>> 8 : codeUnit & 0xFF;
    });
}

// https://codereview.stackexchange.com/a/3589/75693
/**
 * Rebuilds a string from byte pairs, high byte first (the inverse of stringToBytes).
 *
 * Code units are converted in fixed-size chunks: passing a very large array to
 * String.fromCharCode.apply in one call exceeds the engine's argument limit and
 * throws a RangeError.
 *
 * @param {ArrayLike<number>} bytes - Byte values; only the low 8 bits of each are used.
 *   With an odd number of bytes, the final byte is treated as a high byte whose
 *   low byte is 0.
 * @returns {string} The decoded text ("" for empty input).
 */
function bytesToSring(bytes) {
    var CHUNK_SIZE = 0x8000; // comfortably below every engine's argument limit
    var pieces = [];
    var chars = [];
    for(var i = 0, n = bytes.length; i < n; i += 2) {
        // A missing trailing byte reads as undefined, and undefined & 0xff is 0.
        chars.push(((bytes[i] & 0xff) << 8) | (bytes[i + 1] & 0xff));
        if(chars.length === CHUNK_SIZE) {
            pieces.push(String.fromCharCode.apply(null, chars));
            chars = [];
        }
    }
    pieces.push(String.fromCharCode.apply(null, chars));
    return pieces.join("");
}


// Same round-trip demo using stringToBytes / bytesToSring, which put the high byte of
// each UTF-16 code unit first (e.g. 4, 66 for "т" in the output recorded below).
var filepath = "haha/тест.txt";
var filepath_as_Bytes = stringToBytes(filepath);
var filepath_from_Bytes = bytesToSring(filepath_as_Bytes);

console.log(
		"filepath", filepath,
"\n"+	"filepath_as_Bytes", filepath_as_Bytes,
"\n"+	"new Uint8Array(filepath_as_Bytes)", new Uint8Array(filepath_as_Bytes),
"\n"+	"filepath_from_Bytes", filepath_from_Bytes,
"\n"+	"(filepath_from_Bytes === filepath)", (filepath_from_Bytes === filepath)
);
/*
filepath haha/тест.txt 
filepath_as_Bytes [0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116] 
new Uint8Array(filepath_as_Bytes) [0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116] 
filepath_from_Bytes haha/тест.txt 
(filepath_from_Bytes === filepath) true 
*/

As you can see, both methods allow the source path string to be decoded back, and the decoded strings are equal to the original, but the two methods produce different byte arrays:

ArrayBuffer: 	[104, 0, 97, 0, 104, 0, 97, 0, 47, 0, 66, 4, 53, 4, 65, 4, 66, 4, 46, 0, 116, 0, 120, 0, 116, 0]
ByteArray:		[0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116]
Minizip_Buffer:	[104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116]

As you can see, these two arrays look like the contents of a Uint16Array encoding UTF-16. ASCII characters need just one byte each, but are written as two bytes in the array (with a null byte); the sub-arrays [66, 4, 53, 4, 65, 4, 66, 4] and [4, 66, 4, 53, 4, 65, 4, 66] are the substring 'тест' encoded as UTF-16 bytes, and '.txt' then consists of ASCII characters.


So, as I understand it after all this, we need to do something with this Uint8Array to support UTF-16 and, moreover, 4-byte Unicode paths (32 bits, Uint32Array). I see that this code uses HEAPU8, i.e. a Uint8Array, but in the source code I can also see HEAPU16 and HEAPU32, which may work with Uint16 and Uint32 arrays.

// Creates a fresh typed-array view of every element type over the outer `buffer`
// and stores each one both in the bare HEAP* variable and in the matching
// Module.HEAP* property. All eight views share the same underlying buffer.
function updateGlobalBufferViews() {
	// Signed integer views.
	Module.HEAP8 = HEAP8 = new Int8Array(buffer);
	Module.HEAP16 = HEAP16 = new Int16Array(buffer);
	Module.HEAP32 = HEAP32 = new Int32Array(buffer);

	// Unsigned integer views.
	Module.HEAPU8 = HEAPU8 = new Uint8Array(buffer);
	Module.HEAPU16 = HEAPU16 = new Uint16Array(buffer);
	Module.HEAPU32 = HEAPU32 = new Uint32Array(buffer);

	// Floating-point views.
	Module.HEAPF32 = HEAPF32 = new Float32Array(buffer);
	Module.HEAPF64 = HEAPF64 = new Float64Array(buffer);
}

But all my experiments failed. Can anyone help?

Best regards.

username1565 avatar Nov 22 '19 01:11 username1565

I have already found a working unminifier: https://www.unminify2.com/

Cyrillic encoding for Windows is fixed in this commit: https://github.com/username1565/minizip-asm.js/commit/64b827f957141c0decc8753e64f04a5e157db15a

Two encodings were added: "windows-1251" and "cp866". One more case — "cp866" paths combined with a "windows-1251" password — was added too. The functions that convert these two encodings to and from Unicode (UTF-8) can be found in the script's source code.

Now this using three additional parameters:

@encoding {
	"utf8"|"buffer"|"windows-1251"|"Windows-1251"|"cp866"
	|"pathways_and_filenames-encoding-is-cp866_password-encoding-is-windows-1251"
} <default="buffer"> - File can return in text.

This issue can be closed, but I am leaving it open so that you can see the changes, understand them, and implement the same thing in the C/C++ code. Also, there is one more bug: the .zip is too big (~64KB) for such a small text file, and here you can download a zip with a size of 367 bytes by clicking the "Run" button.

Maybe, you can fix this zip-size, too.

Have a nice day.

username1565 avatar Nov 24 '19 01:11 username1565