Not minified minizip-asm.js
Do you have the full, non-minified minizip-asm.js source code in plain JavaScript, rather than the minified
minizip-asm.min.js?
I want to add Unicode support for filenames, but the minified file is very hard to read.
Let's fix the UTF-16 and Unicode paths and filenames together! Currently this works incorrectly, because the Uint8Array does not store UTF-16 characters correctly.
Test string:
mz.append("haha/тест.txt", text_bytes, {password: "~~~"}); //incorrect filename in zip.
That's all.
TL;DR
Near ./lib/minizip-asm.min.js, in the same folder, I created new file browser_test.html
with following code:
<!DOCTYPE html>
<html>
<!-- This file works locally in a browser, without NodeJS, require() and Buffer(). -->
<head>
<meta charset="utf-8">
</head>
<body>
<!-- Hidden until the archive is built; the script below fills in href/download and shows it. -->
<a id="download_encrypted_zip" href="" download="" style="display:none;">Download encrypted zip (password "~~~")</a>
<script src="minizip-asm.min.js"></script>
<script>
// Test: create a zip file encrypted with the password "~~~".
var text = "Abc~~~"; // text stored inside ./haha/тест.txt
// One byte per character; this is only valid because the test text is pure ASCII.
var text_bytes = text.split('').map(function(c) { return c.charCodeAt(0); });
var mz = new Minizip();
// Add the entry to the archive, in the folder "haha", encrypted with the password "~~~".
mz.append("haha/тест.txt", text_bytes, {password: "~~~"});
var blob = new Blob([mz.zip()]); // blob holding the zip archive
var url = window.URL.createObjectURL(blob); // object URL for the blob
// Look the link up explicitly instead of relying on the implicit id-named global.
var link = document.getElementById("download_encrypted_zip");
link.href = url;
link.download = "abc.zip"; // suggested file name
// No trailing ';' here: a value containing one is invalid and the assignment is ignored.
link.style.display = "inline-block";
</script>
</body>
</html>
After this, I opened this file, browser_test.html, in a browser tab, and I saw the download link.
The zip archive was downloaded successfully.
I see the folder /haha/, but inside it there is a .txt file with an incorrect filename: TВ¦¦TБTВ.txt
What happened? The path was specified as "haha/тест.txt". Maybe an incorrect encoding? Yes.
I went to the source code of minizip-asm.min.js and searched for ".append" inside it. At line 14, I see the following function:
// Minified Minizip append(e, r, t): e = entry path, r = entry data, t = options
// (defaults: password null, compressLevel 5). Both e and r are converted with
// Buffer.from(); the path bytes are copied into the `filename` heap region before
// the native "append" is invoked through Module.ccall.
this.append=function(e,r,t){var n=0;if(buf||len){if(!buf||!len)throw new Error("Can't get buffer length")}else n=1;if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");r=Buffer.from(r);var i={password:null,compressLevel:5};for(var o in t)i[o]=t[o];t=i,Module.HEAPU8.fill(0,reterr,reterr+BYTE),Module.HEAPU8.fill(0,newLen,newLen+SIZE_T),Module.HEAPU8.fill(0,filename,filename+SIZE_FILENAME),Module.HEAPU8.set(e,filename);var a=null;if(t.password){var u=Buffer.from(t.password);a=Module._malloc(u.length+BYTE),Module.HEAPU8.fill(0,a,a+u.length+BYTE),Module.HEAPU8.set(u,a)}var f=r.length,s=Module._malloc(f);Module.HEAPU8.set(r,s);var c=t.compressLevel,l=Module.ccall("append","number",["number","number","number","number","number","number","number","number","number","number"],[n,buf,len,newLen,filename,a,s,f,c,reterr]);if(Module.HEAPU8[reterr])throw new Error(Module.Pointer_stringify(l));buf=l,len=Buffer.from(Module.HEAPU8.subarray(newLen,newLen+SIZE_T)).readUInt32LE(),a&&Module._free(a),Module._free(s)},
I rewrote this with line breaks and indentation, to see what this function is doing in this script:
this.append=function(e,r,t){
var n=0;
if(buf||len){
if(!buf||!len)throw new Error("Can't get buffer length")
}else n=1;
if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");
r=Buffer.from(r);
var i={password:null,compressLevel:5};
for(var o in t)i[o]=t[o];
t=i,
Module.HEAPU8.fill(0,reterr,reterr+BYTE),
Module.HEAPU8.fill(0,newLen,newLen+SIZE_T),
Module.HEAPU8.fill(0,filename,filename+SIZE_FILENAME),
Module.HEAPU8.set(e,filename);
var a=null;
if(t.password){
var u=Buffer.from(t.password);
a=Module._malloc(u.length+BYTE),
Module.HEAPU8.fill(0,a,a+u.length+BYTE),
Module.HEAPU8.set(u,a)
}
var f=r.length,
s=Module._malloc(f);
Module.HEAPU8.set(r,s);
var c=t.compressLevel,
l=Module.ccall(
"append",
"number",
["number","number","number","number","number","number","number","number","number","number"],
[n,buf,len,newLen,filename,a,s,f,c,reterr]
);
if(Module.HEAPU8[reterr])throw new Error(Module.Pointer_stringify(l));
buf=l,
len=Buffer.from(Module.HEAPU8.subarray(newLen,newLen+SIZE_T)).readUInt32LE(),
a&&Module._free(a),
Module._free(s)
},
Then I added one line at the beginning of this function:
this.append=function(e,r,t){
console.log(".append(): e =", e, "r =", r, "t =", t);
//... blah-blah-blah other code...
}
After reloading the page browser_test.html, I see in console.log of the browser the following info:
.append(): e = haha/тест.txt r = [65, 98, 99, 126, 126, 126] t = Object {password: "~~~"}
That means, pathway go into this function as string.
What then?
if(e=Buffer.from(e),e.length>=SIZE_FILENAME)throw new Error("Exceed max filename length");
e=Buffer.from(e)
here, this string is modified...
how to?
// Debug trace: log the path before and after Buffer.from(), then check whether
// mapping every byte through String.fromCharCode reproduces the original string.
console.log("e before: ", e);
var saved_pathway = e;
e = Buffer.from(e);
if (e.length >= SIZE_FILENAME) {
    throw new Error("Exceed max filename length");
}
console.log("e after: ", e, "this is Uint8Array");
// Copy the bytes into a plain array; fall back to slice() if Array.from() throws.
var bytearray;
try {
    bytearray = Array.from(e);
} catch (ex) {
    bytearray = [].slice.call(e);
}
// String.fromCharCode turns each byte into its own character, so any
// multi-byte sequence in the buffer will not compare equal to the source string.
var bytes_as_string = String.fromCharCode.apply(null, bytearray);
var is_original = (bytes_as_string === saved_pathway);
console.log(
    "e as array: ", bytearray,
    "\n"+ "e as string from this bytearray: ", bytes_as_string,
    "\n"+ "is original string?", is_original
);
/*
.append(): e = haha/тест.txt r = [65, 98, 99, 126, 126, 126] t = Object {password: "~~~"} minizip-asm.min.js:18
e before: haha/тест.txt minizip-asm.min.js:25
e after:
[104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116, _isBuffer: true, swap16: function, swap32: function, swap64: function, toString: function…]
this is Uint8Array minizip-asm.min.js:28
e as array: [104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116]
e as string from this bytearray: haha/ÑеÑÑ.txt
is original string? false minizip-asm.min.js:34
*/
As you can see, this goes into the buffer as a Uint8Array holding the UTF-8 bytes of the path, and String.fromCharCode cannot convert this byte array back to the source path string, because it treats every byte as a separate character.
I tried to encode this string to an ArrayBuffer and decode it back, using the following JavaScript functions:
// source: http://stackoverflow.com/a/11058858
/**
 * Store the UTF-16 code units of a string in an ArrayBuffer.
 *
 * Each code unit takes two bytes, written through a Uint16Array view, so the
 * byte order is whatever the platform uses for typed arrays.
 *
 * @param {string} str - Text to encode.
 * @returns {ArrayBuffer} Buffer of `str.length * 2` bytes.
 */
function str2ab(str) {
    var units = new Uint16Array(str.length);
    var idx = units.length;
    // Fill from the end backwards; the write order does not affect the result.
    while (idx--) {
        units[idx] = str.charCodeAt(idx);
    }
    return units.buffer;
}
// source: http://stackoverflow.com/a/11058858
/**
 * Decode an ArrayBuffer of 16-bit code units (as produced by str2ab) to a string.
 *
 * The units are converted in fixed-size chunks: passing the whole array to
 * String.fromCharCode.apply() in one call throws a RangeError once the
 * buffer exceeds the engine's argument-count limit.
 *
 * @param {ArrayBuffer} buf - Buffer whose byte length is a multiple of 2.
 * @returns {string} The decoded text ("" for an empty buffer).
 */
function ab2str(buf) {
    var units = new Uint16Array(buf);
    var CHUNK = 0x8000; // code units per fromCharCode call, well below engine limits
    var parts = [];
    for (var start = 0; start < units.length; start += CHUNK) {
        parts.push(String.fromCharCode.apply(null, units.subarray(start, start + CHUNK)));
    }
    return parts.join("");
}
// Round-trip check: string -> ArrayBuffer (str2ab) -> string (ab2str).
var filepath = "haha/тест.txt";
var filepath_as_ArrayBuffer = str2ab(filepath);
var filepath_from_ArrayBuffer = ab2str(filepath_as_ArrayBuffer);
// Log the intermediate values and whether the decoded string equals the input.
console.log.apply(console, [
    "filepath", filepath,
    "\nfilepath_as_ArrayBuffer", filepath_as_ArrayBuffer,
    "\nnew Uint8Array(filepath_as_ArrayBuffer)", new Uint8Array(filepath_as_ArrayBuffer),
    "\nfilepath_from_ArrayBuffer", filepath_from_ArrayBuffer,
    "\n(filepath_from_ArrayBuffer === filepath)", (filepath_from_ArrayBuffer === filepath)
]);
/*
filepath haha/тест.txt
filepath_as_ArrayBuffer ArrayBuffer {}
new Uint8Array(filepath_as_ArrayBuffer) [104, 0, 97, 0, 104, 0, 97, 0, 47, 0, 66, 4, 53, 4, 65, 4, 66, 4, 46, 0, 116, 0, 120, 0, 116, 0]
filepath_from_ArrayBuffer haha/тест.txt
(filepath_from_ArrayBuffer === filepath) true
*/
I also tried to encode this to a byte array and decode it back, using the next two functions:
// https://codereview.stackexchange.com/a/3589/75693
/**
 * Split every UTF-16 code unit of a string into two bytes, high byte first.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} Array of `2 * str.length` byte values (0-255).
 */
function stringToBytes(str) {
    var total = str.length;
    var result = [];
    var pos = 0;
    while (pos < total) {
        var unit = str.charCodeAt(pos);
        result[2 * pos] = unit >>> 8;       // high byte
        result[2 * pos + 1] = unit & 0xFF;  // low byte
        pos += 1;
    }
    return result;
}
// https://codereview.stackexchange.com/a/3589/75693
/**
 * Rebuild a string from byte pairs (high byte first), the inverse of stringToBytes.
 *
 * Code units are converted in bounded chunks: one String.fromCharCode.apply()
 * call over the whole input throws a RangeError once the input exceeds the
 * engine's argument-count limit.
 *
 * @param {number[]} bytes - Byte values; for an odd length, the missing final
 *     low byte is treated as 0.
 * @returns {string} The decoded text ("" for empty input).
 */
function bytesToSring(bytes) {
    var CHUNK_UNITS = 0x4000; // code units per fromCharCode call
    var pieces = [];
    var units = [];
    for (var i = 0, n = bytes.length; i < n; i += 2) {
        units.push(((bytes[i] & 0xff) << 8) | (bytes[i + 1] & 0xff));
        if (units.length === CHUNK_UNITS) {
            pieces.push(String.fromCharCode.apply(null, units));
            units = [];
        }
    }
    if (units.length > 0) {
        pieces.push(String.fromCharCode.apply(null, units));
    }
    return pieces.join("");
}
// Round-trip check: string -> byte array (stringToBytes) -> string (bytesToSring).
var filepath = "haha/тест.txt";
var filepath_as_Bytes = stringToBytes(filepath);
var filepath_from_Bytes = bytesToSring(filepath_as_Bytes);
// Log the intermediate values and whether the decoded string equals the input.
console.log.apply(console, [
    "filepath", filepath,
    "\nfilepath_as_Bytes", filepath_as_Bytes,
    "\nnew Uint8Array(filepath_as_Bytes)", new Uint8Array(filepath_as_Bytes),
    "\nfilepath_from_Bytes", filepath_from_Bytes,
    "\n(filepath_from_Bytes === filepath)", (filepath_from_Bytes === filepath)
]);
/*
filepath haha/тест.txt
filepath_as_Bytes [0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116]
new Uint8Array(filepath_as_Bytes) [0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116]
filepath_from_Bytes haha/тест.txt
(filepath_from_Bytes === filepath) true
*/
As you can see, both methods allow the source path string to be decoded back, and the decoded strings are equal to the original, but the byte arrays they produce are different:
ArrayBuffer: [104, 0, 97, 0, 104, 0, 97, 0, 47, 0, 66, 4, 53, 4, 65, 4, 66, 4, 46, 0, 116, 0, 120, 0, 116, 0]
ByteArray: [0, 104, 0, 97, 0, 104, 0, 97, 0, 47, 4, 66, 4, 53, 4, 65, 4, 66, 0, 46, 0, 116, 0, 120, 0, 116]
Minizip_Buffer: [104, 97, 104, 97, 47, 209, 130, 208, 181, 209, 129, 209, 130, 46, 116, 120, 116]
As you can see, the first two arrays look like Uint16Array data encoding UTF-16 (with the low byte first and the high byte first, respectively).
Each ASCII symbol could be encoded there with just one byte,
but is written as two bytes in the array (with a null byte),
and the sub-arrays [66, 4, 53, 4, 65, 4, 66, 4] and [4, 66, 4, 53, 4, 65, 4, 66]
are the 'тест' substring encoded as UTF-16 bytes, followed by '.txt', which consists of ASCII symbols.
So, as I understand it, after all this we need to do something with this Uint8Array to support UTF-16 and, moreover, 4-byte Unicode paths (32 bits, Uint32Array).
I see that this uses HEAPU8, i.e. a Uint8Array, but in the source code I can also see HEAPU16 and HEAPU32, which may work with Uint16 and Uint32 arrays.
/**
 * Recreate every typed-array view over the current `buffer` and publish each
 * one both as the outer-scope HEAP* variable and as the matching Module property.
 */
function updateGlobalBufferViews() {
    // Signed integer views.
    HEAP8 = new Int8Array(buffer);
    Module.HEAP8 = HEAP8;
    HEAP16 = new Int16Array(buffer);
    Module.HEAP16 = HEAP16;
    HEAP32 = new Int32Array(buffer);
    Module.HEAP32 = HEAP32;

    // Unsigned integer views.
    HEAPU8 = new Uint8Array(buffer);
    Module.HEAPU8 = HEAPU8;
    HEAPU16 = new Uint16Array(buffer);
    Module.HEAPU16 = HEAPU16;
    HEAPU32 = new Uint32Array(buffer);
    Module.HEAPU32 = HEAPU32;

    // Floating-point views.
    HEAPF32 = new Float32Array(buffer);
    Module.HEAPF32 = HEAPF32;
    HEAPF64 = new Float64Array(buffer);
    Module.HEAPF64 = HEAPF64;
}
But all my experiments failed. Can anyone help?
Best regards.
I have already found a working unminifier: https://www.unminify2.com/
Cyrillic encoding for Windows is fixed in this commit: https://github.com/username1565/minizip-asm.js/commit/64b827f957141c0decc8753e64f04a5e157db15a
Two encodings were added: "windows-1251" and "cp866". One more case, for "cp866" paths with a "windows-1251" password, was added too. The functions that convert these two encodings to and from Unicode (UTF-8) can be seen in the script source code.
Now this using three additional parameters:
@encoding {
"utf8"|"buffer"|"windows-1251"|"Windows-1251"|"cp866"
|"pathways_and_filenames-encoding-is-cp866_password-encoding-is-windows-1251"
} <default="buffer"> - File can return in text.
This issue can be closed, but I am leaving it open so that you can see the changes, understand them, and do this inside the C/C++ code.
Also, there is one more bug: the .zip is too big (~64KB) for such a small text file,
and here, you can download a zip
with a size of 367 bytes by clicking the "Run" button.
Maybe you can fix this zip size, too.
Have a nice day.