node-unzip
Duplicate entry events
I'm piping a request stream into the unzip Parse stream and getting duplicate entry logs:
dist/
dist/css/
dist/css/bootstrap-theme.css
dist/css/bootstrap-theme.min.css
dist/css/bootstrap.css
dist/css/bootstrap.css
dist/css/bootstrap.min.css
dist/css/bootstrap.min.css
dist/fonts/
dist/fonts/glyphicons-halflings-regular.eot
dist/fonts/
dist/fonts/glyphicons-halflings-regular.eot
dist/fonts/glyphicons-halflings-regular.svg
dist/fonts/glyphicons-halflings-regular.svg
dist/fonts/glyphicons-halflings-regular.ttf
dist/fonts/glyphicons-halflings-regular.woff
dist/js/
dist/js/bootstrap.js
dist/js/bootstrap.min.js
var fs = require('fs');
var path = require('path');
var mkdirp = require('mkdirp');
var request = require('request');
var unzip = require('unzip');
// done(), err() and outDir come from the surrounding test harness.
var fileCnt = 0;
// fileComplete runs once per finished file write and once for the
// parser's 'close' event, so done() fires only after both.
var fileComplete = function() {
  fileCnt--;
  if (fileCnt === -1)
    done();
};
var outPipe = unzip.Parse()
  .on('entry', function(entry) {
    if (entry.type !== 'File')
      return entry.autodrain();
    console.log(entry.path);
    fileCnt++;
    var outPath = path.resolve(outDir, entry.path);
    mkdirp(path.dirname(outPath), function(mkdirErr) {
      if (mkdirErr) return err(mkdirErr);
      entry.pipe(
        fs.createWriteStream(outPath)
          .on('finish', fileComplete)
      );
    });
  })
  .on('close', fileComplete);
request({
  uri: 'https://github.com/twbs/bootstrap/releases/download/v3.0.0/bootstrap-3.0.0-dist.zip',
  headers: { 'accept': 'application/octet-stream' },
  strictSSL: false
})
  .on('response', function(res) {
    res.pause();
    res.pipe(outPipe);
    res.on('error', err);
    res.resume();
  })
  .on('error', err);
The finish event is never fired for each file, so the operation times out.
I also tried piping into the unzip stream directly, and that stalls too.
It also seems that the 'close' event of the Parse stream causes the individual entry write streams to stop instantly, so those files are never written.
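To see whether those write streams are being torn down with an error rather than finishing, a hypothetical diagnostic (not a fix) is to attach an 'error' handler next to the 'finish' one inside the entry handler above (outPath and fileComplete as in the repro):
entry.pipe(
  fs.createWriteStream(outPath)
    .on('finish', fileComplete)
    .on('error', function(e) {
      // Log which entry's write stream died and why.
      console.error('write stream stopped for ' + entry.path, e);
    })
);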
A simplified version of the issue is that:
fs.createReadStream('bootstrap.zip').pipe(unzip.Extract({ path: outDir }));
misses an entire folder in the archive. The archive definitely extracts fine with other unzip tools.
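To rule out a swallowed error on the Extract side, a minimal sketch (assuming outDir as above) is to listen for the stream's 'close' and 'error' events:
var fs = require('fs');
var unzip = require('unzip');
fs.createReadStream('bootstrap.zip')
  .pipe(unzip.Extract({ path: outDir }))
  .on('close', function() { console.log('extract finished'); })
  .on('error', function(e) { console.error('extract failed', e); });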
I am also noticing duplicate entry events. I tried using this file with the following code:
fs.createReadStream("xalan-j_2_7_1-bin.zip")
.pipe(unzip.Parse())
.on('entry', function(entry) {
if (entry.type == 'File') {
console.log("Found entry " + entry.path);
}
return entry.autodrain();
});
It ends up overflowing the stack.
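If the overflow comes from the parser synchronously recursing from one drained entry into the next (an assumption; I haven't verified this in the source), deferring the drain to the next turn of the event loop might break the chain:
var fs = require('fs');
var unzip = require('unzip');
fs.createReadStream('xalan-j_2_7_1-bin.zip')
  .pipe(unzip.Parse())
  .on('entry', function(entry) {
    if (entry.type === 'File') {
      console.log('Found entry ' + entry.path);
    }
    // Assumption: breaking any synchronous entry -> drain -> entry
    // recursion inside the parser avoids unbounded stack growth.
    setImmediate(function() {
      entry.autodrain();
    });
  });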
I ran into the same problem. I use this package to extract an xlsx file:
excel-push-pull Bypass entry: [Content_Types].xml +0ms
excel-push-pull Bypass entry: _rels/.rels +4ms
excel-push-pull Bypass entry: _rels/.rels +2ms
excel-push-pull Bypass entry: xl/_rels/workbook.xml.rels +1ms
excel-push-pull Bypass entry: xl/_rels/workbook.xml.rels +1ms
excel-push-pull Bypass entry: xl/workbook.xml +1ms
excel-push-pull Bypass entry: xl/workbook.xml +1ms
excel-push-pull Bypass entry: xl/theme/theme1.xml +1ms
excel-push-pull Bypass entry: xl/theme/theme1.xml +2ms
excel-push-pull Bypass entry: xl/styles.xml +2ms
excel-push-pull Bypass entry: xl/styles.xml +1ms
excel-push-pull Bypass entry: docProps/core.xml +2ms
excel-push-pull Bypass entry: docProps/core.xml +1ms
excel-push-pull Bypass entry: docProps/app.xml +0ms
excel-push-pull Bypass entry: docProps/app.xml +1ms
excel-push-pull Found strings entry: xl/sharedStrings.xml +1ms
excel-push-pull Picking: xl/sharedStrings.xml +0ms
excel-push-pull Found strings entry: xl/sharedStrings.xml +2ms
excel-push-pull Picking: xl/sharedStrings.xml +0ms
excel-push-pull Found worksheet entry: xl/worksheets/sheet1.xml +1ms
excel-push-pull Picking: xl/worksheets/sheet1.xml +0ms
excel-push-pull Pick: xl/sharedStrings.xml +7ms
excel-push-pull Found worksheet entry: xl/worksheets/sheet1.xml +1ms
excel-push-pull Picking: xl/worksheets/sheet1.xml +0ms
excel-push-pull Pick: xl/sharedStrings.xml +11ms
excel-push-pull Picking left when closed: 2 +4ms
excel-push-pull Pick: xl/worksheets/sheet1.xml +170ms
Piping of the second xl/worksheets/sheet1.xml entry stopped instantly. But not every xlsx file has the problem, so there must be some difference between the zips.
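Since the duplicate events repeat the same paths, a minimal guard (a sketch, assuming the first event for each path carries the usable data) is to track the paths already seen and drain the repeats:
var fs = require('fs');
var unzip = require('unzip');
var seen = {};
fs.createReadStream('worksheet.xlsx')
  .pipe(unzip.Parse())
  .on('entry', function(entry) {
    if (seen[entry.path]) {
      // Duplicate event for a path we already handled: skip it.
      return entry.autodrain();
    }
    seen[entry.path] = true;
    // ... handle the entry as usual, then drain it ...
    entry.autodrain();
  });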
The following is the structure of this xlsx file as listed by unzip:
unzip -l worksheet.xlsx
Archive: worksheet.xlsx
  Length     Date   Time    Name
 --------    ----   ----    ----
     1168  08-14-14 10:07   [Content_Types].xml
      588  08-14-14 10:07   _rels/.rels
      698  08-14-14 10:07   xl/_rels/workbook.xml.rels
      739  08-14-14 10:07   xl/workbook.xml
     7140  08-14-14 10:07   xl/theme/theme1.xml
     3220  08-14-14 10:07   xl/styles.xml
      593  08-14-14 10:07   docProps/core.xml
      784  08-14-14 10:07   docProps/app.xml
    21806  08-14-14 10:07   xl/sharedStrings.xml
   671612  08-14-14 10:07   xl/worksheets/sheet1.xml
 --------                   -------
   708348                   10 files
If I load the zip file into a buffer and stream the buffer into unzip (I use pullstream), no duplicate entries are found.
@idy thanks for following up. Could you perhaps provide a code example of what you did to bypass the issue? It would be very useful to see.
@guybedford I think this should work:
var unzip = require('unzip');
var concat = require('concat-stream');
var PullStream = require('pullstream');
var rs = ReadStreamOfZipFile; // a readable stream of the zip file
rs.pipe(concat(function(buf) {
  // Buffer the whole file, then feed it to the parser in one write.
  var ps = new PullStream();
  ps.write(buf);
  ps.end();
  var stream = ps.pipe(unzip.Parse());
  stream.on('entry', function(entry) {
    console.log(entry.path);
    entry.autodrain();
  });
}));
I use this approach in this file of excel-push-pull.
BTW, this is just a workaround, and it's slower than the streaming approach, as it loads all the data into memory.
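For what it's worth, the same buffer-then-replay trick should also work with Node's built-in PassThrough instead of pullstream (a sketch, assuming a streams2-capable Node; ReadStreamOfZipFile is the same placeholder as above):
var stream = require('stream');
var unzip = require('unzip');
var concat = require('concat-stream');
var rs = ReadStreamOfZipFile;
rs.pipe(concat(function(buf) {
  var pass = new stream.PassThrough();
  pass.end(buf); // buffer the whole archive, then replay it in one piece
  pass.pipe(unzip.Parse())
    .on('entry', function(entry) {
      console.log(entry.path);
      entry.autodrain();
    });
}));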
@idy thanks so much for posting - I will try this out soon.