node-unzip icon indicating copy to clipboard operation
node-unzip copied to clipboard

Duplicate entry events

Open guybedford opened this issue 12 years ago • 8 comments

I'm piping a request stream into the zip stream, and getting duplicate entry logs:

dist/
dist/css/
dist/css/bootstrap-theme.css
dist/css/bootstrap-theme.min.css
dist/css/bootstrap.css
dist/css/bootstrap.css
dist/css/bootstrap.min.css
dist/css/bootstrap.min.css
dist/fonts/
dist/fonts/glyphicons-halflings-regular.eot
dist/fonts/
dist/fonts/glyphicons-halflings-regular.eot
dist/fonts/glyphicons-halflings-regular.svg
dist/fonts/glyphicons-halflings-regular.svg
dist/fonts/glyphicons-halflings-regular.ttf
dist/fonts/glyphicons-halflings-regular.woff
dist/js/
dist/js/bootstrap.js
dist/js/bootstrap.min.js
          // Pending-work counter: incremented once per file entry that is being
          // written, decremented each time fileComplete runs.
          var fileCnt = 0;
          // Completion callback: decrements the counter and calls done() when it
          // reaches -1, i.e. after one more decrement than there were increments.
          // NOTE(review): presumably that extra decrement is the parser's 'close'
          // event, so done() only fires if 'close' and every 'finish' are emitted
          // exactly once each — confirm.
          var fileComplete = function() {
            fileCnt--;
            if (fileCnt == -1)
              done();
          }

          // Zip parse stream: every 'File' entry is logged and written to disk
          // under outDir; every other entry is drained without being written.
          outPipe = unzip.Parse()
          .on('entry', function (entry) {
            // Anything that is not a 'File' entry is discarded.
            if (entry.type != 'File')
              return entry.autodrain();

            console.log(entry.path);
            // One more file whose write stream must emit 'finish' before done().
            fileCnt++;

            var outPath = path.resolve(outDir, entry.path);
            // Ensure the parent directory exists before opening the write stream.
            // NOTE(review): the err argument is ignored, so the entry is piped even
            // if directory creation failed.
            // NOTE(review): the entry is piped from inside mkdirp's callback rather
            // than directly in the 'entry' handler — confirm the parser tolerates a
            // consumer that is attached late.
            mkdirp(path.dirname(outPath), function(err) {
              entry.pipe(
                fs.createWriteStream(outPath)
                .on('finish', fileComplete)
              );
            });
          })
          // 'close' also runs fileComplete, supplying the one extra decrement that
          // lets fileCnt reach -1.
          .on('close', fileComplete);

        // Download the Bootstrap 3.0.0 dist archive and pipe the HTTP response
        // into outPipe. `err` is used as the error handler for both the request
        // and the response; it is defined outside this snippet.
        request({
          uri: 'https://github.com/twbs/bootstrap/releases/download/v3.0.0/bootstrap-3.0.0-dist.zip', 
          headers: { 'accept': 'application/octet-stream' },
          // NOTE(review): presumably turns off TLS certificate verification —
          // confirm this is intentional.
          strictSSL: false
        })
        .on('response', function(res) {
          // The response is paused while the pipe and the error handler are
          // attached, then resumed.
          res.pause();
          res.pipe(outPipe);
          res.on('error', err);
          res.resume();
        })
        .on('error', err);

The finish event is not being emitted for every file, so the operation times out.

I also tried using the direct unzip stream and this is stalling too.

guybedford avatar Oct 04 '13 08:10 guybedford

Also, it seems that the "close" event of the Parse stream causes the write streams of the individual entries to stop immediately. These files are then never written.

guybedford avatar Oct 04 '13 08:10 guybedford

A simplified version of the issue is simply that:

  // Minimal reproduction: pipe the local archive straight into unzip.Extract,
  // with outDir as the extraction path.
  fs.createReadStream('bootstrap.zip').pipe(unzip.Extract({ path: outDir }));

This misses out an entire folder in the archive. The archive definitely extracts OK with other unzip methods.

guybedford avatar Oct 04 '13 09:10 guybedford

I am also noticing duplicate entry events. I tried using this file with the following code:

    // Reproduction: parse the archive, log the path of each 'File' entry, and
    // drain every entry (file or not) without writing anything.
    fs.createReadStream("xalan-j_2_7_1-bin.zip")
        .pipe(unzip.Parse())
        .on('entry', function(entry) {
            if (entry.type == 'File') {
                console.log("Found entry " + entry.path);
            }
            // Every entry is drained, whatever its type.
            return entry.autodrain();
        });

It ends up overflowing the stack.

rhoegg avatar Mar 18 '14 14:03 rhoegg

I ran into the same problem. I use this package to extract an xlsx file:

  excel-push-pull Bypass entry: [Content_Types].xml +0ms
  excel-push-pull Bypass entry: _rels/.rels +4ms
  excel-push-pull Bypass entry: _rels/.rels +2ms
  excel-push-pull Bypass entry: xl/_rels/workbook.xml.rels +1ms
  excel-push-pull Bypass entry: xl/_rels/workbook.xml.rels +1ms
  excel-push-pull Bypass entry: xl/workbook.xml +1ms
  excel-push-pull Bypass entry: xl/workbook.xml +1ms
  excel-push-pull Bypass entry: xl/theme/theme1.xml +1ms
  excel-push-pull Bypass entry: xl/theme/theme1.xml +2ms
  excel-push-pull Bypass entry: xl/styles.xml +2ms
  excel-push-pull Bypass entry: xl/styles.xml +1ms
  excel-push-pull Bypass entry: docProps/core.xml +2ms
  excel-push-pull Bypass entry: docProps/core.xml +1ms
  excel-push-pull Bypass entry: docProps/app.xml +0ms
  excel-push-pull Bypass entry: docProps/app.xml +1ms
  excel-push-pull Found strings entry: xl/sharedStrings.xml +1ms
  excel-push-pull Picking: xl/sharedStrings.xml +0ms
  excel-push-pull Found strings entry: xl/sharedStrings.xml +2ms
  excel-push-pull Picking: xl/sharedStrings.xml +0ms
  excel-push-pull Found worksheet entry: xl/worksheets/sheet1.xml +1ms
  excel-push-pull Picking: xl/worksheets/sheet1.xml +0ms
  excel-push-pull Pick: xl/sharedStrings.xml +7ms
  excel-push-pull Found worksheet entry: xl/worksheets/sheet1.xml +1ms
  excel-push-pull Picking: xl/worksheets/sheet1.xml +0ms
  excel-push-pull Pick: xl/sharedStrings.xml +11ms
  excel-push-pull Picking left when closed: 2 +4ms
  excel-push-pull Pick: xl/worksheets/sheet1.xml +170ms

Piping of the second xl/worksheets/sheet1.xml was stopped immediately. However, not every xlsx file has the problem, so I think there must be some difference between the zips.

Following is the structure of this xlsx file as listed by unzip:

unzip -l worksheet.xlsx
Archive:  worksheet.xlsx
  Length     Date   Time    Name
 --------    ----   ----    ----
     1168  08-14-14 10:07   [Content_Types].xml
      588  08-14-14 10:07   _rels/.rels
      698  08-14-14 10:07   xl/_rels/workbook.xml.rels
      739  08-14-14 10:07   xl/workbook.xml
     7140  08-14-14 10:07   xl/theme/theme1.xml
     3220  08-14-14 10:07   xl/styles.xml
      593  08-14-14 10:07   docProps/core.xml
      784  08-14-14 10:07   docProps/app.xml
    21806  08-14-14 10:07   xl/sharedStrings.xml
   671612  08-14-14 10:07   xl/worksheets/sheet1.xml
 --------                   -------
   708348                   10 files

idy avatar Aug 14 '14 03:08 idy

If I load the zip file into a buffer and streamify the buffer (I use pull-stream) into unzip, no duplicate entries are found.

idy avatar Sep 01 '14 02:09 idy

@idy thanks for following up. Could you perhaps provide a code example of what you did to bypass the issue? It would be very useful to see.

guybedford avatar Sep 01 '14 10:09 guybedford

@guybedford I think this should work:

// Workaround sketch: collect the whole archive into one buffer with
// concat-stream, then hand that buffer to unzip.Parse in a single write.
var unzip = require('unzip');
var concat = require('concat-stream');
// NOTE(review): ReadStreamOfZipFile is not defined in this snippet — presumably
// a placeholder for the caller's readable stream of the zip file.
var rs = ReadStreamOfZipFile;
rs.pipe(concat(function(buf) {
  // NOTE(review): write/end/pipe are called directly on the value returned by
  // require('pullstream'); confirm whether that export must be instantiated
  // first.
  var ps = require('pullstream');
  ps.write(buf);
  ps.end();
  var stream = ps.pipe(unzip.Parse());
  // Log the path of every entry the parser emits.
  stream.on('entry', function(entry) { console.log(entry.path); });
}));

I use this approach in this file of excel-push-pull.

BTW, this is just a workaround, and it's slower than the streaming approach, as it loads all the data into memory.

idy avatar Sep 02 '14 07:09 idy

@idy thanks so much for posting - I will try this out soon.

guybedford avatar Sep 02 '14 08:09 guybedford