
SizeChunker returning partial chunks on large files.

Axiverse opened this issue on Dec 30 '14 · 2 comments

I'm running the following code on hip_main.dat (50+ MB), and sometimes a chunk gets broken up into two 'data' events.

var fs = require('fs');
var chunking = require('chunking-streams');
var SizeChunker = chunking.SizeChunker;

var input = fs.createReadStream('hip_main.dat');
input.setEncoding('ascii');

// chunkSize of 451 = one fixed-width catalogue record (450 chars + newline)
var chunker = new SizeChunker({
    chunkSize: 451,
    flushTail: false
});

var output = fs.createWriteStream('hip.json');
var i = 0;

chunker.on('data', function( chunk ) {

    // console.log( chunk.data.length );

    if (chunk.data.length > 400) {
        // full chunk: slice out the fixed-width fields
        var RAdeg = chunk.data.toString( 'ascii', 51, 63 );
        var DEdeg = chunk.data.toString( 'ascii', 64, 76 );
        var Plx = chunk.data.toString( 'ascii', 79, 86 );

        //output.write(Plx);
        //output.write('\n');

        //console.log( RAdeg + ', ' + DEdeg + ', ' + Plx );
    } else {
        // partial chunk: this branch shouldn't fire, but it does
        console.log( chunk.id );
        i++;
    }

});

chunker.on('end', function() {
    // count of partial chunks seen
    console.log( i );
});

//chunker.pipe( output );
input.pipe( chunker );
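
For reference, the splitting is easy to observe without any of the parsing above. This is just an illustrative diagnostic, assuming the same chunk.id / chunk.data event shape used in the code: when a chunk straddles two reads of the source file, two consecutive 'data' events share the same id.

chunker.on('data', function( chunk ) {
    // a chunk split across reads shows up as two events with the same id,
    // whose lengths sum to the configured chunkSize (451 here)
    console.log( chunk.id, chunk.data.length );
});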

Axiverse · Dec 30 '14

Never mind, I see that a 'data' event can be partial; 'chunkEnd' is where to look for the chunk boundary. Here's the code I have for working with contiguous blocks of data. This would be nice to have as an option for SizeChunker: complete 'data' events.

var buffers = [];

chunker.on('chunkEnd', function( id, done ) {

    // all 'data' events for this chunk have arrived; stitch them together
    var buffer = (buffers.length === 1) ?
                    buffers[0] :
                    Buffer.concat(buffers, 451);

    buffers.length = 0;

    // logic here

    done();

});

chunker.on('data', function( chunk ) {

    buffers.push( chunk.data );

});
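
One caveat about the snippet above (my observation, not from the module docs): the hard-coded totalLength of 451 passed to Buffer.concat assumes every chunk is exactly that size. That holds here because flushTail is false, so no short tail chunk is ever emitted; with flushTail: true the final chunk could be shorter, and the concat result would still come out as a full 451-byte buffer, with the trailing bytes zero-filled or uninitialized depending on the Node version. Omitting the second argument is the safer general form:

// safer if a tail chunk may be shorter than chunkSize
var buffer = (buffers.length === 1) ? buffers[0] : Buffer.concat(buffers);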

Axiverse · Dec 30 '14

Ran into this myself. I find multiple 'data' events per chunk very confusing here; in @Axiverse's comment, he's accumulating the data into a buffer himself, but the whole point of this module is that I'd rather not do that chunking myself. Maybe a way around this would be to have the 'chunkEnd' event emit that assembled buffer, so that I could ignore the 'data' events entirely and get the complete chunk when it's ready.
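
A minimal sketch of that idea as a userland helper, built only on the events already shown in this thread (the onCompleteChunk name is made up here, not part of the module):

// hypothetical helper: buffer partial 'data' events and invoke cb once per
// complete chunk, using 'chunkEnd' as the boundary signal
function onCompleteChunk(chunker, cb) {
    var buffers = [];

    chunker.on('data', function( chunk ) {
        buffers.push( chunk.data );
    });

    chunker.on('chunkEnd', function( id, done ) {
        cb(id, Buffer.concat(buffers)); // one complete chunk, in order
        buffers.length = 0;
        done();
    });
}

// usage: every callback gets a full chunk, never a partial one
onCompleteChunk(chunker, function( id, data ) {
    console.log( id, data.length ); // always 451 here (flushTail: false)
});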

moxious · Jun 10 '16