SizeChunker returning partial chunks on large files.
I'm running the following code against hip_main.dat (50+ MB), and sometimes a single chunk is split across two 'data' events.
var fs = require('fs');
var chunking = require('chunking-streams');
var SizeChunker = chunking.SizeChunker;

var input = fs.createReadStream('hip_main.dat');
input.setEncoding('ascii');

var chunker = new SizeChunker({
    chunkSize: 451,
    flushTail: false
});

var output = fs.createWriteStream('hip.json');

var i = 0;
chunker.on('data', function( chunk ) {
    // console.log( chunk.data.length );
    if (chunk.data.length > 400) {
        var RAdeg = chunk.data.toString( 'ascii', 51, 63 );
        var DEdeg = chunk.data.toString( 'ascii', 64, 76 );
        var Plx = chunk.data.toString( 'ascii', 79, 86 );
        //output.write(Plx);
        //output.write('\n');
        //console.log( RAdeg + ', ' + DEdeg + ', ' + Plx );
    } else {
        console.log( chunk.id );
        i++;
    }
});

chunker.on('end', function() {
    console.log( i );
});

//chunker.pipe( output );
input.pipe( chunker );
Never mind, I see that 'data' events can be partial and that 'chunkEnd' is where to look for chunk boundaries. Here's the code I have for assembling contiguous blocks of data. It would be nice to have this as an option for SizeChunker: emitting only complete data events.
var buffers = [];

chunker.on('chunkEnd', function( id, done ) {
    var buffer = (buffers.length == 1) ?
        buffers[0] :
        Buffer.concat(buffers, 451);
    buffers.length = 0;
    // logic here
    done();
});

chunker.on('data', function( chunk ) {
    buffers.push( chunk.data );
});
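For the record, where the '// logic here' placeholder sits I'd drop in the same fixed-width parsing from the first snippet, something along these lines (the offsets and the output.write are just the commented-out bits from above, revived):

chunker.on('chunkEnd', function( id, done ) {
    var buffer = (buffers.length == 1) ?
        buffers[0] :
        Buffer.concat(buffers, 451);
    buffers.length = 0;

    // Same fixed-width slices as in the first snippet, now guaranteed
    // to come from a complete 451-byte record.
    var RAdeg = buffer.toString( 'ascii', 51, 63 );
    var DEdeg = buffer.toString( 'ascii', 64, 76 );
    var Plx = buffer.toString( 'ascii', 79, 86 );
    output.write( RAdeg + ', ' + DEdeg + ', ' + Plx + '\n' );

    done();
});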
Ran into this myself - I find the multiple data events very confusing here; in @Axiverse's comment, he's accumulating the data into a buffer himself, but the whole point of this plugin is that I'd prefer not to do that chunking myself. Maybe a way around this would be to have the chunkEnd event emit that buffer, so that I could ignore the data events entirely and just get the complete chunk when it's ready.
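Roughly what I have in mind, as a sketch rather than anything the library provides today (the wholeChunks helper and the 'fullChunk' event name are made up for illustration): a thin wrapper that collects the partial 'data' pieces and re-emits one event per complete chunk.

var EventEmitter = require('events').EventEmitter;

// Hypothetical helper, not part of chunking-streams: collects the partial
// 'data' pieces for the current chunk and emits a single 'fullChunk' event
// carrying the complete buffer once 'chunkEnd' fires.
function wholeChunks(chunker) {
    var emitter = new EventEmitter();
    var pieces = [];

    chunker.on('data', function( chunk ) {
        pieces.push( chunk.data );
    });

    chunker.on('chunkEnd', function( id, done ) {
        emitter.emit('fullChunk', id, Buffer.concat(pieces));
        pieces = [];
        done();
    });

    return emitter;
}

// Usage: no need to touch 'data' events directly any more.
wholeChunks(chunker).on('fullChunk', function( id, buffer ) {
    // buffer is the complete chunkSize-byte chunk
});

That keeps the accumulation in one place instead of every consumer reimplementing it; if SizeChunker emitted something like this itself, the data events could stay as they are.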