GridWriteStream throws RangeError: Maximum call stack size exceeded
At this time I am not absolutely certain whether this is an issue with `GridWriteStream` or with a prior Duplex in the pipeline. While I have simplified code that reproduces the issue, I don't yet have an example I can release publicly; I'll work on that over the next week and report back. For the time being I'm opening this issue to see if the community has come across the problem, and I'm providing a work-around.
It seems that when an instance of `GridWriteStream` (`gfs.createWriteStream({filename:'...'})`) is placed in a pipeline, e.g.:
var Grid = require('gridfs-stream');
var gfs = Grid(db, mongo); // db: an open mongodb-native Db, mongo: the mongodb driver module

someReadStream
  .pipe(someTransform)
  .pipe(someDuplex)
  .pipe(gfs.createWriteStream({ filename: 'file.txt' }));
...when there is an abundance of data moving rapidly through the pipeline, a "RangeError: Maximum call stack size exceeded" exception is thrown. In my specific case I'm reading data via a MongoDB cursor, transforming each record into a text string, and saving the text to GridFS as a file, all within a pipeline similar to the one shown above. The exception is thrown after roughly 50k records at about 4k records/second; that is not a hard limit, though, and for reference the full data set is in the millions of records.
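For context, here is a minimal sketch of the kind of pipeline involved; the connection URL, the `records` collection, and the record-to-text mapping are stand-ins for illustration, not my actual code:

```js
var mongo = require('mongodb');
var Grid = require('gridfs-stream');
var Transform = require('stream').Transform;

mongo.MongoClient.connect('mongodb://localhost/test', function (err, db) {
  if (err) throw err;
  var gfs = Grid(db, mongo);

  // Turn each record (a plain object) into one line of text.
  var toText = new Transform({ objectMode: true });
  toText._transform = function (record, encoding, done) {
    done(null, JSON.stringify(record) + '\n');
  };

  db.collection('records').find().stream()                    // MongoDB cursor as a readable stream
    .pipe(toText)                                              // record -> text string
    .pipe(gfs.createWriteStream({ filename: 'records.txt' })); // write into GridFS
});
```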
I've narrowed it down to `GridWriteStream.prototype._flush`. I have two versions of a recommended mitigation for the RangeError. The first tracks and limits recursion depth, while the second simply invokes `_flush` on `process.nextTick` for every call.
GridWriteStream.prototype._flush = function _flush (_force, _recursion) {
  if (!this._opened) return;
  if (!_force && this._flushing) return;
  if (!_recursion) _recursion = 0;

  this._flushing = true;

  // write the entire q to gridfs
  if (!this._q.length) {
    this._flushing = false;
    this.emit('drain');

    if (this._destroying) {
      this.destroy();
    }
    return;
  }

  var self = this;
  this._store.write(this._q.shift(), function (err, store) {
    if (err) return self._error(err);
    self.emit('progress', store.position);

    if (++_recursion >= 100) {
      // every 100 same-stack callbacks, break the call stack by deferring
      // to the next tick and resetting the depth counter
      process.nextTick(self._flush.bind(self, true, 0));
    } else {
      self._flush(true, _recursion);
    }
  });
}
or
GridWriteStream.prototype._flush = function _flush (_force) {
  if (!this._opened) return;
  if (!_force && this._flushing) return;

  this._flushing = true;

  // write the entire q to gridfs
  if (!this._q.length) {
    this._flushing = false;
    this.emit('drain');

    if (this._destroying) {
      this.destroy();
    }
    return;
  }

  var self = this;
  this._store.write(this._q.shift(), function (err, store) {
    if (err) return self._error(err);
    self.emit('progress', store.position);

    // the only change: always defer the next flush to the next tick
    process.nextTick(self._flush.bind(self, true));
  });
}
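For what it's worth, here is a small self-contained toy (not gridfs-stream code) that reproduces the failure mode and shows why deferring to `process.nextTick` breaks the recursion. The hypothetical `writeOne` stands in for a store write whose callback fires on the same stack (e.g. when the driver merely buffers the data without flushing a chunk), which is what I presume happens here:

```js
// Stand-in for a write whose callback completes on the same stack.
function writeOne(item, cb) { cb(null); }

// Drains the queue by recursing inside the callback: one set of stack frames per item.
function drainSync(queue) {
  if (!queue.length) return;
  writeOne(queue.shift(), function () {
    drainSync(queue);
  });
}

// Same logic, but each step runs on a fresh stack via process.nextTick.
function drainSafe(queue) {
  if (!queue.length) return;
  writeOne(queue.shift(), function () {
    process.nextTick(function () { drainSafe(queue); });
  });
}

var items = new Array(1e5).join('.').split('.'); // 100k dummy items

// drainSync(items.slice()); // RangeError: Maximum call stack size exceeded
drainSafe(items.slice());    // completes without growing the stack
```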
I can issue a PR, although that may not be appropriate at this time. At the very least, I'm wondering whether gridfs-stream is the actual culprit, and I'm looking for feedback.
v1.0.0 has just been released and has all-new (node v0.10) stream handling. :-)
Is this issue still applicable for the new version?
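For reference, with the node v0.10 (streams2) `Writable` API that the new stream handling is based on, this kind of backpressure is managed by the stream machinery itself. A minimal sketch of a writable backed by a generic store object (this is a simplified illustration, not gridfs-stream's actual v1.0.0 code):

```js
var Writable = require('stream').Writable;
var util = require('util');

// `store` is assumed to be any object exposing write(chunk, callback).
function StoreWriteStream(store) {
  Writable.call(this);
  this._store = store;
}
util.inherits(StoreWriteStream, Writable);

// streams2 delivers one chunk at a time and waits for `done` before sending
// the next, so there is no user-level queue that has to be drained recursively.
StoreWriteStream.prototype._write = function (chunk, encoding, done) {
  this._store.write(chunk, done);
};
```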