
GridWriteStream throws RangeError: Maximum call stack size exceeded

Open · zamnuts opened this issue 10 years ago · 1 comment

At this time I am not certain whether this is an issue with GridWriteStream or with an earlier Duplex in the pipeline. I have simplified code that reproduces the issue, but not yet an example I can release publicly; I'll work on that over the next week and report back. In the meantime, I'm opening this issue to see whether the community has come across the problem, and I'm providing a work-around.

It seems that when an instance of GridWriteStream (gfs.createWriteStream({filename:'...'})) is placed at the end of a pipeline, e.g.:

var Grid = require('gridfs-stream');
var gfs = Grid(db, mongo); // db: an open mongodb Db instance, mongo: the mongodb driver module
someReadStream
   .pipe(someTransform)
   .pipe(someDuplex)
   .pipe(gfs.createWriteStream({filename:'file.txt'}));

...when a large amount of data moves rapidly through the pipeline, a "RangeError: Maximum call stack size exceeded" exception is thrown. In my specific case, I'm reading data via a MongoDB cursor, transforming each record to a text string, and saving the result to GridFS as a file, all within a pipeline similar to the one shown above. The exception is thrown after roughly 50k records at about 4k records/second, though that is not a definitive limit; for reference, the full data set is in the millions of records.
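To illustrate the failure mode in isolation, here is a minimal, hypothetical sketch (the write function below is a stand-in, not the gridfs-stream API): when a store invokes its callback synchronously, a self-recursive flush loop adds one stack frame per queued chunk and eventually overflows the stack.

// hypothetical stand-in for a store whose callback fires synchronously
function write(chunk, cb) { cb(null); }

// flush by recursing from inside the write callback
function flush(n) {
  if (!n) return;
  write('x', function () {
    flush(n - 1); // synchronous recursion: one stack frame per chunk
  });
}

flush(1e6); // RangeError: Maximum call stack size exceeded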

I've narrowed it down to GridWriteStream.prototype._flush. I have two versions of a recommendation for mitigating the RangeError problem. The first tracks recursion depth and defers past a limit, while the second simply invokes _flush on nextTick for every call.

GridWriteStream.prototype._flush = function _flush (_force, _recursion) {
  if (!this._opened) return;
  if (!_force && this._flushing) return;
  if (!_recursion) _recursion = 0;
  this._flushing = true;

  // write the entire q to gridfs
  if (!this._q.length) {
    this._flushing = false;
    this.emit('drain');

    if (this._destroying) {
      this.destroy();
    }
    return;
  }

  var self = this;
  this._store.write(this._q.shift(), function (err, store) {
    if (err) return self._error(err);
    self.emit('progress', store.position);
    // recurse synchronously while under the depth cap; past 100 calls,
    // defer the continuation to process.nextTick so the stack unwinds
    var f = self._flush.bind(self, true, ++_recursion);
    if (_recursion >= 100) {
      process.nextTick(f);
    } else {
      f();
    }
  });
}
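As written, this first variant allows up to 100 synchronous recursive calls before switching to process.nextTick deferral, which unwinds the stack. Note that because _recursion is never reset, every continuation after the hundredth write is deferred, so on large queues it converges to the behavior of the second variant below.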

or

GridWriteStream.prototype._flush = function _flush (_force) {
  if (!this._opened) return;
  if (!_force && this._flushing) return;
  this._flushing = true;

  // write the entire q to gridfs
  if (!this._q.length) {
    this._flushing = false;
    this.emit('drain');

    if (this._destroying) {
      this.destroy();
    }
    return;
  }

  var self = this;
  this._store.write(this._q.shift(), function (err, store) {
    if (err) return self._error(err);
    self.emit('progress', store.position);
    // defer every continuation to the next tick so the stack never grows
    process.nextTick(self._flush.bind(self,true)); // just this line differs
  });
}
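This second variant is the simpler of the two: stack depth stays constant because every continuation runs on a fresh stack, at the cost of one process.nextTick scheduling per chunk written.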

I can open a PR, although that may not be appropriate at this time. At the very least, I'm wondering whether gridfs-stream is the actual culprit, and I'm looking for feedback.

zamnuts · Dec 23 '14 03:12

v1.0.0 was just released and has all-new (node v0.10) stream handling. :-)

Is this issue still applicable for this new version?

Reggino · Feb 11 '15 20:02