phantom-render-stream icon indicating copy to clipboard operation
phantom-render-stream copied to clipboard

Pooling + Memory Leaks

Open davisford opened this issue 10 years ago • 14 comments

Hiya, I'm seeing some serious leakage going on when using the pool feature. I may very well be using the API wrong, but can you take a quick look?

Here's what's going on with htop (showing threads) under normal conditions (after doing a few print jobs):

 6969 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.41 ├─ pm2: Daemon
20465 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.83 │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20493 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:06.33 │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stre
23500 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20593 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20568 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20527 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20511 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20500 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.00 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20497 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.03 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20496 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.00 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20495 ubuntu     20   0 2146M  320M 19344 S  0.0  8.5  0:00.00 │  │  │  └─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20492 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20491 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20490 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20489 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20466 ubuntu     20   0  967M 51288  6772 S  0.0  1.3  0:00.02 │  │  └─ node /home/ubuntu/git/warehouse-print/app.js
 7008 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7007 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7006 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7005 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 6970 ubuntu     20   0  679M 23440  5104 S  0.0  0.6  0:00.01 │  └─ pm2: Daemon

Now, I run a few more print jobs:

36816  6136  1528 S  0.0  0.2  0:01.38 /sbin/init
 6969 ubuntu     20   0  679M 23536  5104 S  0.0  0.6  0:00.46 ├─ pm2: Daemon
20465 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:01.05 │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20493 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:14.16 │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stre
29349 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29341 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29336 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.00 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29331 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29322 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29314 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29296 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29285 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
23500 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20593 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20568 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.01 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20527 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20511 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.02 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20500 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.00 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20497 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.03 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20496 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.00 │  │  │  ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20495 ubuntu     20   0 2871M  636M 19344 S  0.0 16.9  0:00.00 │  │  │  └─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20492 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20491 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20490 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:00.01 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20489 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:00.00 │  │  ├─ node /home/ubuntu/git/warehouse-print/app.js
20466 ubuntu     20   0  970M 55028  6772 S  0.0  1.4  0:00.02 │  │  └─ node /home/ubuntu/git/warehouse-print/app.js
 7008 ubuntu     20   0  679M 23536  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7007 ubuntu     20   0  679M 23536  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7006 ubuntu     20   0  679M 23536  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 7005 ubuntu     20   0  679M 23536  5104 S  0.0  0.6  0:00.00 │  ├─ pm2: Daemon
 6970 ubuntu     20   0  679M 2

The number of phantom instances has increased and it doesn't seem to go back down. Over a longer period of time (~ couple weeks), eventually I exhaust the physical RAM and the machine has to be reset.

I'm using the API like so:

// Renderer created once up front; `pool: 5` is the pooling option in question.
var render = phantom({
  /* other options */
  pool: 5
});

// I'm uploading the pdfs to Amazon S3 via streams (that is what `upload` is):

var upload = s2stream.upload({ /* options */ });
var compress = zlib.createGzip();

// Success path: log what the upload reported and hand it to the caller.
upload.on('uploaded', function onUploaded(info) {
  log.info(info);
  done(null, info);
});

// Failure path: log the upload error and propagate it to the caller.
upload.on('error', function onUploadError(err) {
  log.error(err);
  done(err);
});

// Render the page, gzip the rendered output, and stream it into the upload.
var rendered = render(url, {
  /* options */
});
rendered.pipe(compress).pipe(upload);

If I do render.destroy( ) like this:

// Tear the renderer down after either outcome, then report back to the caller.
upload.on('error', function (err) {
  render.destroy();
  done(err);
});
upload.on('uploaded', function (info) {
  render.destroy();
  done();
});

It seems to clean up and not have the leaks, but it also doesn't seem to keep the pool available, so on every new print job, it spins up 5 more instances of phantom, processes via one of them, and then shuts it all down. In essence, I can stop the leak from happening, but I no longer have the advantage of using the pool at all.

So, I'm wondering if I'm using the API wrong, or is there another issue here?

I do have a fork of your repo that is older and not up to date with the latest, so I will investigate a fetch upstream and see what has changed, but I wanted to just pose the question here.

Thanks!

davisford avatar Jan 31 '15 20:01 davisford

What is your maxRenders value set to? Recycling the workers periodically should stabilize the memory usage if it's in the phantom process. The default value is 20.

markstos avatar Jan 31 '15 21:01 markstos

maxRenders is not set currently, so I guess it would be 20.

The box is an EC2 c3.large with 3.7GB RAM. With the current workload / frequency on this particular node, that seems to last for a few weeks before it just gets bogged down and the system can't allocate RAM anymore. I've noticed just watching htop and issuing jobs manually that sometimes it does not spawn new phantom instances and uses one of the available...but sometimes it does..even under quiescent conditions where I am the only one triggering it in sequential fashion. I've also noticed that sometimes those "new" phantom instances get cleaned up, and sometimes they don't, and after a while -- they pile up.

I haven't investigated the code in great detail on how the pooling works here...haven't had time, just thought I'd reach out quickly first to see if this has been seen before.

For now, I've injected the render.destroy() call just to monitor it and rule out anything else on the box that might be the culprit. That undoubtedly seems to be doing the trick. After about 50 jobs, the memory stays resident ~250MB...spiking only briefly when the job runs.

davisford avatar Feb 01 '15 02:02 davisford

I ran into memory growth today after the tool was used over 100 times today, growing a PhantomJS process to 6.7 Megs.

It turns out that maxRenders is only documented as having a default value of 20; the actual default in the code is 500. So try a low maxRenders and see if that helps. You had probably not reached the 500 limit yet. I'm going to try the same.

I would also think an even lower default value than 500 is sensible. At least a couple of us have already run into exhausting RAM before we reach that limit.

markstos avatar Jun 16 '15 23:06 markstos

Oh wow -- I missed that it was being set to 500. Will try to force it to a more sane value.

davisford avatar Jun 28 '15 01:06 davisford

@davisford @markstos Do we still have an issue here?

It might be relevant to re-test when merging #93

hulvei3 avatar Feb 22 '16 10:02 hulvei3

The leakage should be re-tested with Phantom 2.

A default maxRenders value of 20 rather than 500 may still be a better default.

markstos avatar Feb 22 '16 14:02 markstos

Yes, AFAIK it still leaks. I just basically disabled the pool to keep it stable, so each request spawns a whole new phantom process for me and destroys it at the end. That works for me now.

Unfortunately, I looked into Phantom2 and it has some major issues printing PDFs (i.e. zoom factor is all screwed up) and most people are waiting for a proper fix so I haven't bothered trying to upgrade.

The core tech from atom.io (electro-something) looks more promising as a PDF generator as an alternative to phantom. Might be worth investigating and wrapping it up with the same API here.

davisford avatar Feb 26 '16 14:02 davisford

thanks for the input - definitely worth checking out

hulvei3 avatar Feb 29 '16 09:02 hulvei3

It leaks on Debian GNU/Linux 7.10 (Linux local 3.2.0-4-amd64 SMP Debian 3.2.78-1 x86_64 GNU/Linux), but works stably on Darwin Kernel Version 16.1.0 (MacBook).

The problem persists even with a pool of 1 — it created ~10 phantom processes!

petrgrishin avatar Oct 28 '16 13:10 petrgrishin

@davisford Did you end up looking at atom.io's solution for PDF generation?

FYI to those on the thread, there's a fork of this project at https://github.com/SamMorrowDrums/phantom-render-stream that is currently trying to take many of the outstanding pull requests from this repo and get them out in a new release. Ideally this would be an update to maintainership, but could be published as a fork if this repo remains inactive.

markstos avatar Oct 28 '16 15:10 markstos

Yes, we switched to nightmare.js with electron.io -- it works great. It was a bit of a pain to get working but it has been running in production for months now generating tons of PDFs for us.

davisford avatar Dec 09 '16 12:12 davisford

@davisford Do you recall any tips for getting Nightmare.js working, to make it less painful for those who might follow in your footsteps?

One thing I notice is that Nightmare.js doesn't seem to have a streaming option, so if you are using phantom-render-stream in a stream-based workflow, a little code will need to be rewritten to adapt to its file-based APIs.

markstos avatar Dec 09 '16 15:12 markstos

Sorry for the lag in response. One thing...with electron/nightmare, you need to use xvfb..example script to start process:

#!/bin/sh
# Start the app under a virtual X server, since Electron/Nightmare needs a display.
# -a lets xvfb-run pick a free server number automatically.
# The script path is quoted so a working directory containing spaces is not
# split into several arguments.
xvfb-run -a /usr/bin/node "$(pwd)/app.js"

Here's a snippet of how we use nightmare, we create our own read stream and return...there are some workarounds in there that may not be necessary, and were unique to our own situation where we are generating pdfs of d3/chart-heavy and table heavy pages.

var Nightmare = require('nightmare'),
  fs = require('fs'),
  path = require('path');

module.exports = {
  renderToPDF: function (reportData, reportId) {
    var tmpFileName = path.join(__dirname, reportId + '.pdf');

    return new Nightmare(nightmareOpts)
      .goto(reportData.url + '&format=pdf', {
        'Authorization': 'Bearer ' + reportData.token
      })
      .evaluate(function(token){
        window.printFixJwtToken = token;
      }, reportData.token)
      .forcePrintMedia()
      .wait(function () {
        //Client app sets this to true when data has been
        //  processed and report fully generated
        return window.renderable;
      })
      .wait(300) //to give individual charts animations time to finish
      .pdf(tmpFileName, pdfOpts)
      .end()
      .then(function () {
        //this is what gets passed to consumer in .then function
        var readStream = fs.createReadStream(tmpFileName);
        readStream.on('end', function () {
          fs.unlink(tmpFileName);
        });
        return readStream;
      });
  }
};

davisford avatar Jan 06 '17 16:01 davisford

Thanks.

markstos avatar Jan 06 '17 19:01 markstos