phantom-render-stream
phantom-render-stream copied to clipboard
Pooling + Memory Leaks
Hiya, I'm seeing some serious leakage going on when using the pool feature. I may very well be using the API wrong, but can you take a quick look?
Here's what's going on with htop (showing threads) under normal conditions (after doing a few print jobs):
6969 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.41 ├─ pm2: Daemon
20465 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.83 │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20493 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:06.33 │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stre
23500 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20593 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20568 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20527 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20511 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20500 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.00 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20497 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.03 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20496 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.00 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20495 ubuntu 20 0 2146M 320M 19344 S 0.0 8.5 0:00.00 │ │ │ └─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20492 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20491 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20490 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20489 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20466 ubuntu 20 0 967M 51288 6772 S 0.0 1.3 0:00.02 │ │ └─ node /home/ubuntu/git/warehouse-print/app.js
7008 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7007 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7006 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7005 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
6970 ubuntu 20 0 679M 23440 5104 S 0.0 0.6 0:00.01 │ └─ pm2: Daemon
Now, I run a few more print jobs:
36816 6136 1528 S 0.0 0.2 0:01.38 /sbin/init
6969 ubuntu 20 0 679M 23536 5104 S 0.0 0.6 0:00.46 ├─ pm2: Daemon
20465 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:01.05 │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20493 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:14.16 │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stre
29349 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29341 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29336 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.00 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29331 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29322 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29314 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29296 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
29285 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
23500 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20593 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20568 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.01 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20527 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20511 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.02 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20500 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.00 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20497 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.03 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20496 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.00 │ │ │ ├─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20495 ubuntu 20 0 2871M 636M 19344 S 0.0 16.9 0:00.00 │ │ │ └─ /home/ubuntu/git/warehouse-print/node_modules/phantom-render-stream/node_modules/phantomjs/lib/phantom/bin/phantomjs --disk-cache=true /home/ubuntu/git/warehouse-print/node_modules/phantom-render-s
20492 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20491 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20490 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:00.01 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20489 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:00.00 │ │ ├─ node /home/ubuntu/git/warehouse-print/app.js
20466 ubuntu 20 0 970M 55028 6772 S 0.0 1.4 0:00.02 │ │ └─ node /home/ubuntu/git/warehouse-print/app.js
7008 ubuntu 20 0 679M 23536 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7007 ubuntu 20 0 679M 23536 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7006 ubuntu 20 0 679M 23536 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
7005 ubuntu 20 0 679M 23536 5104 S 0.0 0.6 0:00.00 │ ├─ pm2: Daemon
6970 ubuntu 20 0 679M 2
The number of phantom instances has increased and it doesn't seem to go back down. Over a longer period of time (~ couple weeks), eventually I exhaust the physical RAM and the machine has to be reset.
I'm using the API like so:
// Renderer created once with the pool option set to 5.
var render = phantom({
  /* other options */
  pool: 5
});

// The PDFs are uploaded to Amazon S3 via streams (that is what `upload` is);
// the rendered output is gzipped on the way there.
var upload = s2stream.upload({ /* options */ });
var compress = zlib.createGzip();

// Log the failure and report it to the caller.
var handleUploadError = function (err) {
  log.error(err);
  done(err);
};

// Log the upload details and hand them to the caller.
var handleUploaded = function (info) {
  log.info(info);
  done(null, info);
};

upload.on('error', handleUploadError);
upload.on('uploaded', handleUploaded);

// render -> gzip -> upload
var rendered = render(url, {
  /* options */
});
var gzipped = rendered.pipe(compress);
gzipped.pipe(upload);
If I do render.destroy( )
like this:
// Variant of the upload handlers that destroys the renderer as soon as the
// upload settles, on failure and on success alike.
upload.on('error', function (err) {
  render.destroy();
  done(err);
});
upload.on('uploaded', function (info) {
  render.destroy();
  done();
});
It seems to clean up and not have the leaks, but it also doesn't seem to keep the pool available, so on every new print job, it spins up 5 more instances of phantom, processes via one of them, and then shuts it all down. In essence, I can stop the leak from happening, but I no longer have the advantage of using the pool at all.
So, I'm wondering if I'm using the API wrong, or is there another issue here?
I do have a fork of your repo that is older and not up to date with the latest, so I will investigate a fetch upstream and see what has changed, but I wanted to just pose the question here.
Thanks!
What is your maxRenders
value set to? Recycling the workers periodically should stabilize the memory usage if the leak is in the phantom process. The default value is 20.
maxRenders
is not set currently, so I guess it would be 20.
The box is an EC2 c3.large with 3.7GB RAM. With the current workload / frequency on this particular node, that seems to last for a few weeks before it just gets bogged down and the system can't allocate RAM anymore. I've noticed just watching htop
and issuing jobs manually that sometimes it does not spawn new phantom instances and uses one of the available...but sometimes it does..even under quiescent conditions where I am the only one triggering it in sequential fashion. I've also noticed that sometimes those "new" phantom instances get cleaned up, and sometimes they don't, and after a while -- they pile up.
I haven't investigated the code in great detail on how the pooling works here...haven't had time, just thought I'd reach out quickly first to see if this has been seen before.
For now, I've injected the render.destroy()
call just to monitor it and rule out anything else on the box that might be the culprit. That undoubtedly seems to be doing the trick. After about 50 jobs, the memory stays resident ~250MB...spiking only briefly when the job runs.
I ran into memory growth today after the tool was used over 100 times today, growing a PhantomJS process to 6.7 Megs.
It turns out that maxRenders is only documented to have a default value of 20
. The actual value in the code is 500
. So try a low maxRenders
and see if that helps. You had probably not reached the 500
limit yet. I'm going to try the same.
I would also think an even lower default value than 500 is sensible. At least a couple of us have already run into exhausting RAM before we reach that limit.
Oh wow -- I missed that it was being set to 500. Will try to force it to a more sane value.
@davisford @markstos Do we still have an issue here?
It might be relevant to re-test when merging #93
The leakage should be re-tested with Phantom 2.
Having a default maxRenders value of 20
vs 500
may still be a better default value.
Yes, AFAIK it still leaks. I just basically disabled the pool to keep it stable, so each request spawns a whole new phantom process for me and destroys it at the end. That works for me now.
Unfortunately, I looked into Phantom2 and it has some major issues printing PDFs (i.e. zoom factor is all screwed up) and most people are waiting for a proper fix so I haven't bothered trying to upgrade.
The core tech from atom.io (electro-something) looks more promising as a PDF generator as an alternative to phantom. Might be worth investigating and wrapping it up with the same API here.
thanks for the input - definitely worth checking out
It leaks on Debian GNU/Linux 7.10 (Linux local 3.2.0-4-amd64 SMP Debian 3.2.78-1 x86_64 GNU/Linux), but it runs stably on Darwin Kernel Version 16.1.0 (MacBook).
The problem persists even with a pool of 1 — it created ~10 phantom processes!
@davisford Did you end up looking at atom.io's solution for PDF generation?
FYI to those on the thread, there's a fork of this project at https://github.com/SamMorrowDrums/phantom-render-stream that is currently trying to take many of the outstanding pull requests from this repo and get them out in a new release. Ideally this would be an update to maintainership, but could be published as a fork if this repo remains inactive.
Yes, we switched to nightmare.js with electron.io -- it works great. It was a bit of a pain to get working but it has been running in production for months now generating tons of PDFs for us.
@davisford Do you recall any tips for getting Nightmare.js working, to make it less painful for those who might follow in your footsteps?
One thing I notice is that Nightmare.js doesn't seem to have a streaming option, so if you are using phantom-render-stream
in a stream-based workflow, a little code will need to be rewritten to adapt to its file-based APIs.
Sorry for the lag in response. One thing...with electron/nightmare, you need to use xvfb..example script to start process:
#!/bin/sh
# Start the app under xvfb-run, since electron/nightmare needs xvfb to run.
# -a asks xvfb-run to pick a free server number automatically.
# The path is quoted so that a working directory containing whitespace is
# still passed to node as a single argument.
xvfb-run -a /usr/bin/node "$(pwd)/app.js"
Here's a snippet of how we use nightmare, we create our own read stream and return...there are some workarounds in there that may not be necessary, and were unique to our own situation where we are generating pdfs of d3/chart-heavy and table heavy pages.
var Nightmare = require('nightmare'),
fs = require('fs'),
path = require('path');
module.exports = {
renderToPDF: function (reportData, reportId) {
var tmpFileName = path.join(__dirname, reportId + '.pdf');
return new Nightmare(nightmareOpts)
.goto(reportData.url + '&format=pdf', {
'Authorization': 'Bearer ' + reportData.token
})
.evaluate(function(token){
window.printFixJwtToken = token;
}, reportData.token)
.forcePrintMedia()
.wait(function () {
//Client app sets this to true when data has been
// processed and report fully generated
return window.renderable;
})
.wait(300) //to give individual charts animations time to finish
.pdf(tmpFileName, pdfOpts)
.end()
.then(function () {
//this is what gets passed to consumer in .then function
var readStream = fs.createReadStream(tmpFileName);
readStream.on('end', function () {
fs.unlink(tmpFileName);
});
return readStream;
});
}
};
Thanks.