puppeteer-extra
puppeteer-extra copied to clipboard
[Bug] Production build can't launch Puppeteer
Describe the bug
I am writing a web application that scrapes a site, using the Nuxt 3 framework.
In development mode everything works, but in the production build the server throws an error:
Code Snippet
my puppeteer service
import puppeteer from 'puppeteer-extra';
import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import BlockResourcesPlugin from 'puppeteer-extra-plugin-block-resources';
import proxyChain from 'proxy-chain';
import type { Browser } from 'puppeteer';
// Upstream proxy URL (redacted here); passed to proxyChain.anonymizeProxy()
// before the browser is launched.
const ProxyUrl = 'xxxxxxxx';
// Pool of mobile browser user-agent strings; getUserAgent() picks one at random.
const userAgents = [
'Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/_BuildID_) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36',
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1',
'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19',
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
];
/** Set once the puppeteer-extra plugins have been attached to the shared instance. */
let pluginsRegistered = false;

/**
 * Attaches the stealth, adblocker and block-resources plugins exactly once.
 *
 * `puppeteer` from puppeteer-extra is a module-level singleton, so calling
 * `use()` on every scrape would keep stacking additional plugin instances.
 */
function registerPluginsOnce(): void {
  if (pluginsRegistered) {
    return;
  }
  puppeteer.use(StealthPlugin());
  puppeteer.use(AdblockerPlugin({ blockTrackers: true, blockTrackersAndAnnoyances: true }));
  puppeteer.use(
    BlockResourcesPlugin({
      blockedTypes: new Set(['font', 'image']),
    }),
  );
  pluginsRegistered = true;
}

/**
 * Opens every URL in one shared browser (in parallel) and returns the cleaned
 * text of the element matching `targetSelector` on each page.
 *
 * @param urls - Pages to visit; a missing or non-array value is treated as empty.
 * @param targetSelector - CSS selector of the element whose text is extracted.
 * @returns The extracted texts, in the same order as `urls`.
 * @throws Rethrows any launch/navigation/selector error after the browser and
 *   the anonymized proxy have been shut down.
 */
export async function getPagesContent(
  urls: string[],
  targetSelector: string,
): Promise<string[]> {
  registerPluginsOnce();

  const newProxyUrl = await proxyChain.anonymizeProxy(ProxyUrl);
  let browser: Browser | undefined;

  try {
    // NOTE(review): `headless: false` needs a display server on the host —
    // confirm this is intended for the production environment.
    browser = await puppeteer.launch({
      headless: false,
      ignoreHTTPSErrors: true,
      defaultViewport: {
        height: 800,
        width: 480,
      },
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--disable-gpu',
        '--window-size=480,800',
        '--lang=ru,ru-RU,en-US,en',
        `--proxy-server=${newProxyUrl}`,
      ],
      executablePath: '/usr/bin/chromium-browser',
    });

    // One user agent is chosen per batch and shared by all pages.
    const currentUserAgent = getUserAgent();
    console.log(urls);

    const launched = browser;
    const targets = Array.isArray(urls) ? urls : [];
    const data = await Promise.all(
      targets.map((url) => getPageContent(launched, url, targetSelector, currentUserAgent)),
    );
    return data as string[];
  } finally {
    // Always release resources, even when launching or a single page failed;
    // otherwise Chromium processes and the local proxy server are leaked.
    try {
      await browser?.close();
    } finally {
      await proxyChain.closeAnonymizedProxy(newProxyUrl, true);
    }
  }
}
/**
 * Loads `url` in a new tab and returns the text of the element matching
 * `targetSelector`, with every whitespace character and every "₽" removed.
 *
 * @param browser - Already-launched browser to open the tab in.
 * @param url - Address to navigate to.
 * @param targetSelector - CSS selector of the element to read.
 * @param userAgent - User-agent string applied to the tab.
 * @returns The cleaned text content of the matched element.
 * @throws If navigation fails, or the selector does not resolve to an element.
 */
async function getPageContent(
  browser: Browser,
  url: string,
  targetSelector: string,
  userAgent: string,
): Promise<string> {
  const page = await browser.newPage();
  try {
    await page.setUserAgent(userAgent);
    await page.setExtraHTTPHeaders({
      'Accept-Language': 'ru,ru-RU,en-US,en',
    });
    // Override the navigator language properties before any page script runs,
    // so they agree with the Accept-Language header set above.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'language', {
        get: function () {
          return 'ru-RU';
        },
      });
      Object.defineProperty(navigator, 'languages', {
        get: function () {
          return ['ru', 'ru-RU', 'en-US', 'en'];
        },
      });
    });

    await page.goto(url, { waitUntil: 'networkidle2' });

    const searchedBlock = await page.waitForSelector(targetSelector);
    if (!searchedBlock) {
      throw new Error(`Selector "${targetSelector}" did not match an element on ${url}`);
    }

    const rawText = await searchedBlock.evaluate((el) => el.textContent);
    // Removing every whitespace character already covers what trim() would do.
    return (rawText ?? '').replace(/[₽\s]/g, '');
  } finally {
    // Close the tab on failure too, so a failed scrape does not leave it open.
    await page.close();
  }
}
/**
 * Picks one entry of `userAgents` uniformly at random.
 *
 * @returns A user-agent string from the module-level pool.
 */
function getUserAgent() {
  const randomIndex = Math.floor(Math.random() * userAgents.length);
  return userAgents[randomIndex];
}
my package.json
{
"private": true,
"version": "0.0.0",
"scripts": {
"build": "nuxt build",
"dev": "nuxt dev",
"generate": "nuxt generate",
"preview": "nuxt preview ./build",
"bns": "yarn build; yarn preview"
},
"devDependencies": {
"@heroicons/vue": "^2.0.10",
"@nuxtjs/tailwindcss": "^5.3.2",
"@tailwindcss/forms": "^0.5.3",
"@types/bcrypt": "^5.0.0",
"@types/jsonwebtoken": "^8.5.9",
"@types/jwt-decode": "^3.1.0",
"@types/node": "^18.7.15",
"@types/nodemailer": "^6.4.5",
"@vueuse/core": "^9.1.1",
"@vueuse/nuxt": "^9.1.1",
"nuxt": "^3.0.0-rc.9",
"@types/puppeteer": "^5.4.6",
"sass": "^1.54.8"
},
"dependencies": {
"bcrypt": "^5.0.1",
"dotenv": "^16.0.2",
"jsonwebtoken": "^8.5.1",
"jwt-decode": "^3.1.2",
"nodemailer": "^6.7.8",
"pg": "^8.8.0",
"proxy-chain": "^2.0.7",
"puppeteer": "^17.1.1",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-adblocker": "^2.13.5",
"puppeteer-extra-plugin-block-resources": "^2.4.2",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"reflect-metadata": "^0.1.13",
"typeorm": "^0.3.9"
}
}
Versions
System: OS: Linux 5.19 Nobara Linux 36 (Thirty Six) CPU: (4) x64 Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz Memory: 8.61 GB / 15.59 GB Container: Yes Shell: 5.8.1 - /usr/bin/zsh Binaries: Node: 16.14.0 - /usr/bin/node Yarn: 1.22.17 - /usr/bin/yarn npm: 8.3.1 - /usr/bin/npm npmPackages: puppeteer: ^17.1.1 => 17.1.1 puppeteer-extra: ^3.3.4 => 3.3.4 puppeteer-extra-plugin-adblocker: ^2.13.5 => 2.13.5 puppeteer-extra-plugin-block-resources: ^2.4.2 => 2.4.2 puppeteer-extra-plugin-stealth: ^2.11.1 => 2.11.1
The solution for me was to install two dependencies on the hosting server, in the project folder, after the project had been built:
yarn add puppeteer-extra-plugin-user-preferences
yarn add puppeteer-extra-plugin-stealth
The most interesting thing is that I don't use puppeteer-extra-plugin-user-preferences
anywhere inside the project.
This happened to me too, after upgrading the stealth plugin and puppeteer-extra. It (puppeteer-extra-plugin-user-preferences) is listed as a regular dependency, though, not a peer dependency, which is just weird. The same goes for the data-dir plugin.