puppeteer-extra icon indicating copy to clipboard operation
puppeteer-extra copied to clipboard

[Bug] Production build can't launch puppeteer

Open mihail727 opened this issue 1 year ago • 1 comments

Describe the bug

I am writing a web application for scraping a site using the Nuxt 3 framework. In development mode everything works, but in the production build the server throws an error:

Code Snippet

my puppeteer service
import puppeteer from 'puppeteer-extra';
import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import BlockResourcesPlugin from 'puppeteer-extra-plugin-block-resources';
import proxyChain from 'proxy-chain';
import type { Browser } from 'puppeteer';

// Upstream proxy URL handed to proxy-chain (the value is redacted in this snippet).
const ProxyUrl = 'xxxxxxxx';

// Mobile user-agent strings; getUserAgent() picks one of these at random.
// Tokens inside each string are separated by single spaces only: a stray TAB
// inside a User-Agent value does not match what any real browser sends.
const userAgents: readonly string[] = [
	'Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/_BuildID_) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36',
	'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1',
	'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19',
	'Mozilla/5.0 (iPhone; CPU iPhone OS 11_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
];

// Whether the puppeteer-extra plugins have already been attached to the shared instance.
let pluginsRegistered = false;

/**
 * Attaches the stealth, adblocker and resource-blocking plugins once per process.
 *
 * `puppeteer.use()` mutates the shared puppeteer-extra instance, so calling it
 * on every scrape would keep appending the same plugins again and again.
 */
function registerPluginsOnce(): void {
	if (pluginsRegistered) {
		return;
	}
	puppeteer.use(StealthPlugin());
	puppeteer.use(AdblockerPlugin({ blockTrackers: true, blockTrackersAndAnnoyances: true }));
	puppeteer.use(
		BlockResourcesPlugin({
			blockedTypes: new Set(['font', 'image']),
		}),
	);
	pluginsRegistered = true;
}

/**
 * Scrapes the text of `targetSelector` from every URL in `urls`, in parallel,
 * through a single browser instance routed via an anonymized proxy.
 *
 * @param urls Pages to visit. A missing or non-array value is treated as an empty list.
 * @param targetSelector CSS selector of the element whose text is extracted on each page.
 * @returns The extracted texts, in the same order as `urls`.
 * @throws Whatever the proxy setup, the browser launch or any single page scrape
 *   throws. The browser and the anonymized proxy are shut down in every case.
 */
export async function getPagesContent(urls: string[], targetSelector: string): Promise<string[]> {
	registerPluginsOnce();

	const newProxyUrl = await proxyChain.anonymizeProxy(ProxyUrl);
	let browser: Browser | undefined;

	try {
		browser = await puppeteer.launch({
			headless: false,
			ignoreHTTPSErrors: true,
			defaultViewport: {
				height: 800,
				width: 480,
			},
			args: [
				'--no-sandbox',
				'--disable-setuid-sandbox',
				'--disable-dev-shm-usage',
				'--disable-accelerated-2d-canvas',
				'--disable-gpu',
				'--window-size=480,800',
				'--lang=ru,ru-RU,en-US,en',
				`--proxy-server=${newProxyUrl}`,
			],
			executablePath: '/usr/bin/chromium-browser',
		});
		// Non-optional alias so the callbacks below do not see `Browser | undefined`.
		const activeBrowser = browser;

		// One user agent is shared by every page opened during this call.
		const currentUserAgent = getUserAgent();

		console.log(urls);
		const targets = Array.isArray(urls) ? urls : [];

		return await Promise.all(
			targets.map((url) => getPageContent(activeBrowser, url, targetSelector, currentUserAgent)),
		);
	} finally {
		// Always release the browser and the local proxy, including when the
		// launch or one of the page scrapes failed; otherwise they are leaked.
		try {
			await browser?.close();
		} finally {
			await proxyChain.closeAnonymizedProxy(newProxyUrl, true);
		}
	}
}

/**
 * Opens `url` in a new page of `browser`, waits for `targetSelector` and returns
 * its text with every whitespace character and every rouble sign (₽) removed.
 *
 * @param browser Browser instance in which the page is opened.
 * @param url Address to navigate to.
 * @param targetSelector CSS selector of the element whose text is read.
 * @param userAgent User-Agent string applied to the page before navigation.
 * @returns The cleaned text; an empty string when the element has no text content.
 * @throws If navigation fails, the selector wait fails, or the selector resolves
 *   to no element. The page is closed in every case.
 */
async function getPageContent(
	browser: Browser,
	url: string,
	targetSelector: string,
	userAgent: string,
): Promise<string> {
	const page = await browser.newPage();

	try {
		await page.setUserAgent(userAgent);
		await page.setExtraHTTPHeaders({
			'Accept-Language': 'ru,ru-RU,en-US,en',
		});
		// Override the navigator language properties before any page script runs,
		// so they agree with the Accept-Language header set above.
		await page.evaluateOnNewDocument(() => {
			Object.defineProperty(navigator, 'language', {
				get: function () {
					return 'ru-RU';
				},
			});
			Object.defineProperty(navigator, 'languages', {
				get: function () {
					return ['ru', 'ru-RU', 'en-US', 'en'];
				},
			});
		});
		await page.goto(url, { waitUntil: 'networkidle2' });

		const searchedBlock = await page.waitForSelector(targetSelector);
		if (searchedBlock == null) {
			throw new Error(`Selector "${targetSelector}" did not resolve to an element on ${url}`);
		}

		const rawText = await searchedBlock.evaluate((el) => el.textContent);

		// textContent may be null. Removing every \s character already strips
		// leading and trailing whitespace, so no separate trim() is needed.
		return (rawText ?? '').replace(/[₽\s]/g, '');
	} finally {
		// Close the page on failure too, so a failed scrape does not leak it.
		await page.close();
	}
}

/** Returns one pseudo-randomly chosen entry of the module-level `userAgents` list. */
function getUserAgent() {
	const pickedIndex = Math.floor(Math.random() * userAgents.length);
	return userAgents[pickedIndex];
}

my package.json
{
	"private": true,
	"version": "0.0.0",
	"scripts": {
		"build": "nuxt build",
		"dev": "nuxt dev",
		"generate": "nuxt generate",
		"preview": "nuxt preview ./build",
		"bns": "yarn build; yarn preview"
	},
	"devDependencies": {
		"@heroicons/vue": "^2.0.10",
		"@nuxtjs/tailwindcss": "^5.3.2",
		"@tailwindcss/forms": "^0.5.3",
		"@types/bcrypt": "^5.0.0",
		"@types/jsonwebtoken": "^8.5.9",
		"@types/jwt-decode": "^3.1.0",
		"@types/node": "^18.7.15",
		"@types/nodemailer": "^6.4.5",
		"@vueuse/core": "^9.1.1",
		"@vueuse/nuxt": "^9.1.1",
		"nuxt": "^3.0.0-rc.9",
		"@types/puppeteer": "^5.4.6",
		"sass": "^1.54.8"
	},
	"dependencies": {
		"bcrypt": "^5.0.1",
		"dotenv": "^16.0.2",
		"jsonwebtoken": "^8.5.1",
		"jwt-decode": "^3.1.2",
		"nodemailer": "^6.7.8",
		"pg": "^8.8.0",
		"proxy-chain": "^2.0.7",
		"puppeteer": "^17.1.1",
		"puppeteer-extra": "^3.3.4",
		"puppeteer-extra-plugin-adblocker": "^2.13.5",
		"puppeteer-extra-plugin-block-resources": "^2.4.2",
		"puppeteer-extra-plugin-stealth": "^2.11.1",
		"reflect-metadata": "^0.1.13",
		"typeorm": "^0.3.9"
	}
}

Versions

System: OS: Linux 5.19 Nobara Linux 36 (Thirty Six) CPU: (4) x64 Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz Memory: 8.61 GB / 15.59 GB Container: Yes Shell: 5.8.1 - /usr/bin/zsh Binaries: Node: 16.14.0 - /usr/bin/node Yarn: 1.22.17 - /usr/bin/yarn npm: 8.3.1 - /usr/bin/npm npmPackages: puppeteer: ^17.1.1 => 17.1.1 puppeteer-extra: ^3.3.4 => 3.3.4 puppeteer-extra-plugin-adblocker: ^2.13.5 => 2.13.5 puppeteer-extra-plugin-block-resources: ^2.4.2 => 2.4.2 puppeteer-extra-plugin-stealth: ^2.11.1 => 2.11.1

mihail727 avatar Sep 06 '22 19:09 mihail727

The solution for me was to install two dependencies on the hosting server, inside the project folder, after the project was built: `yarn add puppeteer-extra-plugin-user-preferences` and `yarn add puppeteer-extra-plugin-stealth`. The most interesting thing is that I don't use puppeteer-extra-plugin-user-preferences anywhere in the project.

mihail727 avatar Sep 09 '22 17:09 mihail727

This happened to me after upgrading the stealth plugin and puppeteer-extra. It is listed as a regular dependency, though, not a peer dependency, which is just weird. The same goes for the data-dir plugin.

radoslavkarlik avatar Oct 02 '22 08:10 radoslavkarlik