selenium-wire
selenium-wire copied to clipboard
Cloudflare on v2 and No Proxies on V1
from seleniumwire.undetected_chromedriver import Chrome, ChromeOptions
from seleniumwire import webdriver
import time
# Minimal reproduction: start Chrome through selenium-wire's
# undetected_chromedriver wrapper with an upstream proxy configured, then load
# a page that reports the caller's IP address.
chrome_options = ChromeOptions()
chrome_options.headless = False
driver = Chrome(
    options=chrome_options,
    seleniumwire_options={
        'disable_capture': True,
        'proxy': {
            # NOTE(review): the credential/host part of this URL looks mangled
            # (e-mail obfuscation); the intended shape is presumably
            # http://user:pass@host:port -- confirm before running.
            'http': 'http://user:[email protected]:30000'
        }
    },
    use_subprocess=True,
)
try:
    driver.get('https://api.myip.com/')
    time.sleep(2)
finally:
    # Always shut the browser down, even when the page load raises, so no
    # Chrome process is left behind.
    driver.quit()
Using v2 triggers Cloudflare, so it isn't usable in practice, although it does allow the use of proxies. With v1, however, you can evade Cloudflare but you can't add proxies: it simply bypasses them.
Any help greatly appreciated.
Unfortunately, I'm using SmartProxy as a provider and the format is slightly different, so I'm unable to use the above.
http://user:pass@host:port
Hopefully, however, some of you find the above useful. I noted that some users were charging for this information, which on an open-source project is quite outrageous.
Hope it's okay to share here, @wkeeling, but a workaround that should be okay for most people is as follows:
import os
import zipfile
import time
import undetected_chromedriver.v2 as uc
import threading
from selenium.webdriver.common.keys import Keys
def create_chromedriver(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, USER_AGENT):
    """Launch undetected Chrome behind an authenticated HTTP proxy.

    A small Chrome extension is generated on the fly: it pins the browser to
    the given proxy and answers the proxy's authentication challenge with the
    supplied credentials. The browser is then opened on an IP-check page.

    Args:
        PROXY_HOST: Proxy host name or IP address.
        PROXY_PORT: Proxy port (int or numeric string).
        PROXY_USER: Proxy user name.
        PROXY_PASS: Proxy password.
        USER_AGENT: User-agent string used by default for the browser; a
            falsy value leaves Chrome's own user agent in place.

    Returns:
        The started ``uc.Chrome`` driver, so the caller can keep using it.
    """
    # Local import: only needed to escape values embedded in the JS source.
    import json

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every value is rendered as a JSON string literal (quotes included), so a
    # quote or backslash in the credentials cannot break out of the script.
    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {
                scheme: "http",
                host: %s,
                port: parseInt(%s)
            },
            bypassList: ["localhost"]
        }
    };
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }
    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        {urls: ["<all_urls>"]},
        ['blocking']
    );
    """ % (
        json.dumps(str(PROXY_HOST)),
        json.dumps(str(PROXY_PORT)),
        json.dumps(str(PROXY_USER)),
        json.dumps(str(PROXY_PASS)),
    )

    def get_chromedriver(use_proxy=True, user_agent=USER_AGENT):
        """Build the Chrome options (optionally with the proxy extension) and start the driver."""
        chrome_options = uc.ChromeOptions()
        if use_proxy:
            # The archive is written relative to the current working directory.
            pluginfile = 'proxy_auth_plugin.zip'
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js)
            chrome_options.add_extension(pluginfile)
        chrome_options.add_experimental_option("detach", True)
        chrome_options.add_argument("--mute-audio")
        if user_agent:
            # Honour the argument actually passed to this helper.
            chrome_options.add_argument('--user-agent=%s' % user_agent)
        # ``options`` is the keyword the other snippet in this thread passes to
        # uc.Chrome; ``chrome_options`` risks the options not being applied.
        return uc.Chrome(options=chrome_options)

    driver = get_chromedriver(use_proxy=True)
    driver.get("https://whatismyipaddress.com/")
    return driver
# Example invocation. PROXY_HOST, PROXY_PORT, PROXY_USER and PROXY_PASS are not
# defined in the code shown here; set them to your proxy's details first.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
# Pass the name that is actually defined above (the original referenced an
# undefined USER_AGENT and failed with NameError).
create_chromedriver(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, user_agent)
I really need this to work, and it seems like I am so close... Although this code runs and beats Cloudflare, it doesn't use the specified proxy, and it doesn't throw any kind of error either. The added code is for headless use and a specific binary.
Environment: Ubuntu 21, headless Python 3.9.7
import os
import time
import zipfile
from selenium.webdriver.chrome.service import Service
from loguru import logger
import random
import undetected_chromedriver.v2 as uc
# Proxy endpoint and credentials used for the test run.
proxy_data = {
    'proxy_ip': 'gate.dc.smartproxy.com',
    'proxy_port': 27923,
    'proxy_auth': 'LP',
    'proxy_user': 'xxx',
    'proxy_pass': 'JfAp7xxxxxxxeXP8k',
}
# User-agent string handed to create_extension() by main().
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/86.0.4240.198 Safari/537.36'
)
def main():
    """Build the proxy extension, start Chromium and print the IP-check page.

    Reads the module-level ``proxy_data`` and ``user_agent``, generates the
    proxy extension via ``create_extension``, then launches the Chromium and
    chromedriver binaries expected under ``bin/`` next to this file. Driver
    and browser versions are logged, the page source of the IP-check site is
    printed, and any failure is logged as a warning instead of propagating.
    """
    # setting up proxy
    proxy_ip = proxy_data["proxy_ip"]
    proxy_port = proxy_data["proxy_port"]
    proxy_user = proxy_data["proxy_user"]
    proxy_pass = proxy_data["proxy_pass"]

    logger.info("starting application")
    logger.info("cresting extension")
    create_extension(proxy_ip, proxy_port, proxy_user, proxy_pass, user_agent)

    # setup binary
    dir_path = os.path.dirname(os.path.realpath(__file__))
    chromium_path = os.path.join(dir_path, "bin", "chromium-browser")
    chromedriver_path = os.path.join(dir_path, "bin", "chromedriver")
    ser = Service(chromedriver_path)

    options = uc.ChromeOptions()
    options.binary_location = chromium_path
    # NOTE(review): extensions are reported not to load in headless Chrome, in
    # which case the proxy extension added below is silently ignored. If the
    # proxy is not applied, drop --headless and run under a virtual display
    # (e.g. Xvfb) instead -- confirm for the Chrome build in use.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-setuid-sandbox")
    dbug_port = random.randrange(20000, 30000)
    options.add_argument(f"--remote-debugging-port={dbug_port}")
    # Correct spelling of the switch (was "--disable-dev-shm-using", which
    # Chrome does not recognise).
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("start-maximized")
    options.add_argument("disable-infobars")

    # create_extension() writes the archive relative to the current working
    # directory, so load it by the same relative name rather than from the
    # script's directory (the two differ when launched from elsewhere).
    pluginfile = 'proxy_auth_plugin.zip'
    options.add_extension(pluginfile)
    time.sleep(1)
    options.add_argument("--mute-audio")

    logger.info("chromium_path:")
    logger.debug(chromium_path)
    logger.info("chromedriver_path:")
    logger.debug(chromedriver_path)

    driver = None
    try:
        driver = uc.Chrome(
            service=ser,
            options=options,
        )
        time.sleep(1)
        brow_version = driver.capabilities['browserVersion']
        cdri_version = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        data = dict(driver.capabilities)
        logger.debug(data)
        logger.info(f"browser: {brow_version}")
        logger.info(f"driver: {cdri_version}")
        logger.success("Good binary")
        logger.info("Now checking Corbot IP")
        driver.get("https://www.whatismyip.com")
        html = driver.page_source
        print(html)
        time.sleep(5)
    except Exception as e:
        # Top-level boundary of the script: report the failure and fall
        # through to cleanup.
        logger.warning("Failed")
        logger.warning(e)
    finally:
        # Quit on every path so a failed run does not leave Chromium running.
        if driver is not None:
            driver.quit()
def create_extension(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, USER_AGENT):
    """Write ``proxy_auth_plugin.zip``, a Chrome extension for proxy auth.

    The archive holds a manifest and a background script that pins Chrome to
    the given HTTP proxy and answers its authentication challenge with the
    supplied credentials. It is written relative to the current working
    directory, overwriting any existing file of that name.

    Args:
        PROXY_HOST: Proxy host name or IP address.
        PROXY_PORT: Proxy port (int or numeric string).
        PROXY_USER: Proxy user name.
        PROXY_PASS: Proxy password.
        USER_AGENT: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The file name of the archive that was written.
    """
    # Local import: only needed to escape values embedded in the JS source.
    import json

    manifest_json = """
    {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking"
        ],
        "background": {
            "scripts": ["background.js"]
        },
        "minimum_chrome_version":"22.0.0"
    }
    """

    # Every value is rendered as a JSON string literal (quotes included), so a
    # quote or backslash in the credentials cannot break out of the script.
    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
            singleProxy: {
                scheme: "http",
                host: %s,
                port: parseInt(%s)
            },
            bypassList: ["*jssdevgroup"]
        }
    };
    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }
    chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        {urls: ["<all_urls>"]},
        ['blocking']
    );
    """ % (
        json.dumps(str(PROXY_HOST)),
        json.dumps(str(PROXY_PORT)),
        json.dumps(str(PROXY_USER)),
        json.dumps(str(PROXY_PASS)),
    )

    pluginfile = 'proxy_auth_plugin.zip'
    with zipfile.ZipFile(pluginfile, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    return pluginfile
# Script entry point: run the proxy check as soon as the module is executed.
main()
Ah, it seems that no extension works in headless mode in any variation of Chrome. So... none of the above is going to work in headless mode.
@krypterro If you want to run the code on a server with an extension, then you have to use a virtual display such as Xvfb and do not set headless mode to True in the options. There is a Python package for this: xvfbwrapper.
scheme: "http",
Good solution, but how can this work with SOCKS5 proxies that require authentication? I've tried the schemes socks, socks4 and socks5, but without any success. Any ideas how to do that?