selenium-wire icon indicating copy to clipboard operation
selenium-wire copied to clipboard

Cloudflare on v2 and No Proxies on V1

Open ENUM1 opened this issue 3 years ago • 6 comments

from seleniumwire.undetected_chromedriver import Chrome, ChromeOptions
from seleniumwire import webdriver
import time

# Options for the undetected-chromedriver flavour of selenium-wire's Chrome;
# the browser is run with a visible window.
chrome_options = ChromeOptions()
chrome_options.headless = False

# Request capture is switched off and traffic is sent through the
# authenticated upstream proxy given in the selenium-wire options.
driver = Chrome(options=chrome_options, seleniumwire_options={
    'disable_capture': True,
    'proxy': {
        'http': 'http://user:[email protected]:30000'
    }
}, use_subprocess=True)

# Always shut the browser down, even if the page load fails, so that no
# orphaned Chrome/chromedriver processes are left behind.
try:
    driver.get('https://api.myip.com/')

    time.sleep(2)
finally:
    driver.quit()

Using v2 triggers Cloudflare, so it isn't usable, although it does allow the use of proxies. With v1, on the other hand, you can evade Cloudflare but you can't add proxies; it just bypasses them.

Any help greatly appreciated.

ENUM1 avatar Feb 05 '22 10:02 ENUM1

Unfortunately, I'm using SmartProxy as a provider and the format is slightly different, so I'm unable to use the above. http://user:pass@host:port

Hopefully, however, some of you find the above useful. I noted that some users were charging for this information, which on an open-source project is quite outrageous.

ENUM1 avatar Feb 07 '22 10:02 ENUM1

Hope it's okay to share here @wkeeling but a workaround that should be okay for most people is as follows

import os
import zipfile
import time
import undetected_chromedriver.v2 as uc
import threading
from selenium.webdriver.common.keys import Keys

def create_chromedriver(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, USER_AGENT):
    """Start undetected Chrome behind an authenticated HTTP proxy.

    A small Manifest V2 extension is generated on the fly: it points Chrome
    at the given proxy and answers the proxy's authentication challenge with
    the supplied credentials. The extension is written to
    ``proxy_auth_plugin.zip`` in the current working directory, loaded into
    the browser, and the browser is then pointed at an IP-lookup page.

    Args:
        PROXY_HOST: Proxy host name or IP address.
        PROXY_PORT: Proxy port (an int, or a string containing an int).
        PROXY_USER: User name for proxy authentication.
        PROXY_PASS: Password for proxy authentication.
        USER_AGENT: User-agent string to pass to Chrome; a falsy value
            leaves Chrome's default user agent in place.

    Returns:
        The started ``uc.Chrome`` driver, so the caller can keep using it
        and eventually call ``quit()``.

    Raises:
        ValueError: If ``PROXY_PORT`` cannot be converted to an integer.
    """
    # Local import: only needed to escape values embedded in the JS below.
    import json

    manifest_json = """
    {
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
    }
    """

    # Values are JSON-encoded before being spliced into the script so that
    # quotes or backslashes in the host/credentials cannot break (or inject
    # into) the generated JavaScript. The port is validated as an integer.
    background_js = """
    var config = {
        mode: "fixed_servers",
        rules: {
        singleProxy: {
            scheme: "http",
            host: %s,
            port: %d
        },
        bypassList: ["localhost"]
        }
    };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
            username: %s,
            password: %s
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
    );
    """ % (
        json.dumps(str(PROXY_HOST)),
        int(PROXY_PORT),
        json.dumps(str(PROXY_USER)),
        json.dumps(str(PROXY_PASS)),
    )

    def get_chromedriver(use_proxy=True, user_agent=USER_AGENT):
        """Build the Chrome options (optionally with the proxy extension) and start the driver."""
        chrome_options = uc.ChromeOptions()
        if use_proxy:
            pluginfile = 'proxy_auth_plugin.zip'
            with zipfile.ZipFile(pluginfile, 'w') as zp:
                zp.writestr("manifest.json", manifest_json)
                zp.writestr("background.js", background_js)
            chrome_options.add_extension(pluginfile)
            chrome_options.add_experimental_option("detach", True)
            chrome_options.add_argument("--mute-audio")
        if user_agent:
            # Use the helper's own parameter so an explicit override is honoured.
            chrome_options.add_argument('--user-agent=%s' % user_agent)
        # `options` is the supported keyword; `chrome_options` is the
        # deprecated Selenium spelling.
        return uc.Chrome(options=chrome_options)

    driver = get_chromedriver(use_proxy=True)
    driver.get("https://whatismyipaddress.com/")
    return driver
    
# Placeholder proxy settings -- replace these with your provider's values.
PROXY_HOST = 'proxy.example.com'
PROXY_PORT = 8000
PROXY_USER = 'username'
PROXY_PASS = 'password'

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'

# Pass the user agent defined above; the previous call referenced names that
# were never defined and failed with NameError.
create_chromedriver(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, user_agent)

ENUM1 avatar Feb 07 '22 10:02 ENUM1

I really need this to work, and it seems like I am so close...but although this code works and beats Cloudflare, it won't use the specified proxy, but doesn't throw any kind of error. The added code is for headless use and a specific binary.

Environment: Ubuntu 21, headless Python 3.9.7

import os
import time
import zipfile
from selenium.webdriver.chrome.service import Service
from loguru import logger
import random
import undetected_chromedriver.v2 as uc

# Proxy configuration used for testing; main() reads the ip, port, user and
# pass entries from this mapping.
proxy_data = dict(
    proxy_ip='gate.dc.smartproxy.com',
    proxy_port=27923,
    proxy_auth='LP',
    proxy_user='xxx',
    proxy_pass='JfAp7xxxxxxxeXP8k',
)

# User-agent string handed to create_extension() by main().
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/86.0.4240.198 Safari/537.36'
)

def main():
    """Start undetected Chrome through the proxy extension and print an IP page.

    Reads the module-level ``proxy_data`` and ``user_agent`` settings, builds
    the proxy-auth extension with ``create_extension``, starts the Chromium
    and chromedriver binaries expected under ``bin/`` next to this script,
    logs the browser/driver versions and prints the page source of
    https://www.whatismyip.com. Errors are logged as warnings instead of
    being raised, and the browser is always shut down.
    """
    # setting up proxy
    proxy_ip = proxy_data["proxy_ip"]
    proxy_port = proxy_data["proxy_port"]
    proxy_user = proxy_data["proxy_user"]
    proxy_pass = proxy_data["proxy_pass"]

    logger.info("starting application")
    logger.info("cresting extension")
    create_extension(proxy_ip, proxy_port, proxy_user, proxy_pass, user_agent)

    # setup binary
    dir_path = os.path.dirname(os.path.realpath(__file__))
    chromium_path = os.path.join(dir_path, "bin", "chromium-browser")
    chromedriver_path = os.path.join(dir_path, "bin", "chromedriver")
    ser = Service(chromedriver_path)

    options = uc.ChromeOptions()
    options.binary_location = chromium_path
    # NOTE(review): per the discussion accompanying this script, extensions
    # are not loaded in Chrome's headless mode, so the proxy extension may be
    # ignored here. Running headed under a virtual display (e.g. Xvfb) is the
    # suggested alternative -- confirm for the Chrome version in use.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-setuid-sandbox")
    dbug_port = random.randrange(20000, 30000)
    options.add_argument(f"--remote-debugging-port={dbug_port}")
    # The switch is spelled "-usage"; the previous "-using" spelling is not a
    # Chrome flag and was silently ignored.
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("start-maximized")
    options.add_argument("disable-infobars")

    # create_extension() writes the archive relative to the current working
    # directory, so resolve it from there rather than from the script
    # directory (the two differ when the script is launched from elsewhere).
    pluginfile = 'proxy_auth_plugin.zip'
    ext_path = os.path.abspath(pluginfile)
    options.add_extension(ext_path)
    time.sleep(1)
    options.add_argument("--mute-audio")

    logger.info("chromium_path:")
    logger.debug(chromium_path)
    logger.info("chromedriver_path:")
    logger.debug(chromedriver_path)

    driver = None
    try:
        driver = uc.Chrome(
            service=ser,
            options=options,
        )
        time.sleep(1)
        brow_version = driver.capabilities['browserVersion']
        cdri_version = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        data = dict(driver.capabilities)
        logger.debug(data)
        logger.info(f"browser: {brow_version}")
        logger.info(f"driver: {cdri_version}")
        logger.success("Good binary")

        logger.info("Now checking Corbot IP")
        driver.get("https://www.whatismyip.com")
        html = driver.page_source
        print(html)
        time.sleep(5)
    except Exception as e:
        # Top-level boundary of the script: report the failure and fall
        # through to cleanup instead of crashing.
        logger.warning("Failed")
        logger.warning(e)
    finally:
        # Quit even when something above failed, so no browser or
        # chromedriver process is left running.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                logger.warning(e)

def create_extension(PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS, USER_AGENT):
    """Write a Chrome extension that routes traffic through an authenticated proxy.

    The extension (Manifest V2) consists of a manifest and a background
    script that configures a fixed HTTP proxy and answers the proxy's
    authentication challenge. It is written to ``proxy_auth_plugin.zip`` in
    the current working directory, overwriting any existing file.

    Args:
        PROXY_HOST: Proxy host name or IP address.
        PROXY_PORT: Proxy port (an int, or a string containing an int).
        PROXY_USER: User name for proxy authentication.
        PROXY_PASS: Password for proxy authentication.
        USER_AGENT: Not used by this function; kept so existing callers
            keep working.

    Returns:
        Absolute path of the written zip archive.

    Raises:
        ValueError: If ``PROXY_PORT`` cannot be converted to an integer.
    """
    # Local import: only needed to serialise the manifest and escape values.
    import json

    manifest = {
        "version": "1.0.0",
        "manifest_version": 2,
        "name": "Chrome Proxy",
        "permissions": [
            "proxy",
            "tabs",
            "unlimitedStorage",
            "storage",
            "<all_urls>",
            "webRequest",
            "webRequestBlocking",
        ],
        "background": {
            "scripts": ["background.js"],
        },
        "minimum_chrome_version": "22.0.0",
    }
    manifest_json = json.dumps(manifest, indent=4)

    # Values are JSON-encoded before being spliced into the script so that
    # quotes or backslashes in the host/credentials cannot break (or inject
    # into) the generated JavaScript. The port is validated as an integer.
    background_js = """
    var config = {
            mode: "fixed_servers",
            rules: {
            singleProxy: {
                scheme: "http",
                host: %s,
                port: %d
            },
            bypassList: ["*jssdevgroup"]
            }
        };

    chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

    function callbackFn(details) {
        return {
            authCredentials: {
                username: %s,
                password: %s
            }
        };
    }

    chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
    );
    """ % (
        json.dumps(str(PROXY_HOST)),
        int(PROXY_PORT),
        json.dumps(str(PROXY_USER)),
        json.dumps(str(PROXY_PASS)),
    )

    pluginfile = 'proxy_auth_plugin.zip'
    with zipfile.ZipFile(pluginfile, 'w') as zp:
        zp.writestr("manifest.json", manifest_json)
        zp.writestr("background.js", background_js)
    return os.path.abspath(pluginfile)

# Only launch the browser when the file is executed as a script, so that
# importing it (e.g. to reuse create_extension) has no side effects.
if __name__ == "__main__":
    main()

krypterro avatar Mar 22 '22 03:03 krypterro

Ah, it seems that no extension works in headless mode in any variation of Chrome. So...none of the above is going to work in headless mode.

krypterro avatar Mar 23 '22 00:03 krypterro

@krypterro If you want to run the code on a server with an extension, you have to use a virtual display such as Xvfb and must not set headless mode to True in the options. The Python package xvfbwrapper can be used for this.

pawanpaudel93 avatar Apr 06 '22 11:04 pawanpaudel93

            scheme: "http",

Good solution, but how to work with SOCKS5 proxies with authentication? I've tried with schemes socks, socks4 , socks5, but without any success. Any ideas how to do that?

kyxaxa avatar Jun 30 '22 09:06 kyxaxa