botasaurus icon indicating copy to clipboard operation
botasaurus copied to clipboard

Unable to Bypass Cloudflare Captcha

Open hazhayder opened this issue 4 months ago • 0 comments

I have been trying to scrape Upwork, but I am unable to bypass the Cloudflare captcha.

Image
from botasaurus.browser import browser, Driver
from selenium.webdriver.common.by import By
import re

@browser(
    headless=False,
    wait_for_complete_page_load=True,
)
def scrape_heading_task(driver: Driver, data):
    """Open the page at ``data`` and harvest contact details from its links.

    Args:
        driver: Browser driver supplied by the ``@browser`` decorator.
        data: Target passed straight to ``driver.get``.

    Returns:
        A dict with two keys: ``"linkedin_links"`` (every collected link
        containing ``"linkedin.com"``) and ``"emails"`` (every e-mail
        address that ``extract_emails`` finds inside the collected links).
    """
    # NOTE(review): the second positional argument is presumably the
    # Cloudflare-bypass flag -- confirm against the Driver.get signature.
    driver.get(data, True)

    page_links = driver.get_all_links()

    linkedin_only = []
    for href in page_links:
        if "linkedin.com" in href:
            linkedin_only.append(href)

    return {
        "linkedin_links": linkedin_only,
        "emails": extract_emails(page_links),
    }


def extract_emails(strings):
    """Return every e-mail address found in an iterable of strings.

    Matches are returned in order of appearance, string by string;
    duplicates are kept.

    Args:
        strings: Iterable of text fragments to scan.

    Returns:
        A flat list of all substrings matching the e-mail pattern.
    """
    matcher = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    return [address for text in strings for address in matcher.findall(text)]


# Upwork job-search page used as the scraping target.
UPWORK_SEARCH_URL = "https://www.upwork.com/nx/search/jobs/?amount=1000-4999,5000-&client_hires=1-9,10-&hourly_rate=15-&nbs=1&payment_verified=1&q=NOT%20%28Wordpress,%20OR%20Drupal,%20OR%20Joomla,%20OR%20Typo3,%20OR%20Shopify,%20OR%20Shopify%20OR%20Templates,%20OR%20Shopify%20OR%20Theme,%20OR%20Shopify%20OR%20Apps,%20OR%20Shopify%20OR%20Development,%20OR%20Shopify%20OR%20Website%20OR%20Redesign%29&sort=recency&subcategory2_uid=531770282589057029,531770282584862733&t=0,1"

# Kick off the scraping task with a one-element list of targets.
scrape_heading_task([UPWORK_SEARCH_URL])

Please note that the code and what I am attempting may not make much sense; I just wanted to test the library's capabilities and verify its claims.

hazhayder avatar Aug 22 '25 07:08 hazhayder