botasaurus
botasaurus copied to clipboard
Unable to Bypass Cloudflare Captcha
I have been trying to scrape Upwork, but I am unable to bypass the captcha. Here is the code I am using:
from botasaurus.browser import browser, Driver
from selenium.webdriver.common.by import By
import re
@browser(
    wait_for_complete_page_load=True,
    headless=False,
)
def scrape_heading_task(driver: Driver, data):
    """Gather LinkedIn links and email addresses from the links on a page.

    Args:
        driver: Browser driver supplied by the ``@browser`` decorator.
        data: The URL handed to ``driver.get``.

    Returns:
        A dict with two keys: ``"linkedin_links"`` (every collected link
        containing ``"linkedin.com"``) and ``"emails"`` (every email-like
        substring found inside the collected links).
    """
    # NOTE(review): the positional True is presumably the Cloudflare-bypass
    # flag of Driver.get -- confirm against the botasaurus API.
    driver.get(data, True)

    page_links = driver.get_all_links()

    linkedin_only = []
    for href in page_links:
        if "linkedin.com" in href:
            linkedin_only.append(href)

    # Emails are searched for in the link strings themselves (e.g. mailto:
    # hrefs), not in the page text.
    found_emails = extract_emails(page_links)

    return {
        "linkedin_links": linkedin_only,
        "emails": found_emails,
    }
def extract_emails(strings):
    """Return every email-like substring found in *strings*, in order.

    Each string is scanned left to right with a simple
    ``local@domain.tld`` pattern; matches are concatenated in the order
    the strings are given. Duplicates are kept.

    Args:
        strings: Iterable of strings to scan.

    Returns:
        A flat list of all matched substrings (empty if nothing matches).
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    return [
        match
        for text in strings
        for match in re.findall(email_pattern, text)
    ]
# Run the scraping task against a single Upwork job-search URL.
# The URL is wrapped in a list, exactly as the task is invoked in the report.
scrape_heading_task(
    [
        "https://www.upwork.com/nx/search/jobs/?amount=1000-4999,5000-&client_hires=1-9,10-&hourly_rate=15-&nbs=1&payment_verified=1&q=NOT%20%28Wordpress,%20OR%20Drupal,%20OR%20Joomla,%20OR%20Typo3,%20OR%20Shopify,%20OR%20Shopify%20OR%20Templates,%20OR%20Shopify%20OR%20Theme,%20OR%20Shopify%20OR%20Apps,%20OR%20Shopify%20OR%20Development,%20OR%20Shopify%20OR%20Website%20OR%20Redesign%29&sort=recency&subcategory2_uid=531770282589057029,531770282584862733&t=0,1"
    ]
)
Please note that the code, and what I am trying to do with it, may not make much sense; I just wanted to test the library's capabilities and verify its claims.