requests-html icon indicating copy to clipboard operation
requests-html copied to clipboard

A Browser closed issue

Open defaul0t opened this issue 1 year ago • 3 comments

Unhandled error: Browser closed unexpectedly:

closed — Unhandled error: Browser closed unexpectedly:

my code

from asyncio import events import uvloop import requests import asyncio, time import re import argparse import sys import threading from requests_html import AsyncHTMLSession, HTMLSession import urllib3 from pyppeteer import launch import hashlib import os

# Silence the InsecureRequestWarning spam from every verify=False request below.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Shared headers for all scans: a desktop Firefox user-agent.
# NOTE(review): 'Content-Encoding' describes the *request body* encoding;
# 'Accept-Encoding: gzip' was probably intended — confirm before changing.
headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0', 'Content-Encoding': 'gzip' }

# Use uvloop as the asyncio event-loop implementation.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

def get_url(url_txt):
    """Read *url_txt* and return a list of its lines, whitespace-stripped.

    Blank lines come back as empty strings; callers filter if needed.
    """
    with open(url_txt, "r") as fh:
        return [line.strip() for line in fh]

def output_data(i, out_name):
    """Append the string *i*, terminated by a newline, to UTF-8 file *out_name*."""
    with open(out_name, "a", encoding='utf-8') as sink:
        sink.write(f"{i}\n")

def get_md5_value(src): myMd5 = hashlib.md5() myMd5.update(src.encode("utf8")) myMd5_Digest = myMd5.hexdigest() return myMd5_Digest

async def process_data(sem, s, i, None_data_list):
    """Fetch URL *i* with session *s*, render its JS, and append
    [status, title, content-length, body-md5, url] to *None_data_list*.

    Concurrency across the batch is bounded by the semaphore *sem*.
    Failures are printed and swallowed so one bad URL does not abort
    the whole scan.
    """
    async with sem:
        try:
            r = await s.get(url=i, timeout=30, headers=headers, verify=False)
            # Render through the headless browser; generous waits for slow pages.
            await r.html.arender(wait=30, sleep=30, timeout=30, retries=1)
            content_length = len(r.content)
            code = r.status_code
            # Strip whitespace so the md5 fingerprint ignores formatting noise.
            content = r.html.html.replace('\r', '').replace('\n', '').replace(' ', '')
            body_md5 = get_md5_value(str(content))

            # Original did findall(...)[0] guarded only by "'<title>' in content",
            # which raised IndexError on an empty <title></title>; check the
            # match list instead and fall through to the DOM lookup.
            titles = re.findall('(?<=<title>)(.+?)(?=</title>)', content)
            if titles:
                title = titles[0]
            elif r.html.find('title', first=True):
                title = r.html.find('title', first=True).text
            else:
                title = 'None'
                output_data(i, 'real_None.txt')
            print(f'{i} {r.status_code}, {title}')
            data = [str(code), str(title), str(content_length), body_md5, str(i)]
            None_data_list.append(data)

        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
        except Exception as e:
            # Was "except BaseException", which also swallowed
            # asyncio.CancelledError (BaseException since 3.8) and
            # KeyboardInterrupt, breaking task cancellation. Exception
            # keeps those propagating while still logging everything else.
            print(f"Unhandled error: {e}")

async def start_up(urls, None_data_list, timeout_duration=3000):
    """Scan *urls* concurrently (at most 3 in flight), collecting result
    rows into *None_data_list*; the whole batch is capped at
    *timeout_duration* seconds.

    The session is now closed in ``finally``: the original skipped
    ``s.close()`` whenever ``wait_for`` timed out or a task raised,
    leaking the headless Chromium instance — the likely source of the
    "Browser closed unexpectedly" errors.
    """
    s = AsyncHTMLSession(verify=False)
    sem = asyncio.Semaphore(3)
    try:
        tasks = (process_data(sem, s, url, None_data_list) for url in urls)
        await asyncio.wait_for(asyncio.gather(*tasks), timeout=timeout_duration)
    finally:
        await s.close()

def main(urls):
    """Run a scan over *urls* on a fresh event loop and return the list of
    collected data rows.

    Prints timing, appends it to debug_time.txt, and — as a last-resort
    cleanup — kills stray Chrome processes.
    """
    None_data_list = []
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        start = time.perf_counter()
        print(urls)
        loop.run_until_complete(start_up(urls, None_data_list))
        end = time.perf_counter()
        print(f'None_Scan : {end - start} ')
        output_data(str(end - start), 'debug_time.txt')
        print('')
    except asyncio.TimeoutError:
        print("Timeout occurred")
    except Exception as e:
        print(e)
    finally:
        print(len(None_data_list))
        loop.close()  # original leaked the loop: created but never closed
        # HACK: "-9" force-kills EVERY chrome on the host, not just this
        # scan's browser; kept for behavior, but per-session cleanup in
        # start_up's finally should make it unnecessary.
        os.system('pkill -f -9 chrome')
    return None_data_list

In test.py: new_request_None_url = ['http://bi-mokadisplay.tcl.com:83','http://tmsa.cmp.tcl.com:88']

update_data_list = nonetitle_info.main(new_request_None_url)

print(update_data_list)

#data_info.none_update(False, update_data_list)

defaul0t avatar May 14 '23 13:05 defaul0t

figure it out?

aehlke avatar May 23 '23 23:05 aehlke

This project uses pyppeteer, which uses a very old version of Chromium. This is easily fixable. You can check my comment on another issue here. Let me know if this helps.

ajatkj avatar Aug 01 '23 15:08 ajatkj

I forked this project and updated it to use playwright. see: #573

cboin1996 avatar Apr 17 '24 02:04 cboin1996