[Bug]: - api - ERROR - Crawl error: cannot access local variable 'sig' where it is not associated with a value

Open • main888 opened this issue 4 weeks ago • 2 comments

crawl4ai version

0.7.6

Expected Behavior

A fixed crawler_pool.py (proposed replacement) that serializes ProxyConfig and other non-JSON-serializable objects when building the pool key, and initializes sig before the try block so the error handlers never touch an unbound local:

import asyncio, json, hashlib, time, psutil
from contextlib import suppress
from typing import Dict
from crawl4ai import AsyncWebCrawler, BrowserConfig
from utils import load_config

CONFIG = load_config()

POOL: Dict[str, AsyncWebCrawler] = {}
LAST_USED: Dict[str, float] = {}
LOCK = asyncio.Lock()

MEM_LIMIT = CONFIG.get("crawler", {}).get("memory_threshold_percent", 95.0)  # % RAM – refuse new browsers above this
IDLE_TTL = CONFIG.get("crawler", {}).get("pool", {}).get("idle_ttl_sec", 1800)  # close if unused for 30 min
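
For reference, these lookups assume load_config() returns a nested dict roughly of the following shape (illustrative only; the real values come from the deployment's config file):

# Illustrative shape only; actual content comes from utils.load_config().
EXAMPLE_CONFIG = {
    "crawler": {
        "memory_threshold_percent": 95.0,   # MEM_LIMIT default
        "pool": {"idle_ttl_sec": 1800},     # IDLE_TTL default (30 min)
    },
}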

def _sig(cfg: BrowserConfig) -> str:
    """Generate a signature for the browser config, handling non-serializable objects like ProxyConfig."""
    def make_serializable(obj):
        """Recursively convert objects to a JSON-serializable format."""
        if isinstance(obj, dict):
            return {k: make_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, (list, tuple)):
            return [make_serializable(item) for item in obj]
        elif hasattr(obj, '__dict__'):
            # Handle objects (like ProxyConfig): extract public attributes
            result = {}
            for key, value in obj.__dict__.items():
                if not key.startswith('_'):
                    result[key] = make_serializable(value)
            return result
        elif hasattr(obj, '__class__'):
            # Other object types: fall back to a string representation
            return str(obj)
        else:
            return obj

    try:
        # Convert config to dict
        if hasattr(cfg, 'to_dict'):
            cfg_dict = cfg.to_dict()
        elif hasattr(cfg, '__dict__'):
            cfg_dict = cfg.__dict__.copy()
        else:
            cfg_dict = dict(cfg) if isinstance(cfg, dict) else str(cfg)

        # Serialize config, handling ProxyConfig and other non-serializable objects
        serialized = make_serializable(cfg_dict)
        payload = json.dumps(serialized, sort_keys=True, separators=(",", ":"))
        return hashlib.sha1(payload.encode()).hexdigest()
    except Exception:
        # Fallback: use object ID and type name if serialization fails
        fallback = f"{type(cfg).__name__}_{id(cfg)}"
        return hashlib.sha1(fallback.encode()).hexdigest()
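
A quick sanity check of the helper (FakeProxy and FakeConfig are stand-ins for illustration, not crawl4ai classes): the signature should be stable across calls and should no longer raise TypeError on non-serializable attributes.

# Stand-in objects to show _sig() hashing a config that holds a
# non-JSON-serializable attribute, as ProxyConfig does.
class FakeProxy:
    def __init__(self):
        self.server = "http://127.0.0.1:8080"
        self._token = "hidden"  # leading-underscore attrs are skipped

class FakeConfig:
    def __init__(self):
        self.headless = True
        self.proxy_config = FakeProxy()

assert _sig(FakeConfig()) == _sig(FakeConfig())  # stable hash, no TypeError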

async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
    sig = None  # Initialize sig before try block to avoid UnboundLocalError
    try:
        sig = _sig(cfg)
        async with LOCK:
            if sig in POOL:
                LAST_USED[sig] = time.time()
                return POOL[sig]
            if psutil.virtual_memory().percent >= MEM_LIMIT:
                raise MemoryError("RAM pressure – new browser denied")
            crawler = AsyncWebCrawler(config=cfg, thread_safe=False)
            await crawler.start()
            POOL[sig] = crawler
            LAST_USED[sig] = time.time()
            return crawler
    except MemoryError as e:
        raise MemoryError(f"RAM pressure – new browser denied: {e}")
    except Exception as e:
        # Ensure sig has a value even if _sig() failed
        if sig is None:
            try:
                sig = _sig(cfg)
            except Exception:
                sig = "error_fallback"
        raise RuntimeError(f"Failed to start browser: {e}")
    finally:
        # Only access sig if it was successfully initialized
        if sig is not None:
            if sig in POOL:
                LAST_USED[sig] = time.time()
            else:
                # If we failed to start the browser, ensure it's not in the pool
                POOL.pop(sig, None)
                LAST_USED.pop(sig, None)

async def close_all():
    async with LOCK:
        await asyncio.gather(*(c.close() for c in POOL.values()), return_exceptions=True)
        POOL.clear()
        LAST_USED.clear()

async def janitor():
    while True:
        await asyncio.sleep(60)
        now = time.time()
        async with LOCK:
            for sig, crawler in list(POOL.items()):
                if now - LAST_USED[sig] > IDLE_TTL:
                    with suppress(Exception):
                        await crawler.close()
                    POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)
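
For completeness, a minimal sketch of how these pieces are meant to be driven; the main() wiring below is illustrative, not part of the proposed file:

# Illustrative wiring: run the janitor in the background, reuse pooled
# browsers via get_crawler(), and drain everything on shutdown.
async def main():
    janitor_task = asyncio.create_task(janitor())
    try:
        crawler = await get_crawler(BrowserConfig(headless=True))
        result = await crawler.arun(url="https://example.com")
        print(result.success)
    finally:
        janitor_task.cancel()
        with suppress(asyncio.CancelledError):
            await janitor_task
        await close_all()

if __name__ == "__main__":
    asyncio.run(main())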

Current Behavior

An error occurs when a proxy is configured during local deployment: serializing the BrowserConfig for the pool key fails because ProxyConfig is not JSON serializable, and the error handling in get_crawler then reads the local variable sig before it was ever assigned (the "cannot access local variable 'sig'" error in the title).
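
The underlying pattern: in the shipped get_crawler, sig is first bound inside the try block, so when _sig() raises, the finally block reads a name that was never assigned. A stripped-down reproduction, independent of crawl4ai:

# Minimal reproduction of the reported error; on Python 3.11+ the message is
# "cannot access local variable 'sig' where it is not associated with a value".
def compute_sig(cfg):
    raise TypeError("Object of type ProxyConfig is not JSON serializable")

def get_thing(cfg):
    try:
        sig = compute_sig(cfg)  # raises before 'sig' is ever bound...
        return sig
    finally:
        if sig in {}:           # ...so this read raises UnboundLocalError
            pass

get_thing(None)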

Is this reproducible?

Yes

Inputs Causing the Bug

Traceback (most recent call last):
  File "/app/crawler_pool.py", line 24, in get_crawler
    sig = _sig(cfg)
          ^^^^^^^^^
  File "/app/crawler_pool.py", line 19, in _sig
    payload = json.dumps(cfg.to_dict(), sort_keys=True, separators=(",",":"))
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/json/__init__.py", line 238, in dumps
    **kw).encode(obj)
          ^^^^^^^^^^^
  File "/usr/local/lib/python3.12/json/encoder.py", line 200, in encode
    chunks = self.iterencode(o, _one_shot=True)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/json/encoder.py", line 258, in iterencode
    return _iterencode(o, 0)
           ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/json/encoder.py", line 180, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type ProxyConfig is not JSON serializable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/app/crawler_pool.py", line 38, in get_crawler
    raise RuntimeError(f"Failed to start browser: {e}")
RuntimeError: Failed to start browser: Object of type ProxyConfig is not JSON serializable

Steps to Reproduce

Deploy the server locally with a proxy configured on the BrowserConfig (a ProxyConfig object), then submit any crawl request through the API.

Code snippets


OS

Windows

Python version

3.9.0

Browser

No response

Browser version

No response

Error logs & Screenshots (if applicable)

No response

main888 · Nov 12 '25 06:11