botasaurus
botasaurus copied to clipboard
Solved the browser tabs issue.
This function is shared for future reference and for anyone who has struggled with tab switching.
# tab_switcher.py (extract)
import logging
import time
import traceback
from typing import Any, List, Optional, Sequence, Tuple, Union
logger = logging.getLogger(__name__)
# Attach the console handler only once. ``logging.getLogger`` returns the same
# logger object every time this module is (re)executed, so an unconditional
# ``addHandler`` would stack handlers and print every record multiple times.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
# INFO level: the ``logger.debug`` diagnostics in this module stay hidden
# unless the caller lowers the level.
logger.setLevel(logging.INFO)
class TabSwitchError(RuntimeError):
    """Raised when a tab switch fails and raise_on_fail=True.

    Subclasses ``RuntimeError``; the exception message is the same text that
    is logged for the failure.
    """
def _attempt_bring_to_front(selenium_like_driver: Any) -> (bool):
"""Try common CDP / Selenium calls to bring browser to front."""
tried = []
try:
if hasattr(selenium_like_driver, "execute_cdp_cmd"):
selenium_like_driver.execute_cdp_cmd("Page.bringToFront", {})
return True, "execute_cdp_cmd(Page.bringToFront)"
except Exception as e:
tried.append(("execute_cdp_cmd", str(e)))
try:
exec_func = getattr(selenium_like_driver, "execute_cdp_cmd", None)
if callable(exec_func):
exec_func("Page.bringToFront", {})
return True, "execute_cdp_cmd via getattr"
except Exception as e:
tried.append(("execute_cdp_cmd-getattr", str(e)))
try:
handles = None
if hasattr(selenium_like_driver, "window_handles"):
handles = selenium_like_driver.window_handles
elif hasattr(selenium_like_driver, "get_window_handles"):
handles = selenium_like_driver.get_window_handles()
if handles:
selenium_like_driver.switch_to.window(handles[-1])
return True, "switch_to.window(last_handle)"
except Exception as e:
tried.append(("switch_to.window", str(e)))
try:
if hasattr(selenium_like_driver, "maximize_window"):
selenium_like_driver.maximize_window()
return True, "maximize_window"
except Exception as e:
tried.append(("maximize_window", str(e)))
return False, tried
def _get_underlying_driver(driver: Any) -> Optional[Any]:
"""Detect underlying selenium-like driver inside wrapper."""
candidates = ["_driver", "driver", "_browser", "_webdriver", "raw_driver", "_raw_driver"]
for name in candidates:
if hasattr(driver, name):
obj = getattr(driver, name)
if obj is None:
continue
if any(hasattr(obj, a) for a in ("execute_cdp_cmd", "switch_to", "window_handles", "maximize_window")):
return obj
if hasattr(obj, "driver"):
inner = getattr(obj, "driver")
if inner and any(hasattr(inner, a) for a in ("execute_cdp_cmd", "switch_to", "window_handles", "maximize_window")):
return inner
if any(hasattr(driver, a) for a in ("execute_cdp_cmd", "switch_to", "window_handles", "maximize_window")):
return driver
return None
def _resolve_tabs(driver: Any, tabs: Optional[Sequence[Any]]) -> Sequence[Any]:
"""Return the tabs sequence or auto-discover from driver._browser.tabs."""
if tabs:
return tabs
maybe = getattr(driver, "_browser", None) or getattr(driver, "browser", None)
if maybe is not None:
if isinstance(maybe, (list, tuple)):
return maybe
if hasattr(maybe, "tabs"):
return getattr(maybe, "tabs")
raise ValueError("Unable to resolve tabs. Provide 'tabs' or ensure driver._browser.tabs exists.")
def switch_to_tab_by_index(
    driver: Any,
    tabs: Optional[Sequence[Any]],
    index: int,
    *,
    base: int = 0,
    timeout: float = 5.0,
    bring_to_front: bool = True,
    raise_on_fail: bool = False
) -> Optional[Any]:
    """
    Switch to a tab by integer index in a robust, reusable way.

    The switch is best-effort: several activation strategies are tried in
    turn and individual failures are logged at DEBUG level rather than raised.

    Parameters
    ----------
    driver : Botasaurus Driver or Selenium-like driver instance.
    tabs : sequence of tab objects (or None to auto-discover driver._browser.tabs).
    index : integer selecting the tab. Non-negative values are interpreted
        according to `base`. Negative values always count from the end
        (-1 is the last tab), whatever `base` is.
    base : 0 for zero-based indexing (default). 1 for one-based indexing
        (1 -> first tab; 0 is rejected as invalid).
    timeout : seconds to keep re-checking for confirmation (default 5.0).
        Confirmation is always checked at least once, even when timeout <= 0.
    bring_to_front : attempt CDP / window handle fallbacks to show the tab visually.
    raise_on_fail : raise TabSwitchError on final failure if True.

    Returns
    -------
    The tab object on success, else None (or raises if raise_on_fail=True).

    Raises
    ------
    TabSwitchError
        Only when raise_on_fail=True: tabs could not be resolved, the
        index/base is invalid or out of range, or the switch was not confirmed.

    Notes
    -----
    Confirmation is heuristic. It passes when ``driver._tab`` is the requested
    tab, when the underlying driver's current URL matches one of the tab's
    URLs, or when any activation step ran without raising.
    """
    try:
        tabs_seq = _resolve_tabs(driver, tabs)
    except Exception as e:
        msg = f"Failed to resolve tabs: {e}"
        logger.error(msg)
        if raise_on_fail:
            raise TabSwitchError(msg) from e
        return None

    # normalize index according to base
    try:
        if base not in (0, 1):
            raise ValueError("base must be 0 or 1")
        if index < 0:
            # Negative indexes count from the end and must NOT be shifted by
            # `base`; otherwise base=1 would turn -1 into the second-to-last tab.
            idx = len(tabs_seq) + index
        elif base == 1:
            if index == 0:
                # There is no "tab 0" in one-based numbering; previously this
                # silently selected the last tab.
                raise ValueError("index 0 is not valid when base=1")
            idx = index - 1
        else:
            idx = index
    except Exception as e:
        msg = f"Invalid index/base combination: {e}"
        logger.error(msg)
        if raise_on_fail:
            raise TabSwitchError(msg) from e
        return None

    if not (0 <= idx < len(tabs_seq)):
        msg = f"Index out of range: idx={idx} (tabs={len(tabs_seq)})"
        logger.warning(msg)
        if raise_on_fail:
            raise TabSwitchError(msg)
        return None

    tab_obj = tabs_seq[idx]
    logger.info(
        "Switch target -> index=%d tab=%r",
        idx,
        getattr(tab_obj, "url", getattr(tab_obj, "title", repr(tab_obj))),
    )

    # 1) high-level API
    try:
        if hasattr(driver, "switch_to_tab"):
            driver.switch_to_tab(tab_obj)
            logger.debug("Called driver.switch_to_tab(tab_obj)")
    except Exception as e:
        logger.debug("driver.switch_to_tab raised: %s", e)

    # 2) set internal code-context selection (driver._tab)
    #    Only the requested tab object itself is ever assigned. Re-indexing
    #    driver._browser.tabs with `idx` (or falling back to its last entry)
    #    could select a different tab when `tabs` was supplied by the caller.
    try:
        browser = getattr(driver, "_browser", None)
        browser_tabs = getattr(browser, "tabs", None) if browser is not None else None
        if browser_tabs is not None:
            if any(candidate is tab_obj for candidate in browser_tabs):
                driver._tab = tab_obj
                logger.debug("driver._tab set to requested tab (index %d)", idx)
            else:
                logger.debug("requested tab not found in driver._browser.tabs; driver._tab left unchanged")
    except Exception as e:
        logger.debug("setting driver._tab failed: %s", e)

    # 3) harmless action to bind context
    #    Read-only accessors only. "get" is deliberately not probed: on the
    #    driver it is the navigation call, and invoking a tab's `get()` with
    #    no arguments may not be side-effect free (TODO confirm for the tab
    #    type in use).
    tried_action = False
    for meth in ("get_url", "url", "current_url", "get_current_url", "title"):
        try:
            if not hasattr(tab_obj, meth):
                continue
            value = getattr(tab_obj, meth)
            if callable(value):
                value()
            tried_action = True
            logger.debug("Performed harmless tab action via %s", meth)
            break
        except Exception:
            continue
    if not tried_action:
        try:
            if hasattr(driver, "eval_js"):
                driver.eval_js("1+1")
                tried_action = True
        except Exception:
            logger.debug("harmless action raised:\n%s", traceback.format_exc())

    # 4) bring-to-front fallbacks
    underlying = _get_underlying_driver(driver)
    brought = False
    if bring_to_front and underlying is not None:
        try:
            ok, info = _attempt_bring_to_front(underlying)
            if ok:
                brought = True
                logger.debug("Bring-to-front succeeded: %s", info)
            else:
                logger.debug("Bring-to-front attempts returned: %s", info)
        except Exception:
            logger.debug("attempt_bring_to_front raised:\n%s", traceback.format_exc())

    # 5) attempt tab-level activation methods if present
    activated_by_tab_method = False
    for attr in ("activate", "select", "bring_to_front", "focus"):
        try:
            if not hasattr(tab_obj, attr):
                continue
            fn = getattr(tab_obj, attr)
            if callable(fn):
                fn()
                activated_by_tab_method = True
                logger.debug("Called tab_obj.%s()", attr)
                break
        except Exception:
            continue

    # 6) confirmation loop
    #    A monotonic clock keeps the deadline immune to wall-clock changes,
    #    and the check-then-test-deadline shape guarantees one confirmation
    #    attempt even for timeout <= 0.
    deadline = time.monotonic() + timeout
    success = False
    while True:
        # strongest signal: the driver's selected tab is the requested object
        try:
            if hasattr(driver, "_tab") and getattr(driver, "_tab") is tab_obj:
                success = True
                break
        except Exception:
            pass

        # next: the underlying driver's current URL matches one of the tab's URLs
        try:
            if underlying is not None:
                u_cur = None
                if hasattr(underlying, "current_url"):
                    u_cur = getattr(underlying, "current_url")
                    if callable(u_cur):
                        u_cur = u_cur()
                tab_urls = []
                for a in ("url", "current_url", "get_url", "get_current_url"):
                    if hasattr(tab_obj, a):
                        try:
                            v = getattr(tab_obj, a)
                            if callable(v):
                                v = v()
                            tab_urls.append(str(v))
                        except Exception:
                            pass
                if u_cur:
                    current = str(u_cur)
                    if any(t_url and (t_url in current or current in t_url) for t_url in tab_urls):
                        success = True
                        break
        except Exception:
            pass

        # weakest signal: some activation step ran without raising
        if tried_action or brought or activated_by_tab_method:
            success = True
            break

        if time.monotonic() >= deadline:
            break
        time.sleep(0.1)

    if success:
        logger.info("Switched to tab index=%d successfully.", idx)
        return tab_obj

    msg = f"Failed to confirm switching to tab (idx={idx}) within {timeout}s."
    logger.warning(msg)
    if raise_on_fail:
        raise TabSwitchError(msg)
    return None
Usage example:
from botasaurus.browser import browser, Driver
from tab_switcher import switch_to_tab_by_index
import time
@browser
def task(driver: Driver, data):
    """Demo task: open three tabs, then switch between them by index.

    ``data`` is accepted but not used in this example. The ``time.sleep``
    calls only leave the browser in each state for a few seconds.
    """
    # open tabs
    driver.get("https://example.com")  # tab 0
    driver.open_link_in_new_tab("https://www.wikipedia.org")  # tab 1
    driver.open_link_in_new_tab("https://www.python.org")  # tab 2
    time.sleep(1)
    # switch to second tab (index 1, wikipedia)
    switch_to_tab_by_index(driver, driver._browser.tabs, 1, base=0, timeout=6.0)
    time.sleep(5)
    # switch to first tab using one-based indexing (base=1: pass 1 -> first)
    switch_to_tab_by_index(driver, driver._browser.tabs, 1, base=1)
    time.sleep(3)
    return {"status": "done"}