docker-selenium icon indicating copy to clipboard operation
docker-selenium copied to clipboard

[🐛 Bug]: Facing '504 Gateway Time-out' for chrome-node session pods

Open Aditya27041999 opened this issue 5 months ago • 17 comments

What happened?

I am executing a Python script that spawns multiple chrome-node pods (10) for browser sessions. Some requests are honoured immediately, but others fail to create WebDriver sessions. Below is my Python script: `#!/usr/bin/env python3 """ Selenium Grid Chrome Node Spawning and Validation Script This script creates 10 concurrent Chrome sessions and performs basic validation """

import concurrent.futures import time import logging from datetime import datetime from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException import requests import json

Configure logging

# Configure logging: every record goes both to a log file and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('selenium_test.log'),
        logging.StreamHandler(),
    ],
)
# Module-level logger; ``__name__`` (not the undefined ``name``) keys it to this module.
logger = logging.getLogger(__name__)

class SeleniumGridTester:
    """Spawn concurrent Chrome sessions on a Selenium Grid and validate them.

    Each session is created through the Grid URL, put through a fixed list of
    smoke checks, closed again, and finally summarised in a console report and
    a JSON file.
    """

    def __init__(self, grid_url, max_workers=10):
        """
        Initialize the Selenium Grid Tester

        Args:
            grid_url (str): Selenium Grid hub/router URL
            max_workers (int): Maximum number of concurrent sessions
        """
        self.grid_url = grid_url
        self.max_workers = max_workers
        self.results = []

    def get_grid_status(self):
        """Check Selenium Grid status before starting tests.

        Returns:
            bool: True when the status payload reports ``value.ready``;
            False when it does not, or when the request or parsing fails.
        """
        try:
            # Readiness is published as JSON under /status; strip a trailing
            # slash from the base URL so the path is not doubled.
            status_url = f"{self.grid_url.rstrip('/')}/status"
            response = requests.get(status_url, timeout=10)
            response.raise_for_status()

            status_data = response.json()
            logger.info(f"Grid Status: {json.dumps(status_data, indent=2)}")

            # Check if grid is ready
            if status_data.get('value', {}).get('ready', False):
                logger.info("✅ Selenium Grid is ready")
                return True

            logger.error("❌ Selenium Grid is not ready")
            return False

        except Exception as e:
            # Best-effort probe: any failure is reported as "not ready".
            logger.error(f"❌ Failed to get grid status: {e}")
            return False

    def create_chrome_options(self):
        """Create Chrome options for the test.

        Returns:
            Options: Chrome options carrying the command-line switches below.
        """
        options = Options()

        # Basic Chrome options
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-gpu')
        options.add_argument('--window-size=1920,1080')
        options.add_argument('--disable-extensions')
        options.add_argument('--disable-plugins')
        options.add_argument('--disable-images')  # Faster loading
        options.add_argument('--disable-javascript')  # For basic validation

        # Optional: Run in headless mode
        # options.add_argument('--headless')

        return options

    def validate_session(self, session_id, driver):
        """
        Perform basic validation on a Chrome session

        Failures in the optional steps (screenshot, node info, final script
        execution) are logged as warnings and do not fail the session; any
        other exception is recorded in ``errors`` and leaves ``success`` False.

        Args:
            session_id (int): Session identifier
            driver: WebDriver instance

        Returns:
            dict: Validation results with keys ``session_id``, ``start_time``,
            ``end_time``, ``duration`` (seconds), ``success``, ``tests``,
            ``errors`` and ``node_info``.
        """
        validation_results = {
            'session_id': session_id,
            'start_time': datetime.now(),
            'success': False,
            'tests': {},
            'errors': [],
            'node_info': {}
        }

        try:
            # Test 1: Get session info
            logger.info(f"Session {session_id}: Getting session info...")
            session_info = driver.execute_script("return navigator.userAgent;")
            validation_results['tests']['user_agent'] = session_info
            logger.info(f"Session {session_id}: User Agent - {session_info}")

            # Test 2: Navigate to a simple page
            logger.info(f"Session {session_id}: Navigating to Google...")
            driver.get("https://www.google.com")

            # Wait for page to load
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )

            validation_results['tests']['navigation'] = True
            validation_results['tests']['page_title'] = driver.title
            logger.info(f"Session {session_id}: Page title - {driver.title}")

            # Test 3: Check page source
            page_source_length = len(driver.page_source)
            validation_results['tests']['page_source_length'] = page_source_length
            logger.info(f"Session {session_id}: Page source length - {page_source_length}")

            # Test 4: Take screenshot (optional)
            try:
                screenshot_path = f"screenshot_session_{session_id}.png"
                driver.save_screenshot(screenshot_path)
                validation_results['tests']['screenshot'] = screenshot_path
                logger.info(f"Session {session_id}: Screenshot saved - {screenshot_path}")
            except Exception as e:
                logger.warning(f"Session {session_id}: Screenshot failed - {e}")

            # Test 5: Get node information
            try:
                capabilities = driver.capabilities
                # NOTE(review): 'se:cdp' is the session's CDP websocket URL and
                # embeds the session id, so 'node_id' is effectively unique per
                # session rather than per node - confirm whether a real node
                # identifier is available before relying on the distribution.
                validation_results['node_info'] = {
                    'browser_name': capabilities.get('browserName'),
                    'browser_version': capabilities.get('browserVersion'),
                    'platform': capabilities.get('platformName'),
                    'node_id': capabilities.get('se:cdp', 'Unknown')
                }
                logger.info(f"Session {session_id}: Node info - {validation_results['node_info']}")
            except Exception as e:
                logger.warning(f"Session {session_id}: Failed to get node info - {e}")

            # Test 6: Simple JavaScript execution
            try:
                js_result = driver.execute_script("return document.readyState;")
                validation_results['tests']['javascript_execution'] = js_result
                logger.info(f"Session {session_id}: Document ready state - {js_result}")
            except Exception as e:
                logger.warning(f"Session {session_id}: JavaScript execution failed - {e}")

            validation_results['success'] = True
            logger.info(f"✅ Session {session_id}: All validations passed")

        except TimeoutException as e:
            error_msg = f"Timeout in session {session_id}: {e}"
            validation_results['errors'].append(error_msg)
            logger.error(f"⏰ {error_msg}")

        except WebDriverException as e:
            error_msg = f"WebDriver error in session {session_id}: {e}"
            validation_results['errors'].append(error_msg)
            logger.error(f"🚫 {error_msg}")

        except Exception as e:
            error_msg = f"Unexpected error in session {session_id}: {e}"
            validation_results['errors'].append(error_msg)
            logger.error(f"❌ {error_msg}")

        finally:
            # Timing is recorded on every path, including failures.
            validation_results['end_time'] = datetime.now()
            validation_results['duration'] = (
                validation_results['end_time'] - validation_results['start_time']
            ).total_seconds()

        return validation_results

    def run_single_session(self, session_id):
        """
        Run a single Chrome session with validation

        WebDriver creation is attempted up to three times; the driver is always
        quit in ``finally`` once it has been created.

        Args:
            session_id (int): Session identifier

        Returns:
            dict: Session results with keys ``session_id``, ``success``,
            ``error`` and ``validation_results``. This method does not raise;
            failures are reported through the returned dict.
        """
        driver = None
        session_result = {
            'session_id': session_id,
            'success': False,
            'error': None,
            'validation_results': None
        }

        try:
            logger.info(f"🚀 Starting session {session_id}...")

            # Create Chrome options
            options = self.create_chrome_options()

            # Retry mechanism for WebDriver creation
            retries = 3
            for attempt in range(1, retries + 1):
                try:
                    # Create WebDriver instance
                    driver = webdriver.Remote(
                        command_executor=f"{self.grid_url}/wd/hub",
                        options=options
                    )
                    logger.info(f"Session {session_id}: WebDriver created successfully on attempt {attempt}")
                    break
                except Exception as e:
                    logger.warning(f"Session {session_id}: WebDriver creation failed on attempt {attempt} - {e}")
                    if attempt == retries:
                        # Chain the last failure so its traceback is not lost.
                        raise Exception(
                            f"Session {session_id}: Failed to create WebDriver after {retries} attempts"
                        ) from e

            # Run validation tests
            validation_results = self.validate_session(session_id, driver)
            session_result['validation_results'] = validation_results
            session_result['success'] = validation_results['success']

        except Exception as e:
            error_msg = f"Failed to create session {session_id}: {e}"
            session_result['error'] = error_msg
            logger.error(f"❌ {error_msg}")

        finally:
            # Clean up WebDriver
            if driver is not None:
                try:
                    driver.quit()
                    logger.info(f"Session {session_id}: WebDriver closed")
                except Exception as e:
                    logger.warning(f"Session {session_id}: Error closing WebDriver - {e}")

        return session_result

    def run_concurrent_sessions(self):
        """Run multiple concurrent Chrome sessions.

        Submits ``max_workers`` sessions to a thread pool, appends each result
        to ``self.results`` as it completes, then generates the report.

        Returns:
            bool: Always True once the run and the report have finished.
        """
        logger.info(f"🎯 Starting {self.max_workers} concurrent Chrome sessions...")

        # Check grid status first
        # if not self.get_grid_status():
        #     logger.error("❌ Cannot proceed - Selenium Grid is not ready")
        #     return False

        start_time = datetime.now()

        # Use ThreadPoolExecutor for concurrent execution
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all sessions
            future_to_session = {
                executor.submit(self.run_single_session, i): i
                for i in range(1, self.max_workers + 1)
            }

            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_session):
                session_id = future_to_session[future]
                try:
                    result = future.result()
                    self.results.append(result)

                    if result['success']:
                        logger.info(f"✅ Session {session_id} completed successfully")
                    else:
                        logger.error(f"❌ Session {session_id} failed")

                except Exception as e:
                    logger.error(f"❌ Session {session_id} generated an exception: {e}")

        end_time = datetime.now()
        total_duration = (end_time - start_time).total_seconds()

        # Generate summary report
        self.generate_report(total_duration)

        return True

    def generate_report(self, total_duration):
        """Generate a summary report of all sessions.

        Prints the summary to stdout and writes a timestamped
        ``selenium_test_report_*.json`` file in the working directory.

        Args:
            total_duration (float): Wall-clock duration of the run in seconds.
        """
        logger.info("📊 Generating test report...")

        successful_sessions = [r for r in self.results if r['success']]
        failed_sessions = [r for r in self.results if not r['success']]

        # Guard the percentage so an empty result list cannot divide by zero.
        total_sessions = len(self.results)
        success_rate = (
            len(successful_sessions) / total_sessions * 100 if total_sessions else 0.0
        )

        print("\n" + "="*80)
        print("🎯 SELENIUM GRID TEST REPORT")
        print("="*80)
        print(f"Grid URL: {self.grid_url}")
        print(f"Total Sessions: {total_sessions}")
        print(f"Successful Sessions: {len(successful_sessions)}")
        print(f"Failed Sessions: {len(failed_sessions)}")
        print(f"Success Rate: {success_rate:.1f}%")
        print(f"Total Duration: {total_duration:.2f} seconds")

        if successful_sessions:
            avg_duration = sum(
                r['validation_results']['duration']
                for r in successful_sessions
            ) / len(successful_sessions)
            print(f"Average Session Duration: {avg_duration:.2f} seconds")

        # Node distribution
        print("\n📍 NODE DISTRIBUTION:")
        node_distribution = {}
        for result in successful_sessions:
            if result['validation_results'] and result['validation_results']['node_info']:
                node_id = result['validation_results']['node_info'].get('node_id', 'Unknown')
                node_distribution[node_id] = node_distribution.get(node_id, 0) + 1

        for node_id, count in node_distribution.items():
            print(f"  Node {node_id}: {count} sessions")

        # Failed sessions details
        if failed_sessions:
            print("\n❌ FAILED SESSIONS:")
            for result in failed_sessions:
                # 'error' is only set when session creation failed; for a
                # session that failed during validation, fall back to the
                # errors collected by validate_session.
                reason = result.get('error')
                if not reason and result.get('validation_results'):
                    reason = '; '.join(result['validation_results'].get('errors', []))
                print(f"  Session {result['session_id']}: {reason or 'Unknown error'}")

        print("="*80)

        # Save detailed report to file
        report_filename = f"selenium_test_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(report_filename, 'w', encoding='utf-8') as f:
            # default=str stringifies the datetime values in the results.
            json.dump({
                'summary': {
                    'grid_url': self.grid_url,
                    'total_sessions': total_sessions,
                    'successful_sessions': len(successful_sessions),
                    'failed_sessions': len(failed_sessions),
                    'success_rate': success_rate,
                    'total_duration': total_duration,
                    'node_distribution': node_distribution
                },
                'detailed_results': self.results
            }, f, indent=2, default=str)

        logger.info(f"📄 Detailed report saved to: {report_filename}")

def main():
    """Main function to run the Selenium Grid test.

    Returns:
        int: 0 when ``run_concurrent_sessions()`` returns a truthy value,
        otherwise 1 (intended as the process exit code).
    """
    # Configuration
    # NOTE(review): the posted client log shows every request being redirected
    # from http:// to https:// for this host - consider using the https URL.
    GRID_URL = "http://selenium-hub-qas3274.io.coupadev.com"  # Your Selenium Grid URL
    MAX_WORKERS = 10  # Number of concurrent Chrome sessions

    # Alternative internal URL if running from within cluster
    # GRID_URL = "http://selenium-cqe-qas-qaskub2625-selenium-router.selenium-cqe-qas.svc.cluster.local:4444"

    logger.info("🎬 Starting Selenium Grid Chrome Node Test")
    logger.info(f"Grid URL: {GRID_URL}")
    logger.info(f"Concurrent Sessions: {MAX_WORKERS}")

    # Create tester instance
    tester = SeleniumGridTester(GRID_URL, MAX_WORKERS)

    # Run the test
    success = tester.run_concurrent_sessions()

    if success:
        logger.info("🎉 Test completed successfully!")
    else:
        logger.error("💥 Test failed!")
        return 1

    return 0

# Run main() only when executed as a script; its return value becomes the exit code.
if __name__ == "__main__":
    raise SystemExit(main())

I am using selenium-grid distributed architecture where there are different pods like router, distributor, session-queue, session-map and event-bus. I am facing error logs in router pods """ 11:40:10.694 WARN [SpanWrappedHttpHandler.execute] - Unable to execute request: Build info: version: '4.31.0', revision: '4ae8fc9f8a' System info: os.name: 'Linux', os.arch: 'amd64', os.version: '5.10.236-228.935.amzn2.x86_64', java.version: '21.0.6' Driver info: driver.version: unknown org.openqa.selenium.WebDriverException: Build info: version: '4.31.0', revision: '4ae8fc9f8a' System info: os.name: 'Linux', os.arch: 'amd64', os.version: '5.10.236-228.935.amzn2.x86_64', java.version: '21.0.6' Driver info: driver.version: unknown at org.openqa.selenium.remote.http.jdk.JdkHttpClient.execute(JdkHttpClient.java:419) at org.openqa.selenium.remote.tracing.TracedHttpClient.execute(TracedHttpClient.java:54) at org.openqa.selenium.grid.security.AddSecretFilter.lambda$apply$0(AddSecretFilter.java:40) at org.openqa.selenium.remote.http.Filter.lambda$andFinally$1(Filter.java:55) at org.openqa.selenium.grid.sessionqueue.remote.RemoteNewSessionQueue.addToQueue(RemoteNewSessionQueue.java:110) at org.openqa.selenium.grid.sessionqueue.NewSessionQueue.lambda$new$0(NewSessionQueue.java:68) at org.openqa.selenium.remote.http.Route$TemplatizedRoute.handle(Route.java:192) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at org.openqa.selenium.remote.http.Route$CombinedRoute.handle(Route.java:360) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at org.openqa.selenium.grid.sessionqueue.NewSessionQueue.execute(NewSessionQueue.java:128) at org.openqa.selenium.remote.tracing.SpanWrappedHttpHandler.execute(SpanWrappedHttpHandler.java:87) at org.openqa.selenium.remote.http.Filter$1.execute(Filter.java:63) at org.openqa.selenium.remote.http.Route$CombinedRoute.handle(Route.java:360) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at 
org.openqa.selenium.grid.router.Router.execute(Router.java:89) at org.openqa.selenium.grid.web.EnsureSpecCompliantResponseHeaders.lambda$apply$0(EnsureSpecCompliantResponseHeaders.java:34) at org.openqa.selenium.remote.http.Filter$1.execute(Filter.java:63) at org.openqa.selenium.remote.http.Route$CombinedRoute.handle(Route.java:360) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at org.openqa.selenium.remote.http.Route$NestedRoute.handle(Route.java:270) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at org.openqa.selenium.remote.http.Route$CombinedRoute.handle(Route.java:360) at org.openqa.selenium.remote.http.Route.execute(Route.java:69) at org.openqa.selenium.remote.AddWebDriverSpecHeaders.lambda$apply$0(AddWebDriverSpecHeaders.java:35) at org.openqa.selenium.remote.ErrorFilter.lambda$apply$0(ErrorFilter.java:44) at org.openqa.selenium.remote.http.Filter$1.execute(Filter.java:63) at org.openqa.selenium.remote.ErrorFilter.lambda$apply$0(ErrorFilter.java:44) at org.openqa.selenium.remote.http.Filter$1.execute(Filter.java:63) at org.openqa.selenium.netty.server.SeleniumHandler.lambda$channelRead0$0(SeleniumHandler.java:49) at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:572) at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317) """

Router pods environment variables """ <qaskub2625-selenium-router-5bc8ccfb55-q2krq:/$ env | grep -i timeout SE_SESSION_REQUEST_TIMEOUT=3600 <qaskub2625-selenium-router-5bc8ccfb55-q2krq:/$ """ Node-chrome pods env variables """ SE_ENABLE_BROWSER_LEFTOVERS_CLEANUP=false SE_VNC_NO_PASSWORD=1 KUBERNETES_SERVICE_PORT_HTTPS=443 SE_START_NO_VNC=true KEDA_OPERATOR_METRICS_APISERVER_SERVICE_PORT_HTTPS=443 SE_JAVA_HTTPCLIENT_VERSION=HTTP_1_1 SE_NO_VNC_PORT=7900 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_PORT_5556_TCP_PORT=5556 KUBERNETES_SERVICE_PORT=443 SE_NODE_PRESTOP_WAIT_STRATEGY=DRAIN SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_SERVICE_PORT=5559 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4442_TCP=tcp://172.20.11.14:4442 KEDA_OPERATOR_SERVICE_HOST=172.20.166.180 SEL_DOWNLOAD_DIR=/home/seluser/Downloads SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_5557_TCP_ADDR=172.20.11.14 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_5557_TCP=tcp://172.20.11.14:5557 SE_NODE_CONTAINER_NAME=selenium-cqe-qas-qaskub2625-selenium-node-chrome-tj65k-dx92h HOSTNAME=selenium-cqe-qas-qaskub2625-selenium-node-chrome-tj65k-dx92h SE_BROWSER_LEFTOVERS_PROCESSES_SECS=7200 LANGUAGE=en_US.UTF-8 SE_NODE_MAX_SESSIONS=1 SE_SUPERVISORD_CHILD_LOG_DIR=/tmp SE_JAVA_SSL_TRUST_STORE=/opt/selenium/secrets/server.jks SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT=tcp://172.20.11.14:5557 SE_RECORD_AUDIO=false SE_BROWSER_ARGS_DISABLE_DSHM=--disable-dev-shm-usage SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_PORT=tcp://172.20.142.218:5553 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_PORT_5556_TCP=tcp://172.20.197.66:5556 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_SERVICE_HOST=172.20.105.4 SE_VIDEO_FILE_NAME=video.mp4 KEDA_OPERATOR_SERVICE_PORT_METRICSSERVICE=9666 SE_HTTPS_CERTIFICATE=/opt/selenium/secrets/tls.crt SE_SERVER_PROTOCOL=http KEDA_OPERATOR_METRICS_APISERVER_SERVICE_HOST=172.20.190.251 SE_SCREEN_WIDTH=1920 
SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_SERVICE_PORT_TCP_ROUTER=4444 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_PORT_5556_TCP_ADDR=172.20.197.66 SE_VIDEO_POLL_INTERVAL=1 SE_SCREEN_DPI=96 SE_NODE_RELAY_ONLY=true KEDA_OPERATOR_PORT_9666_TCP_ADDR=172.20.166.180 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_5557_TCP_PORT=5557 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_5557_TCP_PROTO=tcp KEDA_OPERATOR_METRICS_APISERVER_PORT=tcp://172.20.190.251:443 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4443_TCP_PROTO=tcp CONFIG_FILE=/opt/selenium/config.toml SE_ENABLE_TLS=false KEDA_OPERATOR_METRICS_APISERVER_PORT_8080_TCP_PROTO=tcp SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_SERVICE_HOST=172.20.197.66 SEL_UID=1200 PWD=/ RCLONE_CONFIG=/opt/selenium/upload.conf SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_PORT_5559_TCP=tcp://172.20.152.63:5559 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_SERVICE_PORT_TCP_DIST=5553 KEDA_OPERATOR_METRICS_APISERVER_SERVICE_PORT=443 SE_START_XVFB=true SE_VIDEO_UPLOAD_ENABLED=false TZ=UTC VIDEO_FOLDER=/videos SE_SUB_PATH= DISPLAY_NUM=99 SE_PRESET=-preset ultrafast SE_NODE_PORT=5555 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_PORT_4444_TCP_PORT=4444 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_PORT_4444_TCP_PROTO=tcp SE_NODE_HOST=192.168.24.255 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_PORT_5559_TCP_ADDR=172.20.152.63 DISPLAY_CONTAINER_NAME=localhost SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_PORT_5553_TCP_ADDR=172.20.142.218 SE_REJECT_UNSUPPORTED_CAPS=false SE_NODE_OVERRIDE_MAX_SESSIONS=true HOME=/home/seluser SE_HTTP_LOGS=true KEDA_OPERATOR_PORT=tcp://172.20.166.180:9666 LANG=en_US.UTF-8 KEDA_OPERATOR_PORT_9666_TCP_PROTO=tcp KUBERNETES_PORT_443_TCP=tcp://172.20.0.1:443 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_SERVICE_PORT=5557 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4442_TCP_ADDR=172.20.11.14 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_SERVICE_PORT_TCP_SSN_MAP=5556 
LANG_WHERE=US SE_NODE_HEARTBEAT_PERIOD=30 VIRTUAL_ENV=/opt/venv SE_ROUTER_HOST=selenium-cqe-qas-qaskub2625-selenium-router SE_EVENT_BUS_SUBSCRIBE_PORT=4443 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_PORT=tcp://172.20.152.63:5559 SE_SUPERVISORD_UNIX_SERVER_PASSWORD=secret SE_OFFLINE=true KEDA_OPERATOR_METRICS_APISERVER_PORT_443_TCP_ADDR=172.20.190.251 ENCODING=UTF-8 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_PORT_4444_TCP=tcp://172.20.105.4:4444 KUBERNETES_NODE_HOST_IP=10.101.12.128 SE_NODE_GRID_URL=http://selenium-hub-qas3274.io.coupadev.com SE_JAVA_HEAP_DUMP=false SE_FRAME_RATE=15 SE_DISTRIBUTOR_PORT=5553 SE_NODE_PLATFORM_NAME= KEDA_OPERATOR_METRICS_APISERVER_SERVICE_PORT_METRICS=8080 GENERATE_CONFIG=true SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_PORT=tcp://172.20.197.66:5556 SE_BROWSER_LEFTOVERS_INTERVAL_SECS=3600 SE_AUDIO_SOURCE=-f pulse -ac 2 -i default SE_SUPERVISORD_PID_FILE=/tmp/supervisord.pid SE_ENABLE_TRACING=false SE_NODE_REGISTER_SHUTDOWN_ON_FAILURE=true SE_STRUCTURED_LOGS=false SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4443_TCP_ADDR=172.20.11.14 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_PORT_5553_TCP_PORT=5553 SE_NODE_BROWSER_VERSION= SE_CODEC=libx264 SE_HUB_PORT=4444 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_PORT=tcp://172.20.105.4:4444 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_SERVICE_PORT_HTTP_EVTBUS=5557 SE_EVENT_BUS_PUBLISH_PORT=4442 TERM=xterm SE_NODE_REGISTER_PERIOD=600 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_PORT_5553_TCP_PROTO=tcp SE_OTEL_JAVA_GLOBAL_AUTOCONFIGURE_ENABLED=true KEDA_OPERATOR_METRICS_APISERVER_PORT_8080_TCP_PORT=8080 SE_DRAIN_AFTER_SESSION_COUNT=1 DEBCONF_NONINTERACTIVE_SEEN=true SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4442_TCP_PROTO=tcp SE_NODE_REGISTER_CYCLE=60 SE_NODE_ENABLE_MANAGED_DOWNLOADS=true SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_PORT_5559_TCP_PROTO=tcp SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_SERVICE_HOST=172.20.152.63 
SE_NODE_SESSION_TIMEOUT=3600 SE_VNC_PORT=5900 SE_BIND_HOST=false SE_BROWSER_LEFTOVERS_TEMPFILES_DAYS=1 SE_SUPERVISORD_AUTO_RESTART=true SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_PORT_5553_TCP=tcp://172.20.142.218:5553 SE_VNC_PASSWORD=secret DISPLAY=:99.0 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_SERVICE_PORT_TCP_EVTBUS_PUB=4442 SE_NODE_STEREOTYPE_EXTRA= SHLVL=1 SE_OTEL_SERVICE_NAME=selenium-cqe-qas-qaskub2625-selenium-node-chrome SE_VIDEO_INTERNAL_UPLOAD=true SE_ROUTER_PORT=4444 KEDA_OPERATOR_METRICS_APISERVER_PORT_443_TCP_PORT=443 KUBERNETES_PORT_443_TCP_PROTO=tcp SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_SERVICE_HOST=172.20.142.218 SEL_USER=seluser KEDA_OPERATOR_METRICS_APISERVER_PORT_443_TCP=tcp://172.20.190.251:443 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_PORT_5559_TCP_PORT=5559 SE_SUPERVISORD_LOG_LEVEL=info VIRTUAL_ENV_PROMPT=(venv) KUBERNETES_PORT_443_TCP_ADDR=172.20.0.1 SE_SCREEN_HEIGHT=1080 KEDA_OPERATOR_PORT_9666_TCP_PORT=9666 KEDA_OPERATOR_METRICS_APISERVER_PORT_8080_TCP=tcp://172.20.190.251:8080 SE_START_VNC=true SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_DISTRIBUTOR_SERVICE_PORT=5553 SE_LOG_LEVEL=INFO SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_SERVICE_HOST=172.20.11.14 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4443_TCP=tcp://172.20.11.14:4443 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_SERVICE_PORT=4444 PS1=(venv) ${debian_chroot:+($debian_chroot)}\u@\h:\w$ SE_RECORD_VIDEO=false SE_OTEL_TRACES_EXPORTER=otlp SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4443_TCP_PORT=4443 SE_LOG_TIMESTAMP_FORMAT=%Y-%m-%d %H:%M:%S,%3N KUBERNETES_SERVICE_HOST=172.20.0.1 LANG_WHICH=en KUBERNETES_PORT=tcp://172.20.0.1:443 KUBERNETES_PORT_443_TCP_PORT=443 NODE_CONFIG_DIRECTORY=/opt/bin KEDA_OPERATOR_METRICS_APISERVER_PORT_443_TCP_PROTO=tcp SE_NODE_BROWSER_NAME=chrome SE_SUPERVISORD_LOG_FILE=/tmp/supervisord.log SE_VIDEO_FILE_NAME_TRIM_REGEX=[:alnum:]-_ 
PATH=/opt/venv/bin:/opt/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin SE_EVENT_BUS_HOST=selenium-cqe-qas-qaskub2625-selenium-event-bus SE_HUB_HOST=selenium-cqe-qas-qaskub2625-selenium-hub SE_SCREEN_DEPTH=24 SE_JAVA_SSL_TRUST_STORE_PASSWORD=/opt/selenium/secrets/server.pass DBUS_SESSION_BUS_ADDRESS=/dev/null SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_PORT_5556_TCP_PROTO=tcp KEDA_OPERATOR_METRICS_APISERVER_PORT_8080_TCP_ADDR=172.20.190.251 SE_JAVA_DISABLE_HOSTNAME_VERIFICATION=true SE_SUPERVISORD_START_RETRIES=5 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_SERVICE_PORT_TCP_EVTBUS_SUB=4443 SE_DISTRIBUTOR_HOST=selenium-cqe-qas-qaskub2625-selenium-distributor KEDA_OPERATOR_PORT_9666_TCP=tcp://172.20.166.180:9666 VENV_PATH=/opt/venv SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_MAP_SERVICE_PORT=5556 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_SESSION_QUEUE_SERVICE_PORT_TCP_SSN_QUE=5559 DEBIAN_FRONTEND=noninteractive SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_EVENT_BUS_PORT_4442_TCP_PORT=4442 SEL_GID=1201 SE_NODE_GRID_GRAPHQL_URL=http://selenium-cqe-qas-qaskub2625-selenium-router.selenium-cqe-qas:4444/graphql SE_JAVA_OPTS=-XX:+UseG1GC -XX:MaxGCPauseMillis=1000 -XX:MaxRAMPercentage=50 SE_UPLOAD_DESTINATION_PREFIX= SE_JAVA_OPTS_DEFAULT= SE_SESSION_REQUEST_TIMEOUT=3600 SELENIUM_CQE_QAS_QASKUB2625_SELENIUM_ROUTER_PORT_4444_TCP_ADDR=172.20.105.4 KEDA_OPERATOR_SERVICE_PORT=9666 SE_HTTPS_PRIVATE_KEY=/opt/selenium/secrets/tls.key _=/usr/bin/env """

Image

Command used to start Selenium Grid with Docker (or Kubernetes)

Values.yaml file
""""
selenium-grid:
  isolateComponents: true
  httpLogs: true
  logLevel: ALL
  ingress:
    # Enable or disable ingress resource
    enabled: true
    # Name of ingress class to select which controller will implement ingress resource
    className: "nginx-external"
    # Custom annotations for ingress resource
    annotations:  
      nginx.ingress.kubernetes.io/ssl-redirect: "true"	
      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"	
      nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
      # nginx.ingress.kubernetes.io/proxy-connect-timeout: "3600"
      # nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
      nginx.ingress.kubernetes.io/whitelist-source-range: 13.234.187.222/32,3.111.163.205/32,3.11.190.81/32,35.178.113.179/32,3.141.155.232/32,34.206.31.254/32,3.226.4.161/32,100.25.253.145/32,44.193.247.16/32,34.202.249.205/32,40.70.161.46/32,20.190.193.30/32,20.62.2.175/32,3.229.116.109/32,3.230.224.79/32,52.1.10.200/32,52.1.46.200/32,3.226.241.126/32,107.23.101.81/32,3.93.172.139/32,52.22.171.154/32,18.168.90.237/32,34.201.223.36/32,3.95.40.18/32,3.248.216.165/32,54.153.255.43/32,52.215.250.55/32,3.95.40.144/32,18.140.177.145/32,98.83.247.233/32,3.232.50.12/32
    hostname: "selenium-hub-qas3274.io.coupadev.com"  
    # Default host path for the ingress resource
    path: /
     
  autoscaling:
    enabled: true
    scalingType: job
    scaledOptions:
      minReplicaCount: 10
      maxReplicaCount: 100
      pollingInterval: 10
      
  components:
    extraEnvironmentVariables:
      - name: SE_SESSION_REQUEST_TIMEOUT
        value: "3600"
      # - name: SE_JAVA_HTTPCLIENT_VERSION
      #   value: HTTP_2
    router:
      replicas: 3
    distributor:
      newSessionThreadPoolSize: 5000
      replicas: 2
    sessionMap:
      replicas: 1
    sessionQueue:
      replicas: 2
      extraEnvironmentVariables:
      - name: SE_SESSION_RETRY_INTERVAL
        value: "5"
    eventBus:
      replicas: 1
    
  chromeNode:
    # replicas: 5
    nodeRegisterPeriod: 600
    nodeRegisterCycle: 60
    labels:
      role: selenium-chrome-node
    hpa:
      platformName: ""
    resources:
      requests:
        memory: "1Gi"
        cpu: "1"
      limits:
        memory: "2Gi"
        cpu: "1"
    extraVolumes:
      - name: selenium-efs
        persistentVolumeClaim:
          claimName: efs-pvc
    extraVolumeMounts:
      - name: selenium-efs
        mountPath: /home/seluser/Downloads 
    extraEnvironmentVariables:
      - name: SE_VNC_NO_PASSWORD
        value: "1"
      - name: SE_NODE_OVERRIDE_MAX_SESSIONS
        value: "true"
      - name: SE_NODE_SESSION_TIMEOUT
        value: "3600"
      - name: SE_SESSION_REQUEST_TIMEOUT
        value: "3600"
      - name: SE_HTTP_LOGS
        value: 'true'
      - name: SE_NODE_PRESTOP_WAIT_STRATEGY
        value: "DRAIN"
      # - name: SE_JAVA_HTTPCLIENT_VERSION
      #   value: HTTP_2
    # Pod anti-affinity to spread pods across nodes
    affinity:
      podAntiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            labelSelector:
              matchExpressions:
              - key: role
                operator: In
                values:
                - selenium-chrome-node
            topologyKey: kubernetes.io/hostname
  keda:
    metricsServer:
      useHostNetwork: true
  edgeNode:
    enabled: false
    deploymentEnabled: false
  firefoxNode:
    enabled: false
    deploymentEnabled: false

Relevant log output

2025-07-22 16:45:16,710 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/6577cdeec48d1864e556144b409d0448 -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/6577cdeec48d1864e556144b409d0448
2025-07-22 16:45:16,790 - INFO - Session 4: WebDriver closed
2025-07-22 16:45:16,790 - INFO - ✅ Session 4 completed successfully
2025-07-22 16:45:16,856 - INFO - Session 2: WebDriver closed
2025-07-22 16:45:16,857 - INFO - ✅ Session 2 completed successfully
2025-07-22 16:45:17,140 - INFO - Session 6: WebDriver closed
2025-07-22 16:45:17,140 - INFO - ✅ Session 6 completed successfully
2025-07-22 16:45:17,449 - INFO - Session 1: Page source length - 259499
2025-07-22 16:45:17,775 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642/screenshot -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642/screenshot
2025-07-22 16:45:18,640 - INFO - Session 1: Screenshot saved - screenshot_session_1.png
2025-07-22 16:45:18,641 - INFO - Session 1: Node info - {'browser_name': 'chrome', 'browser_version': '135.0.7049.84', 'platform': 'any', 'node_id': 'ws://selenium-hub-qas3274.io.coupadev.com/session/83ae45c5f2de066d2a2e983f16a41642/se/cdp'}
2025-07-22 16:45:18,962 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642/execute/sync -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642/execute/sync
2025-07-22 16:45:19,666 - INFO - Session 1: Document ready state - complete
2025-07-22 16:45:19,666 - INFO - ✅ Session 1: All validations passed
2025-07-22 16:45:19,990 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642 -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/83ae45c5f2de066d2a2e983f16a41642
2025-07-22 16:45:20,512 - INFO - Session 1: WebDriver closed
2025-07-22 16:45:20,512 - INFO - ✅ Session 1 completed successfully
2025-07-22 16:50:03,221 - WARNING - Session 9: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 16:50:03,260 - WARNING - Session 7: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 16:50:03,423 - WARNING - Session 10: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 16:50:03,492 - WARNING - Session 5: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 16:50:03,906 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 16:50:03,912 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 16:50:04,073 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 16:50:04,150 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 16:50:08,609 - INFO - Session 9: WebDriver created successfully on attempt 2
2025-07-22 16:50:08,610 - INFO - Session 9: Getting session info...
2025-07-22 16:50:08,944 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b11723e752c7aaed3cc25724dc598334/execute/sync -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b11723e752c7aaed3cc25724dc598334/execute/sync
2025-07-22 16:50:09,633 - INFO - Session 9: User Agent - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36
2025-07-22 16:50:09,634 - INFO - Session 9: Navigating to Google...
2025-07-22 16:50:09,958 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b11723e752c7aaed3cc25724dc598334/url -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b11723e752c7aaed3cc25724dc598334/url
2025-07-22 16:50:13,103 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b11723e752c7aaed3cc25724dc598334/element -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b11723e752c7aaed3cc25724dc598334/element
2025-07-22 16:50:14,193 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b11723e752c7aaed3cc25724dc598334/title -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b11723e752c7aaed3cc25724dc598334/title
2025-07-22 16:50:14,865 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b11723e752c7aaed3cc25724dc598334/title -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b11723e752c7aaed3cc25724dc598334/title
2025-07-22 16:50:14,884 - INFO - Session 10: WebDriver created successfully on attempt 2
2025-07-22 16:50:14,884 - INFO - Session 10: Getting session info...
2025-07-22 16:50:15,017 - INFO - Session 5: WebDriver created successfully on attempt 2
2025-07-22 16:50:15,018 - INFO - Session 5: Getting session info...
2025-07-22 16:50:15,205 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/b772ca3ad1ee92d27dd02adad7520785/execute/sync -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/b772ca3ad1ee92d27dd02adad7520785/execute/sync
2025-07-22 16:50:15,206 - INFO - Session 9: Page title - Google
2025-07-22 16:50:15,340 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/25fed402984b0f45fc5ad652195fd10d/execute/sync -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/25fed402984b0f45fc5ad652195fd10d/execute/sync
2025-07-22 16:50:15,536 - INF

Operating System

macOS and Ubuntu(on cloud) both

Docker Selenium version (image tag)

4.31.0-20250414

Selenium Grid chart version (chart version)

0.42.1

Aditya27041999 avatar Jul 22 '25 11:07 Aditya27041999

@Aditya27041999, thank you for creating this issue. We will troubleshoot it as soon as we can.


Info for maintainers

Triage this issue by using labels.

If information is missing, add a helpful comment and then I-issue-template label.

If the issue is a question, add the I-question label.

If the issue is valid but there is no time to troubleshoot it, consider adding the help wanted label.

If the issue requires changes or fixes from an external project (e.g., ChromeDriver, GeckoDriver, MSEdgeDriver, W3C), add the applicable G-* label, and it will provide the correct link and auto-close the issue.

After troubleshooting the issue, please add the R-awaiting answer label.

Thank you!

github-actions[bot] avatar Jul 22 '25 11:07 github-actions[bot]

Please post a few lines of code that can reproduce the issue. Nobody is going to debug hundreds of lines of your code to find a potential issue that may not even be related to this project.

cgoldberg avatar Jul 22 '25 12:07 cgoldberg

Sure @cgoldberg, is this enough to reproduce the issue?

import concurrent.futures
import logging
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Root logging setup: INFO and above, each record prefixed with its timestamp.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the code in this script.
logger = logging.getLogger(__name__)

class SeleniumGridTester:
    """Open several Chrome sessions against a Selenium Grid concurrently.

    Every session loads a page, waits for its ``<body>`` element and
    reports success or failure. Outcomes are appended to ``self.results``
    and summarised on stdout.
    """

    def __init__(self, grid_url, max_workers=10):
        """Store the Grid location and the desired concurrency.

        Args:
            grid_url: Base URL of the Grid; ``/wd/hub`` is appended when a
                session is created.
            max_workers: Number of sessions to start, which is also the
                size of the thread pool running them.
        """
        self.grid_url = grid_url
        self.max_workers = max_workers
        self.results = []

    def create_chrome_options(self):
        """Return the Chrome ``Options`` shared by every session."""
        options = Options()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-gpu')
        options.add_argument('--window-size=1920,1080')
        return options

    def run_single_session(self, session_id):
        """Run one browser session and return its outcome.

        Args:
            session_id: Label used in log messages and in the result.

        Returns:
            A dict with ``session_id`` and ``success``; successful sessions
            also carry ``title``, failed ones carry ``error`` (the
            exception text). Errors raised while creating, driving or
            closing the session are logged rather than propagated.
        """
        driver = None
        try:
            logger.info(f"Starting session {session_id}...")

            driver = webdriver.Remote(
                command_executor=f"{self.grid_url}/wd/hub",
                options=self.create_chrome_options(),
            )

            # Basic validation
            driver.get("https://www.google.com")
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )

            # Read the title once so the logged and returned values are
            # guaranteed to be the same and only one lookup is made.
            title = driver.title
            logger.info(f"✅ Session {session_id}: Success - {title}")
            return {'session_id': session_id, 'success': True, 'title': title}

        except Exception as e:
            logger.error(f"❌ Session {session_id}: Failed - {e}")
            return {'session_id': session_id, 'success': False, 'error': str(e)}
        finally:
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    # An exception escaping ``finally`` would discard the
                    # result returned above and surface from
                    # ``future.result()``, aborting the whole run before
                    # the summary is printed. Teardown is best-effort.
                    logger.warning(
                        f"Session {session_id}: driver.quit() failed - {quit_error}"
                    )

    def run_concurrent_sessions(self):
        """Run ``max_workers`` sessions in parallel and print a summary.

        Results are appended to ``self.results`` in completion order.
        """
        logger.info(f"Starting {self.max_workers} concurrent sessions...")

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self.run_single_session, i)
                for i in range(1, self.max_workers + 1)
            ]

            for future in concurrent.futures.as_completed(futures):
                self.results.append(future.result())

        # Print summary
        successful = sum(1 for r in self.results if r['success'])
        print(f"\nResults: {successful}/{len(self.results)} sessions successful")

        # Print failures
        failures = [r for r in self.results if not r['success']]
        if failures:
            print("\nFailures:")
            for f in failures:
                print(f"  Session {f['session_id']}: {f['error']}")

def main():
    """Build a tester for the configured Grid and run the concurrent check."""
    # UPDATE THIS URL TO YOUR SELENIUM GRID
    grid_url = "http://selenium-hub-qas3274.io.coupadev.com"
    worker_count = 10

    SeleniumGridTester(grid_url, worker_count).run_concurrent_sessions()

# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Aditya27041999 avatar Jul 22 '25 12:07 Aditya27041999

I just replied to this in another thread. From the logs, I can see this Grid is deployed on K8s and probably has an Ingress in front of it, so please read https://github.com/SeleniumHQ/docker-selenium/tree/trunk/charts/selenium-grid#ingress-configuration. The Ingress drops the connection when its read timeout is reached.

VietND96 avatar Jul 22 '25 12:07 VietND96

Might I know which cloud is used to host the Grid?

VietND96 avatar Jul 22 '25 12:07 VietND96

@VietND96 I am using AWS EKS to host the Grid components. I have also included these annotations in my values file:

annotations:
  nginx.ingress.kubernetes.io/ssl-redirect: "true"
  nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
  nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"

Aditya27041999 avatar Jul 22 '25 12:07 Aditya27041999

I don't remember exactly, but with AWS ELB you need to set another annotation to adjust its timeout. If you can access the console, you will see that the ingress controller creates an ELB, along with its timeout. I will check and let you know.

VietND96 avatar Jul 22 '25 12:07 VietND96

Sure @VietND96, thanks for the help. Also, is it possible to set up a call (Zoom, Google Meet, or something similar)?

Aditya27041999 avatar Jul 22 '25 12:07 Aditya27041999

alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=3600
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"

The first annotation is for ALB and the second is for Classic ELB. I am not sure which LB type is used in your deployment, so set both and see how it works.

VietND96 avatar Jul 22 '25 13:07 VietND96

We are using ALB + NLB for our load balancing. Would these be added directly under the ingress annotations in the values file?

Aditya27041999 avatar Jul 22 '25 13:07 Aditya27041999

Yes, in the same place where the current nginx.ingress annotations are defined.

VietND96 avatar Jul 22 '25 13:07 VietND96

@VietND96 I applied these changes, but I still see the 504 Gateway Time-out, so I had to implement a retry mechanism in my script.

annotations:  
      nginx.ingress.kubernetes.io/ssl-redirect: "true"	
      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"	
      nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
      nginx.ingress.kubernetes.io/proxy-connect-timeout: "3600"
      nginx.ingress.kubernetes.io/proxy-send-timeout: "1800"
      nginx.ingress.kubernetes.io/upstream-keepalive-timeout: "900"
      alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=900
2025-07-22 19:42:13,249 - INFO - ✅ Session 10: All validations passed
2025-07-22 19:42:13,570 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session/6422cc6b86e1de089f2144147a69c5fb -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session/6422cc6b86e1de089f2144147a69c5fb
2025-07-22 19:42:13,604 - INFO - Session 9: WebDriver closed
2025-07-22 19:42:13,605 - INFO - ✅ Session 9 completed successfully
2025-07-22 19:42:14,083 - INFO - Session 10: WebDriver closed
2025-07-22 19:42:14,083 - INFO - ✅ Session 10 completed successfully
2025-07-22 19:46:59,768 - WARNING - Session 7: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 19:46:59,769 - WARNING - Session 2: WebDriver creation failed on attempt 1 - Message: <html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h1>504 Gateway Time-out</h1></center>
</body>
</html>

2025-07-22 19:47:00,460 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 19:47:00,462 - INFO - Redirecting http://selenium-hub-qas3274.io.coupadev.com/wd/hub/session -> https://selenium-hub-qas3274.io.coupadev.com:443/wd/hub/session
2025-07-22 19:47:03,514 - INFO - Session 7: WebDriver created successfully on attempt 2
2025-07-22 19:47:03,515 - INFO - Session 7: Getting session info...
2025-07-22 19:47:03,590 - INFO - Session 2: WebDriver created successfully on attempt 2
2025-07-22 19:47:03,590 - INFO - Session 2: Getting session info...

Aditya27041999 avatar Jul 22 '25 14:07 Aditya27041999

Can you refer to https://www.selenium.dev/documentation/webdriver/drivers/http_client/ (Python) and also use ClientConfig to set the timeout when creating the RemoteWebDriver? However, I still suspect the issue is around the LB. Can you access the AWS console and check the ALB and NLB details for any configuration related to timeout or idle time, and see what the values are?

VietND96 avatar Jul 22 '25 15:07 VietND96

I do not know how to include the username and password here. Can I simply add the timeouts and retries?

# Client-side HTTP settings for the Grid connection: server address, proxy,
# pool-manager arguments, CA certificate and basic-auth credentials.
# NOTE(review): the retries/timeout options are nested under a second
# "init_args_for_pool_manager" key — presumably what Selenium's ClientConfig
# expects; confirm against the http_client documentation before changing it.
client_config = ClientConfig(remote_server_addr=grid_server,
                                 proxy=proxy,
                                 init_args_for_pool_manager={
                                     "init_args_for_pool_manager": {"retries": retries, "timeout": timeout}},
                                 ca_certs=_get_resource_path("tls.crt"),
                                 username="admin", password="myStrongPassword")

Aditya27041999 avatar Jul 24 '25 12:07 Aditya27041999

Remove the username/password arguments if your Grid does not use basic auth.

VietND96 avatar Jul 24 '25 12:07 VietND96

We also got 504 sometimes. We use nginx 1.18.0 (natively on Ubuntu 22.04) and selenium/hub:4.33.0-20250525 in Docker (natively on Ubuntu 22.04), so no Kubernetes. implicitly_wait == 0.

Here is the access logs of one session in trouble:

"GET /wd/hub/session/93dd0ce5762b447eee615e36b80a2899/element/f.3942F8E80DA0F1578534CB475A4CD4B4.d.AB8E6101A6C0183B18AA67AA5915D3C7.e.327/text HTTP/1.1" 200 37 "-" "selenium/4.33.0 (python linux)"
"GET /wd/hub/session/93dd0ce5762b447eee615e36b80a2899/screenshot HTTP/1.1" 200 398840 "-" "selenium/4.33.0 (python linux)"
"POST /wd/hub/session/93dd0ce5762b447eee615e36b80a2899/element HTTP/1.1" 200 127 "-" "selenium/4.33.0 (python linux)"

The remaining requests return 504:

"POST /wd/hub/session/93dd0ce5762b447eee615e36b80a2899/element/f.3942F8E80DA0F1578534CB475A4CD4B4.d.AB8E6101A6C0183B18AA67AA5915D3C7.e.328/click HTTP/1.1" 504 176 "-" "selenium/4.33.0 (python linux)"
"GET /wd/hub/session/93dd0ce5762b447eee615e36b80a2899/screenshot HTTP/1.1" 504 176 "-" "selenium/4.33.0 (python linux)"
"DELETE /wd/hub/session/93dd0ce5762b447eee615e36b80a2899 HTTP/1.1" 504 176 "-" "selenium/4.33.0 (python linux)"

It looks like the script runs fine, but then suddenly all subsequent requests time out.

The default nginx gateway timeout is 60 seconds. Isn't that enough to complete a click request?

obrizan avatar Aug 22 '25 14:08 obrizan

@obrizan, actually, the connection from the client to the Grid is kept alive after the RemoteWebDriver is created; it does not open a new connection for each action (e.g., click, sendKeys, etc.). I guess the gateway has a different timeout, so focus on the read timeout, the keep-alive timeout, etc. The timeout should be greater than or equal to the duration of the longest test execution. Moreover, you can try adding retries in ClientConfig when establishing the RemoteWebDriver (refer to https://www.selenium.dev/documentation/webdriver/drivers/http_client/) to see if it helps.

VietND96 avatar Aug 22 '25 17:08 VietND96