bottlerocket icon indicating copy to clipboard operation
bottlerocket copied to clipboard

EGL offscreen rendering won't initialize in cudagl image running under Bottlerocket

Open BG4444 opened this issue 1 month ago • 2 comments

Image I'm using:

# Build arguments declared ahead of FROM so they can be substituted into the base image tag.
ARG UBUNTU_RELEASE=20.04
ARG CUDA_VERSION=11.2.2
# With the defaults above this resolves to nvcr.io/nvidia/cudagl:11.2.2-runtime-ubuntu20.04.
FROM nvcr.io/nvidia/cudagl:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_RELEASE}

What I expected to happen: The code below should work with no issues

#!/usr/bin/env python3
# Request the EGL platform before the OpenGL imports below run; setdefault
# leaves any PYOPENGL_PLATFORM value already present in the environment untouched.
import os
os.environ.setdefault("PYOPENGL_PLATFORM", "egl")

from OpenGL import EGL as egl
from OpenGL import GL as gl
import ctypes
import sys


def get_egl_devices():
    """Enumerate EGL devices through the EXT device-query entry points.

    Looks up ``eglQueryDevicesEXT`` and ``eglQueryDeviceStringEXT`` via
    ``eglGetProcAddress``, requests up to 16 device handles, and prints the
    vendor string and extension string reported for each handle.

    Returns:
        A list of the device handles read back from the ctypes array, or an
        empty list when either entry point is missing or the query call
        reports failure.
    """
    # Raw entry points; they are wrapped in ctypes prototypes further down.
    query_devices_addr = egl.eglGetProcAddress(b"eglQueryDevicesEXT")
    query_string_addr = egl.eglGetProcAddress(b"eglQueryDeviceStringEXT")

    if not (query_devices_addr and query_string_addr):
        print("EGL_EXT_device_enumeration not available – will try DEFAULT_DISPLAY path.")
        return []

    # Prototype: (max_devices, devices*, num_devices*) -> unsigned status
    query_devices = ctypes.CFUNCTYPE(
        ctypes.c_uint,
        ctypes.c_int,
        ctypes.POINTER(ctypes.c_void_p),
        ctypes.POINTER(ctypes.c_int),
    )(query_devices_addr)
    # Prototype: (device, name) -> C string
    query_device_string = ctypes.CFUNCTYPE(
        ctypes.c_char_p,
        ctypes.c_void_p,
        ctypes.c_int,
    )(query_string_addr)

    capacity = 16
    handle_buffer = (ctypes.c_void_p * capacity)()
    reported = ctypes.c_int()

    if not query_devices(capacity, handle_buffer, ctypes.byref(reported)):
        print("eglQueryDevicesEXT failed")
        return []

    count = reported.value
    print(f"Found {count} EGL device(s)")

    found = []
    for index in range(count):
        handle = handle_buffer[index]
        # Both strings are queried before anything is printed for this device.
        vendor = query_device_string(handle, 0x3053)  # EGL_VENDOR
        extensions = query_device_string(handle, 0x3055)  # EGL_EXTENSIONS
        vendor_text = vendor.decode() if vendor else 'N/A'
        print(f"[{index}] device={handle} vendor={vendor_text}")
        if extensions:
            print(f"     extensions: {extensions.decode()}")
        found.append(handle)
    return found


def create_display_from_device(dev):
    """Obtain an EGL display for a device handle via ``eglGetPlatformDisplayEXT``.

    Equivalent C call: ``eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, dev, NULL)``.

    Args:
        dev: Device handle, as produced by ``get_egl_devices``.

    Returns:
        Whatever the extension call returns for the display, or ``None`` when
        the entry point cannot be resolved.
    """
    EGL_PLATFORM_DEVICE_EXT = 0x313F

    entry_point = egl.eglGetProcAddress(b"eglGetPlatformDisplayEXT")
    if not entry_point:
        return None

    # Prototype: (platform, native_display, attrib_list*) -> display handle
    prototype = ctypes.CFUNCTYPE(
        ctypes.c_void_p,
        ctypes.c_uint,
        ctypes.c_void_p,
        ctypes.POINTER(ctypes.c_int),
    )
    get_platform_display = prototype(entry_point)

    # No attribute list is supplied (NULL).
    return get_platform_display(EGL_PLATFORM_DEVICE_EXT, dev, None)


def main():
    """Bring up an offscreen EGL/OpenGL context, clear a 1x1 pbuffer, tear down.

    Steps, in order: pick a display (first enumerated device, otherwise the
    default display), initialize EGL, choose a pbuffer-capable OpenGL config
    with 8-bit red/green/blue, create a 1x1 pbuffer surface, bind the OpenGL
    API, create a context and make it current, print the GL vendor, renderer
    and version strings, issue a clear, then release everything. A failed
    step prints a message naming that step and exits with status 1.
    """

    def fail(message):
        # Shared failure path: report which step broke, then exit non-zero.
        print(message)
        sys.exit(1)

    found_devices = get_egl_devices()

    display = None
    if found_devices:
        display = create_display_from_device(found_devices[0])
        if not display:
            print("Could not create display from device, falling back to DEFAULT_DISPLAY.")
    if not display:
        # Either nothing was enumerated or the device path produced no display.
        display = egl.eglGetDisplay(egl.EGL_DEFAULT_DISPLAY)

    if display == egl.EGL_NO_DISPLAY:
        fail("Failed to get EGL display")

    egl_major = ctypes.c_int()
    egl_minor = ctypes.c_int()
    initialized = egl.eglInitialize(
        display, ctypes.byref(egl_major), ctypes.byref(egl_minor)
    )
    if not initialized:
        fail("eglInitialize failed")
    print(f"EGL initialized: version {egl_major.value}.{egl_minor.value}")

    # Config request for a pbuffer. The array is sized 13 while 11 values
    # are supplied; the list is terminated by EGL_NONE.
    config_request = (ctypes.c_int * 13)(
        egl.EGL_SURFACE_TYPE, egl.EGL_PBUFFER_BIT,
        egl.EGL_RENDERABLE_TYPE, egl.EGL_OPENGL_BIT,
        egl.EGL_RED_SIZE, 8,
        egl.EGL_GREEN_SIZE, 8,
        egl.EGL_BLUE_SIZE, 8,
        egl.EGL_NONE
    )
    matched = ctypes.c_int()
    chosen_config = ctypes.c_void_p()
    chose_ok = egl.eglChooseConfig(
        display, config_request, ctypes.byref(chosen_config), 1, ctypes.byref(matched)
    )
    if not (chose_ok and matched.value):
        fail("eglChooseConfig failed")

    # Tiny 1x1 pbuffer to render into.
    surface_request = (ctypes.c_int * 5)(
        egl.EGL_WIDTH, 1,
        egl.EGL_HEIGHT, 1,
        egl.EGL_NONE
    )
    surface = egl.eglCreatePbufferSurface(display, chosen_config, surface_request)
    if surface == egl.EGL_NO_SURFACE:
        fail("eglCreatePbufferSurface failed")

    # Select desktop OpenGL as the client API before creating the context.
    if not egl.eglBindAPI(egl.EGL_OPENGL_API):
        fail("eglBindAPI(EGL_OPENGL_API) failed")

    context = egl.eglCreateContext(display, chosen_config, egl.EGL_NO_CONTEXT, None)
    if context == egl.EGL_NO_CONTEXT:
        fail("eglCreateContext failed")

    if not egl.eglMakeCurrent(display, surface, surface, context):
        fail("eglMakeCurrent failed")

    # A GL context should now be current; query all three identification
    # strings first, then print them.
    gl_strings = [
        gl.glGetString(name)
        for name in (gl.GL_VENDOR, gl.GL_RENDERER, gl.GL_VERSION)
    ]
    labels = ("GL_VENDOR  :", "GL_RENDERER:", "GL_VERSION :")
    for label, raw in zip(labels, gl_strings):
        print(label, raw.decode() if raw else "None")

    # Trivial offscreen clear.
    gl.glViewport(0, 0, 1, 1)
    gl.glClearColor(0.2, 0.4, 0.6, 1.0)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    print("Offscreen clear OK")

    # Teardown: unbind first, then destroy the surface, the context, and the display connection.
    egl.eglMakeCurrent(display, egl.EGL_NO_SURFACE, egl.EGL_NO_SURFACE, egl.EGL_NO_CONTEXT)
    egl.eglDestroySurface(display, surface)
    egl.eglDestroyContext(display, context)
    egl.eglTerminate(display)
    print("EGL cleanup done")


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

What actually happened:

egl.eglInitialize fails with error EGL_NOT_INITIALIZED

How to reproduce the problem:

Run the Docker image, then run the Python code above inside the container. The PyOpenGL package is required.

BG4444 avatar Nov 12 '25 19:11 BG4444

@BG4444 Can you provide information on what Bottlerocket version you are on? Did this work on a previous version and is now broken, or are you trying this for the first time?

KCSesh avatar Nov 13 '25 15:11 KCSesh

Thank you for the quick reply, @KCSesh! Here is my setup:

ami-06b81755456694293 aws-k8s-1.32-nvidia/x86_64 bottlerocket-aws-k8s-1.32-nvidia-x86_64-v1.38.0-78594e2e

This was my first attempt to run this app on Bottlerocket. I have the same setup on a g4n instance running Ubuntu, and it works fine

BG4444 avatar Nov 13 '25 16:11 BG4444