OpenCL-CTS icon indicating copy to clipboard operation
OpenCL-CTS copied to clipboard

User events in command wait lists cause some implementations to hang

Open jansol opened this issue 2 years ago • 0 comments

When a command is enqueued with a wait list containing user events that have not yet been set to CL_COMPLETE, some implementations stop processing every command in that queue. This happens even when the driver advertises support for out-of-order queues and the queue was created with the corresponding property (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE).

According to my understanding of the spec, the following sample code should run without problems, but on some drivers it hangs:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <CL/opencl.h>

/* Turn the source text of the argument into a string literal. */
#define STR(expr) #expr

/*
 * Evaluate an expression yielding an OpenCL status code and store it in the
 * caller's `err` variable, which must already be declared in the enclosing
 * scope. If the status is not CL_SUCCESS, print the code, the file and line
 * of the invocation and the expression text, then return -1 from the
 * enclosing function.
 */
#define CHECK(call)                                                   \
    do {                                                              \
        err = (call);                                                 \
        if (err == CL_SUCCESS)                                        \
            break;                                                    \
        printf("CL error %d in " __FILE__ ":%d: %s\n",                \
               err, __LINE__, STR(call));                             \
        return -1;                                                    \
    } while (0)

int main (int argc, const char ** argv) {
    cl_int err;
    cl_uint num_platforms;
    CHECK( clGetPlatformIDs(0, NULL, &num_platforms) );
    cl_platform_id *platforms = (cl_platform_id*)malloc(sizeof (cl_platform_id) * num_platforms);
    CHECK( clGetPlatformIDs(num_platforms, platforms, NULL) );

    cl_uint num_devices;
    CHECK( clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices) );
    cl_device_id *devices = (cl_device_id*)malloc(sizeof (cl_device_id) * num_devices);
    CHECK( clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL) );

    cl_context ctx = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err);
    CHECK(err/*clCreateContext*/);

    const char *clsrc = "kernel void incr(global size_t *a) { *a = *a+1; }";
    cl_program prog = clCreateProgramWithSource(ctx, 1, &clsrc, NULL, &err);
    CHECK(err/*clCreateProgram*/);
    CHECK( clBuildProgram(prog, 0, NULL, NULL, NULL, NULL) );

    cl_uint num_kernels;
    CHECK( clCreateKernelsInProgram(prog, 0, NULL, &num_kernels) );
    assert (num_kernels == 1);
    cl_kernel *kernels = (cl_kernel*)malloc(sizeof (cl_kernel) * num_kernels);
    CHECK( clCreateKernelsInProgram(prog, num_kernels, kernels, NULL) );

    cl_queue_properties queue_props[3] = {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
        0
    };
    cl_command_queue queue = clCreateCommandQueueWithProperties(
        ctx,
        devices[0],
        queue_props,
        &err
    );
    CHECK(err/*clCreateCommandQueueWithProperties*/);

    size_t one = 1;
    cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(one), NULL, &err);
    CHECK(err/*clCreateBuffer*/);

    CHECK( clSetKernelArg(kernels[0], 0, sizeof(one), &buf) );

    /*
     * Problems start here
     * */

    cl_event placeholder = clCreateUserEvent(ctx, &err);
    CHECK(err/*clCreateUserEvent*/);

    /* Queue has been created with the Out Of Order Execution property enabled,
     * so this should not block other commands */
    cl_event kernel_event;
    CHECK( clEnqueueNDRangeKernel(queue, kernels[0], 1, NULL, &one, &one, 1, &placeholder, &kernel_event) );

    cl_event buffer_event;
    CHECK( clEnqueueFillBuffer(queue, buf, &one, sizeof(one), 0, sizeof(one), 0, NULL, &buffer_event) );

    printf("Waiting for FillBuffer event...\n");
    CHECK( clWaitForEvents(1, &buffer_event) );

    printf("Marking placeholder user event as complete.\n");
    CHECK( clSetUserEventStatus(placeholder, CL_COMPLETE) );

    printf("Waiting for NDRangeKernel to finish...\n");
    CHECK( clWaitForEvents(1, &kernel_event) );

    printf("All done, exiting.\n");

    return 0;
}

jansol avatar Mar 03 '23 14:03 jansol