ROCR-Runtime icon indicating copy to clipboard operation
ROCR-Runtime copied to clipboard

leaks caused by pthread_attr_setaffinity_np / with fix / plz fix this in the next version

Open thenumbernine opened this issue 3 years ago • 0 comments

Hello, it looks like your use of CPU_ALLOC is incorrect and is causing memory leaks. The example at https://linux.die.net/man/3/cpu_alloc demonstrates the correct way to implement what you are attempting, without causing leaks. With your current implementation, valgrind reports that any single call into OpenCL on Linux causes leaks in ROCR. A diff of a fix based on the linux.die.net example is provided below.

#include <CL/cl.h>
#include <stdio.h>
int main() {
	cl_int status = 0;

// Variant 1 (disabled): query how many OpenCL platforms are available.
// Reported to cause memory leaks.
#if 0
	cl_uint platformCount = 0;
	status = clGetPlatformIDs(0, NULL, &platformCount);
	if (CL_SUCCESS != status) {
		fprintf(stderr, "clGetPlatformIDs failed\n");
		return 1;
	}
	printf("numPlatforms %u\n", platformCount);
#endif

// Variant 2 (enabled): a deliberately faulty call that requests nothing at all.
// Reported to leak as well; the returned error code is printed for reference.
#if 1
	status = clGetPlatformIDs(0, NULL, NULL);
	fprintf(stderr, "clGetPlatformIDs(0, NULL, NULL) error code: %d\n", status);
#endif

	return 0;
}

causes valgrind to complain:

==5429== Syscall param sched_setaffinity(mask) points to uninitialised byte(s)
==5429==    at 0x4A6A326: create_thread (createthread.c:124)
==5429==    by 0x4A6BE0F: pthread_create@@GLIBC_2.2.5 (pthread_create.c:817)
==5429==    by 0x51FF4A5: rocr::os::CreateThread(void (*)(void*), void*, unsigned int) (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x5257CDC: rocr::core::Runtime::SetAsyncSignalHandler(hsa_signal_s, hsa_signal_condition_t, long, bool (*)(long, void*), void*) (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x525D1D6: rocr::core::Runtime::Load() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x525D4C4: rocr::core::Runtime::Acquire() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x52358F9: rocr::HSA::hsa_init() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x4F38514: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4F0365D: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4F2CA55: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4EEB9A4: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4A7447E: __pthread_once_slow (pthread_once.c:116)
==5429==  Address 0x5b86f62 is 2 bytes inside a block of size 8 alloc'd
==5429==    at 0x483B723: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==5429==    by 0x483E017: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==5429==    by 0x4A77839: pthread_attr_setaffinity_np@@GLIBC_2.3.4 (pthread_attr_setaffinity.c:45)
==5429==    by 0x51FF488: rocr::os::CreateThread(void (*)(void*), void*, unsigned int) (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x5257CDC: rocr::core::Runtime::SetAsyncSignalHandler(hsa_signal_s, hsa_signal_condition_t, long, bool (*)(long, void*), void*) (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x525D1D6: rocr::core::Runtime::Load() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x525D4C4: rocr::core::Runtime::Acquire() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x52358F9: rocr::HSA::hsa_init() (in /opt/rocm-4.5.1/lib/libhsa-runtime64.so.1.4.40501)
==5429==    by 0x4F38514: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4F0365D: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4F2CA55: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)
==5429==    by 0x4EEB9A4: ??? (in /opt/rocm-4.5.1/opencl/lib/libamdocl64.so)

and has accompanying stack trace from GDB:

Program received signal SIGTRAP, Trace/breakpoint trap.
create_thread (pd=pd@entry=0x6657700, attr=attr@entry=0x1ffefff780, stopped_start=stopped_start@entry=0x1ffefff6ee, stackaddr=stackaddr@entry=0x6656fc0, thread_ran=thread_ran@entry=0x1ffefff6ef) at ../sysdeps/unix/sysv/linux/createthread.c:124
124 ../sysdeps/unix/sysv/linux/createthread.c: No such file or directory.
(gdb) bt
#0  create_thread (pd=pd@entry=0x6657700, attr=attr@entry=0x1ffefff780, stopped_start=stopped_start@entry=0x1ffefff6ee, stackaddr=stackaddr@entry=0x6656fc0, thread_ran=thread_ran@entry=0x1ffefff6ef) at ../sysdeps/unix/sysv/linux/createthread.c:124
#1  0x0000000004a6be10 in __pthread_create_2_1 (newthread=<optimized out>, attr=<optimized out>, start_routine=<optimized out>, arg=<optimized out>) at pthread_create.c:817
#2  0x00000000051ff4a6 in rocr::os::CreateThread(void (*)(void*), void*, unsigned int) () from /opt/rocm-4.5.1/opencl/lib/../../lib/libhsa-runtime64.so.1
#3  0x0000000005257cdd in rocr::core::Runtime::SetAsyncSignalHandler(hsa_signal_s, hsa_signal_condition_t, long, bool (*)(long, void*), void*) () from /opt/rocm-4.5.1/opencl/lib/../../lib/libhsa-runtime64.so.1
#4  0x000000000525d1d7 in rocr::core::Runtime::Load() () from /opt/rocm-4.5.1/opencl/lib/../../lib/libhsa-runtime64.so.1
#5  0x000000000525d4c5 in rocr::core::Runtime::Acquire() () from /opt/rocm-4.5.1/opencl/lib/../../lib/libhsa-runtime64.so.1
#6  0x00000000052358fa in rocr::HSA::hsa_init() () from /opt/rocm-4.5.1/opencl/lib/../../lib/libhsa-runtime64.so.1
#7  0x0000000004f38515 in ?? () from /opt/rocm-4.5.1/opencl/lib/libamdocl64.so
#8  0x0000000004f0365e in ?? () from /opt/rocm-4.5.1/opencl/lib/libamdocl64.so
#9  0x0000000004f2ca56 in ?? () from /opt/rocm-4.5.1/opencl/lib/libamdocl64.so
#10 0x0000000004eeb9a5 in ?? () from /opt/rocm-4.5.1/opencl/lib/libamdocl64.so
#11 0x0000000004a7447f in __pthread_once_slow (once_control=0x51d1900, init_routine=0x5788c20 <__once_proxy>) at pthread_once.c:116
#12 0x0000000004eebab9 in clIcdGetPlatformIDsKHR () from /opt/rocm-4.5.1/opencl/lib/libamdocl64.so
#13 0x0000000004869565 in ?? () from /lib/x86_64-linux-gnu/libOpenCL.so.1
#14 0x000000000486b64e in ?? () from /lib/x86_64-linux-gnu/libOpenCL.so.1
#15 0x0000000004a7447f in __pthread_once_slow (once_control=0x486f100, init_routine=0x486b4e0) at pthread_once.c:116
#16 0x0000000004869be5 in clGetPlatformIDs () from /lib/x86_64-linux-gnu/libOpenCL.so.1
#17 0x0000000000109190 in main () at main.c:10
(gdb) cont
Continuing.
[New Thread 5459]

I didn't rebuild ROCR with the following fixes, but I did copy ROCR's thread code into a minimal test case, observed that it leaked as well, and then fixed it. The fix is to call CPU_ZERO_S before setting any bits, and to use CPU_SET_S (passing the size returned by CPU_ALLOC_SIZE) instead of CPU_SET.

#if 1 //Original code leaks:
    // Affinity setup as copied from ROCR's thread code into the minimal test case.
    // `attrib` is declared by the surrounding code (not shown); presumably a
    // pthread_attr_t, since its address is passed to pthread_attr_setaffinity_np.
    int cores = get_nprocs_conf();
    // The set returned by CPU_ALLOC is used without being cleared first.
    cpu_set_t* cpuset = CPU_ALLOC(cores);
    for(int i=0; i<cores; i++){
      // Plain CPU_SET is applied to a CPU_ALLOC'd set here; the report recommends
      // CPU_SET_S with the CPU_ALLOC_SIZE value instead.
      CPU_SET(i, cpuset);
    }
    // The valgrind output above shows the mask reaching sched_setaffinity with
    // uninitialised byte(s), in a block allocated by pthread_attr_setaffinity_np.
    int err = pthread_attr_setaffinity_np(&attrib, CPU_ALLOC_SIZE(cores), cpuset);
    assert(err == 0 && "pthread_attr_setaffinity_np failed.");
    CPU_FREE(cpuset);
#endif
#if 1
    { //using example at: https://linux.die.net/man/3/cpu_alloc_size
        // Proposed replacement for the affinity setup. It is wrapped in its own
        // scope, presumably so its locals do not clash with the original snippet
        // when both #if blocks are enabled. `attrib` comes from the surrounding
        // code (not shown).
        int cores = get_nprocs_conf();
        // Size in bytes of a set holding `cores` CPUs; computed once and passed to
        // every _S macro and to pthread_attr_setaffinity_np.
        size_t cpuAllocSize = CPU_ALLOC_SIZE(cores);
        cpu_set_t* cpuset = CPU_ALLOC(cores);
        // Check that the allocation succeeded before touching the set.
        assert(cpuset);
        // Clear the whole allocation before any bit is set.
        CPU_ZERO_S(cpuAllocSize, cpuset);
        for(int i=0; i<cores; i++){
          // Size-aware variant, used because the set was allocated with CPU_ALLOC.
          CPU_SET_S(i, cpuAllocSize, cpuset);
        }
        int err = pthread_attr_setaffinity_np(&attrib, cpuAllocSize, cpuset);
        assert(err == 0 && "pthread_attr_setaffinity_np failed.");
        // Release the set once it has been passed to pthread_attr_setaffinity_np.
        CPU_FREE(cpuset);
    }
#endif

Here's a patch file for git:

diff --git a/src/core/util/lnx/os_linux.cpp b/src/core/util/lnx/os_linux.cpp
index 6c27611..f142e57 100644
--- a/src/core/util/lnx/os_linux.cpp
+++ b/src/core/util/lnx/os_linux.cpp
@@ -101,10 +101,12 @@ class os_thread {
 
     int cores = get_nprocs_conf();
     cpu_set_t* cpuset = CPU_ALLOC(cores);
-    for(int i=0; i<cores; i++){
-      CPU_SET(i, cpuset);
+    size_t cpuAllocSize = CPU_ALLOC_SIZE(cores);
+	CPU_ZERO_S(cpuAllocSize, cpuset);
+	for(int i=0; i<cores; i++){
+      CPU_SET_S(i, cpuAllocSize, cpuset);
     }
-    int err = pthread_attr_setaffinity_np(&attrib, CPU_ALLOC_SIZE(cores), cpuset);
+    int err = pthread_attr_setaffinity_np(&attrib, cpuAllocSize, cpuset);
     assert(err == 0 && "pthread_attr_setaffinity_np failed.");
     CPU_FREE(cpuset);
 

thenumbernine avatar Jan 26 '22 08:01 thenumbernine