gominer
gominer copied to clipboard
investigate memory leak in OpenCL code
gominer has a small memory leak under normal usage that grows over time.
With a tiny worksize where it runs through the OpenCL loop hundreds of times per second, it will leak a few MB in seconds.
Need to add some debug code to find the size of the leak and then pinpoint where exactly it's occurring.
I have a miner monitor that uses gominer's ADL implementation, and I found this memory leak (along with higher CPU usage).
The problem is that gominer initializes ADL every time it calls doADLCommand; moving the initialization into an init_adl function solves this problem. I'm too lazy to fork, so here is the patch:
diff --git a/gominer/adl/adl.c b/gominer/adl/adl.c
index 7b8b75a..5586f84 100644
--- a/gominer/adl/adl.c
+++ b/gominer/adl/adl.c
@@ -16,11 +16,16 @@
#define MAX_GPUDEVICES 16
+static int iNumberAdapters;
+static LPAdapterInfo lpInfo = NULL;
+static bool adl_active = 0;
+
// declarations in adl_functions.h for these are formatted for dynamic loading
int ADL_Adapter_AdapterInfo_Get(LPAdapterInfo lpInfo, int iInputSize);
int ADL_Adapter_ID_Get(int iAdapterIndex, int *lpAdapterID);
int ADL_Adapter_NumberOfAdapters_Get(int *lpNumAdapters);
int ADL_Main_Control_Create(ADL_MAIN_MALLOC_CALLBACK callback, int iEnumConnectedAdapters);
+int ADL_Main_Control_Destroy();
int ADL_Overdrive5_FanSpeed_Get(int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
int ADL_Overdrive5_FanSpeed_Set(int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
int ADL_Overdrive5_FanSpeedToDefault_Set(int iAdapaterIndex, int iThermalControllerIndex);
@@ -53,22 +58,17 @@ static void __stdcall ADL_Main_Memory_Free (void **lpBuffer)
}
}
-int doADLCommand(int deviceid, char field[64], int arg) {
- int result, i, j, devices = 0, last_adapter = -1, gpu = 0, dummy = 0;
- int iNumberAdapters;
- struct gpu_adapters adapters[MAX_GPUDEVICES], vadapters[MAX_GPUDEVICES];
- bool devs_match = true;
- ADLBiosInfo BiosInfo;
- LPAdapterInfo lpInfo = NULL;
+void init_adl() {
+ int result;
if (ADL_OK != ADL_Main_Control_Create(ADL_Main_Memory_Alloc, 1)) {
- return 0;
+ return;
}
// Obtain the number of adapters for the system
result = ADL_Adapter_NumberOfAdapters_Get(&iNumberAdapters);
if (result != ADL_OK) {
- return 0;
+ return;
}
if (iNumberAdapters > 0) {
@@ -79,9 +79,32 @@ int doADLCommand(int deviceid, char field[64], int arg) {
// Get the AdapterInfo structure for all adapters in the system
result = ADL_Adapter_AdapterInfo_Get (lpInfo, sizeof (AdapterInfo) * iNumberAdapters);
if (result != ADL_OK) {
- return 0;
+ return;
}
} else {
+ return;
+ }
+
+ /* Flag adl as active if any card is successfully activated */
+ adl_active = true;
+
+ return;
+}
+
+void free_adl(void)
+{
+ adl_active = false;
+ ADL_Main_Memory_Free((void **)&lpInfo);
+ ADL_Main_Control_Destroy();
+}
+
+int doADLCommand(int deviceid, char field[64], int arg) {
+ int result, i, j, devices = 0, last_adapter = -1, gpu = 0, dummy = 0;
+ struct gpu_adapters adapters[MAX_GPUDEVICES], vadapters[MAX_GPUDEVICES];
+ bool devs_match = true;
+ ADLBiosInfo BiosInfo;
+
+ if (!adl_active) {
return 0;
}
diff --git a/gominer/adl/adl.go b/gominer/adl/adl.go
index 636eb6f..e9762e2 100644
--- a/gominer/adl/adl.go
+++ b/gominer/adl/adl.go
@@ -8,6 +8,8 @@ package adl
#include <stddef.h>
#include <stdbool.h>
#include <adl_sdk.h>
+void init_adl();
+void free_adl();
int getADLFanPercent(int deviceid);
int getADLTemp(int deviceid);
int getADLActivity(int deviceid);
@@ -18,6 +20,14 @@ int setADLFanPercent(int deviceid, int fanPercent);
*/
import "C"
+func Init() {
+ C.init_adl()
+}
+
+func Release() {
+ C.free_adl()
+}
+
// DeviceFanGetPercent fetches and returns fan utilization for a device index
func DeviceFanGetPercent(index int) uint32 {
fanPercent := uint32(0)
I was looking at this again and the problem seems to lie in the OpenCL library that we use. I was able to reproduce the issue by modifying one of the demos/examples:
https://github.com/rainliu/gocl/issues/9