cuPCL icon indicating copy to clipboard operation
cuPCL copied to clipboard

------------checking CUDA VoxelGrid---------------- Cuda failure: an illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700 Aborted

Open le-wei opened this issue 1 year ago • 2 comments

When I use CUDA VoxelGrid for filtering and then use the filtered data to prepare for clustering, constructing the cudaExtractCluster object produces the error "------------checking CUDA VoxelGrid---------------- Cuda failure: an illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700 Aborted ". The CUDA VoxelGrid function works fine when no cudaExtractCluster object is created. This is my first time programming with this library; please tell me the reason for this error, and how I can combine several of the library's functions in one program. Thanks, I appreciate your help. Here is one of the versions of the code that I have tried several times.

/**
 * Runs the cuPCL VoxelGrid filter on cloudSrc, writes the filtered points to
 * "after-cuda-VoxelGrid.pcd", and then stages the buffers and a configured
 * cudaExtractCluster object for a follow-up clustering step.
 *
 * Point storage is treated as 4 consecutive floats per point throughout
 * (see the `sizeof(float) * 4` sizes and the `i * 4` indexing below).
 *
 * @param cloudSrc input cloud whose point storage is uploaded to the GPU.
 * @param cloudDst resized to the input point count and zero-filled. The
 *                 filtered result is NOT copied into it by this function.
 *
 * Every CUDA runtime call is checked; on failure the error is logged to
 * std::cerr and the function releases what it allocated and returns.
 *
 * NOTE(review): this revision fixes a host-side over-read (the second upload
 * used the unfiltered point count) and several leaks. Whether that also
 * removes the "illegal memory access" reported from inside cudaFilter.cpp
 * cannot be determined from this function alone -- confirm on hardware.
 */
void testCUDA(pcl::PointCloud<pcl::PointXYZ>::Ptr cloudSrc,
              pcl::PointCloud<pcl::PointXYZ>::Ptr cloudDst) {
  typedef std::chrono::duration<double, std::ratio<1, 1000>> Millis;

  // Logs a failed CUDA runtime call and reports whether it succeeded.
  auto cudaOk = [](cudaError_t err, const char *what) -> bool {
    if (err == cudaSuccess)
      return true;
    std::cerr << "CUDA call failed (" << what
              << "): " << cudaGetErrorString(err) << std::endl;
    return false;
  };

  const unsigned int nCount = cloudSrc->width * cloudSrc->height;
  // Byte size of a buffer able to hold every input point.
  const std::size_t cloudBytes = sizeof(float) * 4 * nCount;

  float *inputData = (float *)cloudSrc->points.data();
  cloudDst->width = nCount;
  cloudDst->height = 1;
  cloudDst->resize(cloudDst->width * cloudDst->height);
  float *outputData = (float *)cloudDst->points.data();
  memset(outputData, 0, cloudBytes);

  std::cout << "\n------------checking CUDA ---------------- " << std::endl;
  std::cout << "CUDA Loaded " << cloudSrc->width * cloudSrc->height
            << " data points from PCD file with the following fields: "
            << pcl::getFieldsList(*cloudSrc) << std::endl;

  cudaStream_t stream = NULL;
  float *input = NULL;
  float *output = NULL;

  // Releases everything owned by the filtering stage. cudaFree(NULL) is a
  // no-op, so this is safe no matter how far the setup got.
  auto releaseFilterStage = [&]() {
    cudaFree(input);
    cudaFree(output);
    if (stream != NULL)
      cudaStreamDestroy(stream);
  };

  bool ready = cudaOk(cudaStreamCreate(&stream), "cudaStreamCreate");
  ready = ready &&
          cudaOk(cudaMallocManaged(&input, cloudBytes, cudaMemAttachHost),
                 "cudaMallocManaged(input)");
  ready = ready && cudaOk(cudaStreamAttachMemAsync(stream, input),
                          "cudaStreamAttachMemAsync(input)");
  ready = ready && cudaOk(cudaMemcpyAsync(input, inputData, cloudBytes,
                                          cudaMemcpyHostToDevice, stream),
                          "cudaMemcpyAsync(input)");
  ready = ready &&
          cudaOk(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
  ready = ready &&
          cudaOk(cudaMallocManaged(&output, cloudBytes, cudaMemAttachHost),
                 "cudaMallocManaged(output)");
  ready = ready && cudaOk(cudaStreamAttachMemAsync(stream, output),
                          "cudaStreamAttachMemAsync(output)");
  ready = ready &&
          cudaOk(cudaStreamSynchronize(stream), "cudaStreamSynchronize");
  if (!ready) {
    releaseFilterStage();
    return;
  }

  cudaFilter filterTest;
  FilterParam_t setP;
  unsigned int countLeft = 0;
  std::cout << "\n------------checking CUDA VoxelGrid---------------- "
            << std::endl;
  setP.type = VOXELGRID;
  setP.voxelX = 0.02;
  setP.voxelY = 0.02;
  setP.voxelZ = 0.02;
  filterTest.set(setP);

  // Synchronize on both sides of the call so the timing covers only the
  // filter itself and any asynchronous fault surfaces here.
  cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize(before filter)");
  const std::chrono::steady_clock::time_point t1 =
      std::chrono::steady_clock::now();
  const int status = filterTest.filter(output, &countLeft, input, nCount);
  const bool filterSynced =
      cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize(after filter)");
  const std::chrono::steady_clock::time_point t2 =
      std::chrono::steady_clock::now();
  if (status != 0 || !filterSynced) {
    // The original returned here without releasing the GPU resources.
    releaseFilterStage();
    return;
  }

  const Millis time_span = std::chrono::duration_cast<Millis>(t2 - t1);
  std::cout << "CUDA VoxelGrid by Time: " << time_span.count() << " ms."
            << std::endl;
  std::cout << "CUDA VoxelGrid before filtering: " << nCount << std::endl;
  std::cout << "CUDA VoxelGrid after filtering: " << countLeft << std::endl;

  // `output` only has room for nCount points; refuse to read past it.
  if (countLeft > nCount) {
    std::cerr << "Filter reported more points (" << countLeft
              << ") than were supplied (" << nCount << ")" << std::endl;
    releaseFilterStage();
    return;
  }

  pcl::PointCloud<pcl::PointXYZ>::Ptr cloudNew(
      new pcl::PointCloud<pcl::PointXYZ>);
  cloudNew->width = countLeft;
  cloudNew->height = 1;
  cloudNew->points.resize(cloudNew->width * cloudNew->height);
  for (std::size_t i = 0; i < cloudNew->size(); ++i) {
    cloudNew->points[i].x = output[i * 4 + 0];
    cloudNew->points[i].y = output[i * 4 + 1];
    cloudNew->points[i].z = output[i * 4 + 2];
  }
  pcl::io::savePCDFileASCII("after-cuda-VoxelGrid.pcd", *cloudNew);

  {
    cudaStream_t stream2 = NULL;
    float *input2 = NULL;
    float *output2 = NULL;
    unsigned int *indexEC = NULL;

    // cloudNew holds only countLeft points, so only that many bytes may be
    // read from it. The device buffers keep the original (larger) capacity.
    const std::size_t filteredBytes = sizeof(float) * 4 * countLeft;
    float *input2Data = (float *)cloudNew->points.data();

    bool staged = cudaOk(cudaStreamCreate(&stream2), "cudaStreamCreate(2)");
    staged = staged &&
             cudaOk(cudaMallocManaged(&input2, cloudBytes, cudaMemAttachHost),
                    "cudaMallocManaged(input2)");
    staged = staged && cudaOk(cudaStreamAttachMemAsync(stream2, input2),
                              "cudaStreamAttachMemAsync(input2)");
    staged = staged && cudaOk(cudaMemcpyAsync(input2, input2Data, filteredBytes,
                                              cudaMemcpyHostToDevice, stream2),
                              "cudaMemcpyAsync(input2)");
    staged = staged &&
             cudaOk(cudaStreamSynchronize(stream2), "cudaStreamSynchronize(2)");
    staged = staged &&
             cudaOk(cudaMallocManaged(&output2, cloudBytes, cudaMemAttachHost),
                    "cudaMallocManaged(output2)");
    staged = staged && cudaOk(cudaStreamAttachMemAsync(stream2, output2),
                              "cudaStreamAttachMemAsync(output2)");
    staged = staged &&
             cudaOk(cudaStreamSynchronize(stream2), "cudaStreamSynchronize(2)");

    if (staged) {
      cudaExtractCluster cudaec;
      extractClusterParam_t ecp;
      ecp.minClusterSize = 100;
      ecp.maxClusterSize = 2500000;
      ecp.voxelX = 0.05;
      ecp.voxelY = 0.05;
      ecp.voxelZ = 0.05;
      ecp.countThreshold = 20;
      cudaec.set(ecp);

      // Index buffer keeps the byte size used by the original code.
      bool indexReady =
          cudaOk(cudaMallocManaged(&indexEC, cloudBytes, cudaMemAttachHost),
                 "cudaMallocManaged(indexEC)");
      indexReady = indexReady &&
                   cudaOk(cudaStreamAttachMemAsync(stream2, indexEC),
                          "cudaStreamAttachMemAsync(indexEC)");
      indexReady = indexReady &&
                   cudaOk(cudaMemsetAsync(indexEC, 0, cloudBytes, stream2),
                          "cudaMemsetAsync(indexEC)");
      if (indexReady)
        cudaOk(cudaStreamSynchronize(stream2), "cudaStreamSynchronize(2)");
    }

    // The original leaked all of these.
    cudaFree(indexEC);
    cudaFree(output2);
    cudaFree(input2);
    if (stream2 != NULL)
      cudaStreamDestroy(stream2);
  }

  releaseFilterStage();
}

le-wei avatar Jun 27 '23 02:06 le-wei

cudaExtractCluster object and cudaFilter object coexist, cudaExtractCluster object can call extract function to complete the corresponding function, cudaFilter call filter function to complete the corresponding function with the following error:

Cuda failure: an illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700 illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700
Aborted

I have tested separately that the cudaExtractCluster object and the cudaSegmentation object work when they coexist. The cudaFilter object also works fine when it coexists with a cudaSegmentation object. Of the limited combinations I tried, cudaFilter fails only when a cudaExtractCluster object and a cudaFilter object coexist. We have not found the cause of the error yet. Any help would be greatly appreciated.

le-wei avatar Jun 29 '23 02:06 le-wei

cudaExtractCluster object and cudaFilter object coexist, cudaExtractCluster object can call extract function to complete the corresponding function, cudaFilter call filter function to complete the corresponding function with the following error:

Cuda failure: an illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700 illegal memory access was encountered at line 138 in file cudaFilter.cpp error status: 700
Aborted

I have tested separately that the cudaExtractCluster object and the cudaSegmentation object work when they coexist. The cudaFilter object also works fine when it coexists with a cudaSegmentation object. Of the limited combinations I tried, cudaFilter fails only when a cudaExtractCluster object and a cudaFilter object coexist. We have not found the cause of the error yet. Any help would be greatly appreciated.

请问您解决了吗?我也遇到了同样的问题。

iceriver97 avatar Dec 04 '23 07:12 iceriver97