CV-CUDA
CV-CUDA copied to clipboard
OpResize Bug[BUG]
Describe the bug
For the Resize operation, given an input image of 1280x960:
- Resizing to 320x240 produces a correct result.
- Resizing to 300x225 produces a corrupted (abnormal) result.
Steps/Code to reproduce bug
- Read the image using cv2.
- Call the Resize operator, setting the width and height to 320x240 and 300x225, respectively.
- Export and save the image.
Environment overview (please complete the following information) CV-CUDA Release v0.7.0
code:
int main(int argc, char* argv[])
{
cudaStream_t stream;
cudaStreamCreate(&stream);
int resize_width = atoi(argv[1]);
int resize_height = atoi(argv[2]);
cv::Mat input_image = cv::imread("test.png");
printf("width: %d, height: %d\n", input_image.cols, input_image.rows);
uint32_t batch_size = 1;
int image_width = input_image.size().width;
int image_height = input_image.size().height;
int channels = 3;
int img_size = image_width * image_height * channels * sizeof(uint8_t);
// 分配 input tensor 显存
nvcv::TensorDataStridedCuda::Buffer inBuf;
inBuf.strides[3] = sizeof(uint8_t);
inBuf.strides[2] = channels * inBuf.strides[3];
inBuf.strides[1] = image_width * inBuf.strides[2];
inBuf.strides[0] = image_height * inBuf.strides[1];
cudaMallocAsync(&inBuf.basePtr, batch_size * img_size, stream);
nvcv::Tensor::Requirements inReqs
= nvcv::Tensor::CalcRequirements(batch_size, {image_width, image_height}, nvcv::FMT_RGB8);
// Create a tensor buffer to store the data pointer and pitch bytes for each plane
nvcv::TensorDataStridedCuda inData(nvcv::TensorShape{inReqs.shape, inReqs.rank, inReqs.layout},
nvcv::DataType{inReqs.dtype}, inBuf);
// TensorWrapData allows for interoperation of external tensor representations with CVCUDA Tensor.
nvcv::Tensor inTensor = nvcv::TensorWrapData(inData);
uint8_t *gpu_input = reinterpret_cast<uint8_t *>(inBuf.basePtr);
cudaMemcpyAsync(gpu_input, input_image.data, img_size, cudaMemcpyHostToDevice);
nvcv::Tensor resizedTensor(batch_size, {resize_width, resize_height}, nvcv::FMT_RGB8);
cvcuda::Resize resizeOp;
resizeOp(stream, inTensor, resizedTensor, NVCV_INTERP_LINEAR);
auto srcData = resizedTensor.exportData<nvcv::TensorDataStridedCuda>();
cudaStreamSynchronize(stream);
cv::Mat gpu_out_image(resize_height, resize_width, CV_8UC3);
cudaMemcpyAsync(gpu_out_image.data, (const unsigned char *)srcData->basePtr(), sizeof(uint8_t) * resize_width * resize_height * 3, cudaMemcpyDeviceToHost);
cv::imwrite("resize_result.jpg", gpu_out_image);
cudaFree(inBuf.basePtr);
return 0;
}
Hi @zhkuo24, thank you very much for reporting the issue and for the very clear reproduction steps. We have reproduced the issue on our end and are now root-causing it so that we can provide a fix.
We checked the CV-CUDA code and added some additional tests for these resolutions. We were able to reproduce the issue. The cause is the following:
// This can create a strided tensor, i.e. one whose rows are padded (not always: there is no padding when the row size is already well aligned).
nvcv::Tensor resizedTensor(batch_size, {resize_width, resize_height}, nvcv::FMT_RGB8);
// But this copy assumes that the rows have no stride padding, so non-image bytes are copied as if they were pixels.
cv::Mat gpu_out_image(resize_height, resize_width, CV_8UC3);
cudaMemcpyAsync(gpu_out_image.data, (const unsigned char *)srcData->basePtr(), sizeof(uint8_t) * resize_width * resize_height * 3, cudaMemcpyDeviceToHost);
The best solution is to use cudaMemcpy2D, which lets you provide the source and destination strides (pitches).
Thanks
We checked the CV-CUDA code and added some additional tests for these resolutions. We were able to reproduce the issue. The cause is the following:
// This can create a strided tensor, i.e. one whose rows are padded (not always: there is no padding when the row size is already well aligned).
nvcv::Tensor resizedTensor(batch_size, {resize_width, resize_height}, nvcv::FMT_RGB8);
// But this copy assumes that the rows have no stride padding, so non-image bytes are copied as if they were pixels.
cv::Mat gpu_out_image(resize_height, resize_width, CV_8UC3);
cudaMemcpyAsync(gpu_out_image.data, (const unsigned char *)srcData->basePtr(), sizeof(uint8_t) * resize_width * resize_height * 3, cudaMemcpyDeviceToHost);
The best solution is to use cudaMemcpy2D, which lets you provide the source and destination strides (pitches).
Thanks
The line `nvcv::Tensor resizedTensor(batch_size, {resize_width, resize_height}, nvcv::FMT_RGB8);` is the problem; the tensor should be created with `nvcv::TensorWrapData(inData)` instead.