Calculating depth map from stereo images on M350
I have some questions regarding the stereo vision sample in samples/sample_c++/module_sample/perception/test_perception_entry.cpp:
- The camera directions, e.g. DJI_PERCEPTION_RECTIFY_FRONT, are named in a way that implies the streams are rectified. Can you please confirm that they are in fact rectified?
- Do you have code samples for creating a depth map from the pairs of stereo images? I have successfully visualized the disparity and created a depth map using an OpenCV sample, but the units in my calculation appear to be incorrect (see the code below).
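For reference, this is the unit convention I am assuming, sketched with the variable names from the code below (treating the translation vector as metres is my assumption):

// Depth from disparity on a rectified pair: Z = fx * B / d
// fx : focal length in pixels (leftIntrinsics[0])
// B  : baseline, in the units of translationLeftInRight (metres, I assume)
// d  : disparity in pixels; note StereoBM returns CV_16S with 4 fractional
//      bits, so the raw output must be divided by 16 to get pixels.
cv::Mat disp32;
left_disp.convertTo(disp32, CV_32F, 1.0 / 16.0); // left_disp: CV_16S from StereoBM
double fx = cameraMatrix1.at<double>(0, 0);      // from the code below
double baseline = cv::norm(T);                   // length of the translation vector
cv::Mat depth_m = fx * baseline / disp32;        // element-wise; metres if B is metres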
// Requires OpenCV built with the ximgproc contrib module:
// <opencv2/calib3d.hpp>, <opencv2/ximgproc.hpp>
static void *DjiTest_StereoImagesDisplayTask(void *arg)
{
    T_DjiOsalHandler *osalHandler = DjiPlatform_GetOsalHandler();
    auto *pack = (T_DjiTestStereoImagePacket *) arg;
    char nameStr[32] = {0};
    char fpsStr[20] = "FPS: ";
    int fps = 0;
    double timePrev[USER_PERCEPTION_DIRECTION_NUM] = {0};
    double timeNow[USER_PERCEPTION_DIRECTION_NUM] = {0};
    double timeFess[USER_PERCEPTION_DIRECTION_NUM] = {0};
    int count[USER_PERCEPTION_DIRECTION_NUM] = {1};
    char showFpsString[USER_PERCEPTION_DIRECTION_NUM][FPS_STRING_LEN] = {0};
    int i = 0;
    cv::Mat display;
    cv::Mat image_l;
    cv::Mat image_r;
    cv::Mat disparity_left;
    cv::Mat disparity_right;
    cv::Mat filtered_disparity;
    cv::Mat disparity;
    cv::Mat depth;
    cv::Mat cameraMatrix1 = (cv::Mat_<double>(3, 3) <<
        camera_parameters.leftIntrinsics[0], camera_parameters.leftIntrinsics[1], camera_parameters.leftIntrinsics[2],
        camera_parameters.leftIntrinsics[3], camera_parameters.leftIntrinsics[4], camera_parameters.leftIntrinsics[5],
        camera_parameters.leftIntrinsics[6], camera_parameters.leftIntrinsics[7], camera_parameters.leftIntrinsics[8]);
    cv::Mat cameraMatrix2 = (cv::Mat_<double>(3, 3) <<
        camera_parameters.rightIntrinsics[0], camera_parameters.rightIntrinsics[1], camera_parameters.rightIntrinsics[2],
        camera_parameters.rightIntrinsics[3], camera_parameters.rightIntrinsics[4], camera_parameters.rightIntrinsics[5],
        camera_parameters.rightIntrinsics[6], camera_parameters.rightIntrinsics[7], camera_parameters.rightIntrinsics[8]);
    // Convert the rotation matrix to CV_64F
    cv::Mat R = (cv::Mat_<double>(3, 3) <<
        static_cast<double>(camera_parameters.rotationLeftInRight[0]), static_cast<double>(camera_parameters.rotationLeftInRight[1]), static_cast<double>(camera_parameters.rotationLeftInRight[2]),
        static_cast<double>(camera_parameters.rotationLeftInRight[3]), static_cast<double>(camera_parameters.rotationLeftInRight[4]), static_cast<double>(camera_parameters.rotationLeftInRight[5]),
        static_cast<double>(camera_parameters.rotationLeftInRight[6]), static_cast<double>(camera_parameters.rotationLeftInRight[7]), static_cast<double>(camera_parameters.rotationLeftInRight[8]));
    // Convert the translation vector to CV_64F
    cv::Mat T = (cv::Mat_<double>(3, 1) <<
        static_cast<double>(camera_parameters.translationLeftInRight[0]),
        static_cast<double>(camera_parameters.translationLeftInRight[1]),
        static_cast<double>(camera_parameters.translationLeftInRight[2]));
    // Distortion coefficients should also be CV_64F
    cv::Mat distCoeffs1 = cv::Mat::zeros(5, 1, CV_64F); // Zero distortion for left camera
    cv::Mat distCoeffs2 = cv::Mat::zeros(5, 1, CV_64F); // Zero distortion for right camera
    cv::Size image_size = cv::Size(640, 480); // Adjust based on your image resolution
    cv::Mat R1, R2, P1, P2, Q;
    std::cout << "before stereo rectification:" << std::endl;
    // Output the input parameters
    std::cout << "cameraMatrix1 (" << type_string(cameraMatrix1) << "):\n" << cameraMatrix1 << std::endl;
    std::cout << "distCoeffs1 (" << type_string(distCoeffs1) << "):\n" << distCoeffs1 << std::endl;
    std::cout << "cameraMatrix2 (" << type_string(cameraMatrix2) << "):\n" << cameraMatrix2 << std::endl;
    std::cout << "distCoeffs2 (" << type_string(distCoeffs2) << "):\n" << distCoeffs2 << std::endl;
    std::cout << "R (Rotation Matrix) (" << type_string(R) << "):\n" << R << std::endl;
    std::cout << "T (Translation Vector) (" << type_string(T) << "):\n" << T << std::endl;
    std::cout << "image_size: " << image_size.width << "x" << image_size.height << std::endl;
    cv::stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2,
                      image_size, R, T, R1, R2, P1, P2, Q,
                      cv::CALIB_ZERO_DISPARITY, -1, image_size);
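    // Sanity check on units (my assumption: translationLeftInRight is in
    // metres): with CALIB_ZERO_DISPARITY, Q(3,2) encodes -1/Tx, so the
    // magnitude of 1/Q(3,2) should match the baseline length cv::norm(T).
    double baselineFromQ = -1.0 / Q.at<double>(3, 2); // sign depends on camera order
    std::cout << "baseline from Q: " << baselineFromQ
              << ", cv::norm(T): " << cv::norm(T) << std::endl;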
std::cout << "Output parameters from stereoRectify:" << std::endl;
std::cout << "R1 (Rectification Transform 1) (" << type_string(R1) << "):\n" << R1 << std::endl;
std::cout << "R2 (Rectification Transform 2) (" << type_string(R2) << "):\n" << R2 << std::endl;
std::cout << "P1 (Projection Matrix 1) (" << type_string(P1) << "):\n" << P1 << std::endl;
std::cout << "P2 (Projection Matrix 2) (" << type_string(P2) << "):\n" << P2 << std::endl;
std::cout << "Q (" << type_string(Q) << "):\n" << Q << std::endl;
std::cout << "after stereo rectification: " << std::endl;
auto last_execution_time = std::chrono::high_resolution_clock::now();
auto now = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_execution_time);
    while (true)
    {
        osalHandler->TaskSleepMs(1);
#ifdef OPEN_CV_INSTALLED
        /*! Get data here */
        osalHandler->MutexLock(pack->mutex);
        if (!pack->gotData)
        {
            osalHandler->MutexUnlock(pack->mutex);
            continue;
        }
        cv::Mat image = cv::Mat(pack->info.rawInfo.height, pack->info.rawInfo.width, CV_8U);
        int copySize = pack->info.rawInfo.height * pack->info.rawInfo.width;
        if (pack->imageRawBuffer)
        {
            memcpy(image.data, pack->imageRawBuffer, copySize);
            osalHandler->Free(pack->imageRawBuffer);
            pack->imageRawBuffer = NULL;
        }
        for (i = 0; i < sizeof(positionName) / sizeof(T_DjiTestPerceptionCameraPositionName); ++i)
        {
            if (positionName[i].cameraPosition == pack->info.dataType)
            {
                if (i % 2 == 0)
                {
                    image_l = image.clone();
                }
                else
                {
                    image_r = image.clone();
                }
                sprintf(nameStr, "Image position: %s", positionName[i].name);
                break;
            }
        }
        pack->gotData = false;
        osalHandler->MutexUnlock(pack->mutex);
        if (!image_l.empty() && !image_r.empty())
        {
            now = std::chrono::high_resolution_clock::now();
            duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_execution_time);
            if (duration.count() > 100)
            {
                last_execution_time = now;
                cv::Mat left_for_matcher, right_for_matcher;
                cv::Mat left_disp, right_disp;
                cv::Mat filtered_disp;
                left_for_matcher = image_l.clone();
                right_for_matcher = image_r.clone();
                int max_disp = 128;
                int wsize = 19;
                // (An SGBM-based left matcher, as in OpenCV's disparity_filtering.cpp
                // sample, could be used here instead of StereoBM.)
                cv::Ptr<cv::StereoBM> left_matcher = cv::StereoBM::create(max_disp, wsize);
                cv::Ptr<cv::StereoMatcher> right_matcher = cv::ximgproc::createRightMatcher(left_matcher);
                left_matcher->compute(left_for_matcher, right_for_matcher, left_disp);
                right_matcher->compute(right_for_matcher, left_for_matcher, right_disp);
                cv::Mat conf_map;
                conf_map.create(image_l.size(), CV_8U);
                conf_map = cv::Scalar(255);
                double lambda = 8000;
                double sigma = 1.5;
                cv::Ptr<cv::ximgproc::DisparityWLSFilter> wls_filter =
                    cv::ximgproc::createDisparityWLSFilter(left_matcher);
                wls_filter->setLambda(lambda);
                wls_filter->setSigmaColor(sigma);
                wls_filter->filter(left_disp, image_l, filtered_disp, right_disp);
                conf_map = wls_filter->getConfidenceMap();
                double fbs_spatial = 16;
                double fbs_luma = 8.0;
                double fbs_chroma = 8.0;
                double fbs_lambda = 0.5;
                cv::Mat solved_disp;
                cv::ximgproc::fastBilateralSolverFilter(image_l, left_disp, conf_map / 255.0f, solved_disp,
                                                        fbs_spatial, fbs_luma, fbs_chroma, fbs_lambda);
                // StereoBM-family disparities are CV_16S with 4 fractional bits, so
                // convert to float and divide by 16 before reprojecting; otherwise the
                // resulting depths come out 16x too small.
                cv::Mat solved_disp_f;
                solved_disp.convertTo(solved_disp_f, CV_32F, 1.0 / 16.0);
                cv::Mat depth;
                cv::reprojectImageTo3D(solved_disp_f, depth, Q, true, CV_32F);
                std::vector<cv::Mat> channels(3);
                cv::split(depth, channels); // Split the 3 channels into separate single-channel images
                cv::Mat z_channel = channels[2];
                z_channel.setTo(0, z_channel == -std::numeric_limits<float>::infinity());
                z_channel.setTo(0, z_channel == std::numeric_limits<float>::infinity());
                z_channel.setTo(0, z_channel != z_channel); // NaN != NaN, so this masks NaNs
                // Get the center point of the image
                int centerX = z_channel.cols / 2;
                int centerY = z_channel.rows / 2;
                // Retrieve the depth value at the center point
                float centerDepthValue = z_channel.at<float>(centerY, centerX);
                std::cout << "Depth value at center (" << centerX << ", " << centerY << "): "
                          << centerDepthValue << " meters" << std::endl;
                double minVal, maxVal;
                cv::minMaxLoc(z_channel, &minVal, &maxVal);
                std::cout << "z_channel Min: " << minVal << " Max: " << maxVal << std::endl;
                cv::Mat visualization;
                cv::ximgproc::getDisparityVis(solved_disp, visualization, 5);
                cv::Mat visualization_rgb;
                cv::cvtColor(visualization, visualization_rgb, cv::COLOR_GRAY2RGB);
                // Draw a circle at the center point
                cv::circle(visualization_rgb, cv::Point(centerX, centerY), 5, cv::Scalar(0, 255, 0), -1);
                // Label the depth value
                std::string depthLabel = "Depth: " + std::to_string(centerDepthValue) + "m";
                cv::putText(visualization_rgb, depthLabel, cv::Point(centerX + 10, centerY - 10),
                            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
                cv::imshow("visualization", visualization_rgb);
            }
            image_l.release();
            image_r.release();
            disparity.release();
            disparity_right.release();
            filtered_disparity.release();
            depth.release();
        }
        cv::waitKey(1);
#else
        osalHandler->TaskSleepMs(1000);
        USER_LOG_WARN("Please install opencv to run this stereo image display sample.");
#endif
    }
}
Agent comment from Leon in Zendesk ticket #116330:
Dear developer,
Hello, thank you for contacting DJI.
Yes, they are rectified. Do you want to obtain depth information? For depth information, we recommend a specific process.
The usual processing steps are: RAW image -> camera intrinsic parameters (undistort to obtain the rectified image) -> compute the disparity map -> use the camera extrinsic parameters (compute the depth map).
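A minimal OpenCV sketch of those steps, reusing the parameter names from the code above (the matcher settings are illustrative, not prescriptive):

// 1) Undistort + rectify the RAW pair (skip if the stream is already rectified).
cv::Mat map1x, map1y, map2x, map2y, rect_l, rect_r;
cv::initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, image_size, CV_32FC1, map1x, map1y);
cv::initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, image_size, CV_32FC1, map2x, map2y);
cv::remap(image_l, rect_l, map1x, map1y, cv::INTER_LINEAR);
cv::remap(image_r, rect_r, map2x, map2y, cv::INTER_LINEAR);

// 2) Compute the disparity map on the rectified pair.
cv::Ptr<cv::StereoBM> bm = cv::StereoBM::create(128, 19);
cv::Mat disp16, disp32;
bm->compute(rect_l, rect_r, disp16);
disp16.convertTo(disp32, CV_32F, 1.0 / 16.0); // StereoBM output is CV_16S, 16x fixed-point

// 3) Reproject to depth using the extrinsics baked into Q by cv::stereoRectify.
cv::Mat xyz;
cv::reprojectImageTo3D(disp32, xyz, Q, true);
// Channel 2 of xyz is Z, in the same units as translationLeftInRight.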
You can refer to the early OSDK Sample: https://github.com/dji-sdk/Onboard-SDK/blob/master/sample/platform/linux/advanced-sensing/stereo_vision_depth_perception_sample/stereo_vision_depth_perception_sample.cpp
Thank you for your support of DJI products! Wish you all the best!
Best Regards,
DJI SDK Technical Support