Calculating depth map from stereo images on M350
I have some questions regarding the stereo vision sample in samples/sample_c++/module_sample/perception/test_perception_entry.cpp:
- The camera directions, e.g. DJI_PERCEPTION_RECTIFY_FRONT, are named in a way that implies the streams are rectified. Can you please confirm that they are in fact rectified?
- Do you have code samples for creating a depth map from the pairs of stereo images? I have successfully visualized the disparity and created a depth map using an OpenCV sample, but the units in my calculation appear to be incorrect (see the code below).
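For reference, this is the unit convention I am assuming, sketched with the variable names from the code below (treating the translation vector as metres is my assumption):

// Depth from disparity on a rectified pair: Z = fx * B / d
// fx : focal length in pixels (leftIntrinsics[0])
// B  : baseline, in the units of translationLeftInRight (metres, I assume)
// d  : disparity in pixels; note StereoBM returns CV_16S with 4 fractional
//      bits, so the raw output must be divided by 16 to get pixels.
cv::Mat disp32;
left_disp.convertTo(disp32, CV_32F, 1.0 / 16.0); // left_disp: CV_16S from StereoBM
double fx = cameraMatrix1.at<double>(0, 0);      // from the code below
double baseline = cv::norm(T);                   // length of the translation vector
cv::Mat depth_m = fx * baseline / disp32;        // element-wise; metres if B is metres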
// Requires OpenCV built with the ximgproc contrib module:
// <opencv2/calib3d.hpp>, <opencv2/ximgproc.hpp>
static void *DjiTest_StereoImagesDisplayTask(void *arg)
{
    T_DjiOsalHandler *osalHandler = DjiPlatform_GetOsalHandler();
    auto *pack = (T_DjiTestStereoImagePacket *) arg;
    char nameStr[32] = {0};
    char fpsStr[20] = "FPS: ";
    int fps = 0;
    double timePrev[USER_PERCEPTION_DIRECTION_NUM] = {0};
    double timeNow[USER_PERCEPTION_DIRECTION_NUM] = {0};
    double timeFess[USER_PERCEPTION_DIRECTION_NUM] = {0};
    int count[USER_PERCEPTION_DIRECTION_NUM] = {1};
    char showFpsString[USER_PERCEPTION_DIRECTION_NUM][FPS_STRING_LEN] = {0};
    int i = 0;
    cv::Mat display;
    cv::Mat image_l;
    cv::Mat image_r;
    cv::Mat disparity_left;
    cv::Mat disparity_right;
    cv::Mat filtered_disparity;
    cv::Mat disparity;
    cv::Mat depth;
    cv::Mat cameraMatrix1 = (cv::Mat_<double>(3, 3) <<
        camera_parameters.leftIntrinsics[0], camera_parameters.leftIntrinsics[1], camera_parameters.leftIntrinsics[2],
        camera_parameters.leftIntrinsics[3], camera_parameters.leftIntrinsics[4], camera_parameters.leftIntrinsics[5],
        camera_parameters.leftIntrinsics[6], camera_parameters.leftIntrinsics[7], camera_parameters.leftIntrinsics[8]);
    cv::Mat cameraMatrix2 = (cv::Mat_<double>(3, 3) <<
        camera_parameters.rightIntrinsics[0], camera_parameters.rightIntrinsics[1], camera_parameters.rightIntrinsics[2],
        camera_parameters.rightIntrinsics[3], camera_parameters.rightIntrinsics[4], camera_parameters.rightIntrinsics[5],
        camera_parameters.rightIntrinsics[6], camera_parameters.rightIntrinsics[7], camera_parameters.rightIntrinsics[8]);
    // Convert the rotation matrix to CV_64F
    cv::Mat R = (cv::Mat_<double>(3, 3) <<
        static_cast<double>(camera_parameters.rotationLeftInRight[0]), static_cast<double>(camera_parameters.rotationLeftInRight[1]), static_cast<double>(camera_parameters.rotationLeftInRight[2]),
        static_cast<double>(camera_parameters.rotationLeftInRight[3]), static_cast<double>(camera_parameters.rotationLeftInRight[4]), static_cast<double>(camera_parameters.rotationLeftInRight[5]),
        static_cast<double>(camera_parameters.rotationLeftInRight[6]), static_cast<double>(camera_parameters.rotationLeftInRight[7]), static_cast<double>(camera_parameters.rotationLeftInRight[8]));
    // Convert the translation vector to CV_64F
    cv::Mat T = (cv::Mat_<double>(3, 1) <<
        static_cast<double>(camera_parameters.translationLeftInRight[0]),
        static_cast<double>(camera_parameters.translationLeftInRight[1]),
        static_cast<double>(camera_parameters.translationLeftInRight[2]));
    // Distortion coefficients should also be CV_64F
    cv::Mat distCoeffs1 = cv::Mat::zeros(5, 1, CV_64F); // Zero distortion for left camera
    cv::Mat distCoeffs2 = cv::Mat::zeros(5, 1, CV_64F); // Zero distortion for right camera
    cv::Size image_size = cv::Size(640, 480); // Adjust based on your image resolution
    cv::Mat R1, R2, P1, P2, Q;
    std::cout << "before stereo rectification:" << std::endl;
    // Output the input parameters
    std::cout << "cameraMatrix1 (" << type_string(cameraMatrix1) << "):\n" << cameraMatrix1 << std::endl;
    std::cout << "distCoeffs1 (" << type_string(distCoeffs1) << "):\n" << distCoeffs1 << std::endl;
    std::cout << "cameraMatrix2 (" << type_string(cameraMatrix2) << "):\n" << cameraMatrix2 << std::endl;
    std::cout << "distCoeffs2 (" << type_string(distCoeffs2) << "):\n" << distCoeffs2 << std::endl;
    std::cout << "R (Rotation Matrix) (" << type_string(R) << "):\n" << R << std::endl;
    std::cout << "T (Translation Vector) (" << type_string(T) << "):\n" << T << std::endl;
    std::cout << "image_size: " << image_size.width << "x" << image_size.height << std::endl;
    cv::stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2,
                      image_size, R, T, R1, R2, P1, P2, Q,
                      cv::CALIB_ZERO_DISPARITY, -1, image_size);
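    // Sanity check on units (my assumption: translationLeftInRight is in
    // metres): with CALIB_ZERO_DISPARITY, Q(3,2) encodes -1/Tx, so the
    // magnitude of 1/Q(3,2) should match the baseline length cv::norm(T).
    double baselineFromQ = -1.0 / Q.at<double>(3, 2); // sign depends on camera order
    std::cout << "baseline from Q: " << baselineFromQ
              << ", cv::norm(T): " << cv::norm(T) << std::endl;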
std::cout << "Output parameters from stereoRectify:" << std::endl;
std::cout << "R1 (Rectification Transform 1) (" << type_string(R1) << "):\n" << R1 << std::endl;
std::cout << "R2 (Rectification Transform 2) (" << type_string(R2) << "):\n" << R2 << std::endl;
std::cout << "P1 (Projection Matrix 1) (" << type_string(P1) << "):\n" << P1 << std::endl;
std::cout << "P2 (Projection Matrix 2) (" << type_string(P2) << "):\n" << P2 << std::endl;
std::cout << "Q (" << type_string(Q) << "):\n" << Q << std::endl;
std::cout << "after stereo rectification: " << std::endl;
auto last_execution_time = std::chrono::high_resolution_clock::now();
auto now = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_execution_time);
    while (true)
    {
        osalHandler->TaskSleepMs(1);
#ifdef OPEN_CV_INSTALLED
        /*! Get data here */
        osalHandler->MutexLock(pack->mutex);
        if (!pack->gotData)
        {
            osalHandler->MutexUnlock(pack->mutex);
            continue;
        }
        cv::Mat image = cv::Mat(pack->info.rawInfo.height, pack->info.rawInfo.width, CV_8U);
        int copySize = pack->info.rawInfo.height * pack->info.rawInfo.width;
        if (pack->imageRawBuffer)
        {
            memcpy(image.data, pack->imageRawBuffer, copySize);
            osalHandler->Free(pack->imageRawBuffer);
            pack->imageRawBuffer = NULL;
        }
        for (i = 0; i < sizeof(positionName) / sizeof(T_DjiTestPerceptionCameraPositionName); ++i)
        {
            if (positionName[i].cameraPosition == pack->info.dataType)
            {
                if (i % 2 == 0)
                {
                    image_l = image.clone();
                }
                else
                {
                    image_r = image.clone();
                }
                sprintf(nameStr, "Image position: %s", positionName[i].name);
                break;
            }
        }
        pack->gotData = false;
        osalHandler->MutexUnlock(pack->mutex);
        if (!image_l.empty() && !image_r.empty())
        {
            now = std::chrono::high_resolution_clock::now();
            duration = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_execution_time);
            if (duration.count() > 100)
            {
                last_execution_time = now;
                cv::Mat left_for_matcher, right_for_matcher;
                cv::Mat left_disp, right_disp;
                cv::Mat filtered_disp;
                left_for_matcher = image_l.clone();
                right_for_matcher = image_r.clone();
                int max_disp = 128;
                int wsize = 19;
                // (An SGBM-based left matcher, as in OpenCV's disparity_filtering.cpp
                // sample, could be used here instead of StereoBM.)
                cv::Ptr<cv::StereoBM> left_matcher = cv::StereoBM::create(max_disp, wsize);
                cv::Ptr<cv::StereoMatcher> right_matcher = cv::ximgproc::createRightMatcher(left_matcher);
                left_matcher->compute(left_for_matcher, right_for_matcher, left_disp);
                right_matcher->compute(right_for_matcher, left_for_matcher, right_disp);
                cv::Mat conf_map;
                conf_map.create(image_l.size(), CV_8U);
                conf_map = cv::Scalar(255);
                double lambda = 8000;
                double sigma = 1.5;
                cv::Ptr<cv::ximgproc::DisparityWLSFilter> wls_filter =
                    cv::ximgproc::createDisparityWLSFilter(left_matcher);
                wls_filter->setLambda(lambda);
                wls_filter->setSigmaColor(sigma);
                wls_filter->filter(left_disp, image_l, filtered_disp, right_disp);
                conf_map = wls_filter->getConfidenceMap();
                double fbs_spatial = 16;
                double fbs_luma = 8.0;
                double fbs_chroma = 8.0;
                double fbs_lambda = 0.5;
                cv::Mat solved_disp;
                cv::ximgproc::fastBilateralSolverFilter(image_l, left_disp, conf_map / 255.0f, solved_disp,
                                                        fbs_spatial, fbs_luma, fbs_chroma, fbs_lambda);
                // StereoBM-family disparities are CV_16S with 4 fractional bits, so
                // convert to float and divide by 16 before reprojecting; otherwise the
                // resulting depths come out 16x too small.
                cv::Mat solved_disp_f;
                solved_disp.convertTo(solved_disp_f, CV_32F, 1.0 / 16.0);
                cv::Mat depth;
                cv::reprojectImageTo3D(solved_disp_f, depth, Q, true, CV_32F);
                std::vector<cv::Mat> channels(3);
                cv::split(depth, channels); // Split the 3 channels into separate single-channel images
                cv::Mat z_channel = channels[2];
                z_channel.setTo(0, z_channel == -std::numeric_limits<float>::infinity());
                z_channel.setTo(0, z_channel == std::numeric_limits<float>::infinity());
                z_channel.setTo(0, z_channel != z_channel); // NaN != NaN, so this masks NaNs
                // Get the center point of the image
                int centerX = z_channel.cols / 2;
                int centerY = z_channel.rows / 2;
                // Retrieve the depth value at the center point
                float centerDepthValue = z_channel.at<float>(centerY, centerX);
                std::cout << "Depth value at center (" << centerX << ", " << centerY << "): "
                          << centerDepthValue << " meters" << std::endl;
                double minVal, maxVal;
                cv::minMaxLoc(z_channel, &minVal, &maxVal);
                std::cout << "z_channel Min: " << minVal << " Max: " << maxVal << std::endl;
                cv::Mat visualization;
                cv::ximgproc::getDisparityVis(solved_disp, visualization, 5);
                cv::Mat visualization_rgb;
                cv::cvtColor(visualization, visualization_rgb, cv::COLOR_GRAY2RGB);
                // Draw a circle at the center point
                cv::circle(visualization_rgb, cv::Point(centerX, centerY), 5, cv::Scalar(0, 255, 0), -1);
                // Label the depth value
                std::string depthLabel = "Depth: " + std::to_string(centerDepthValue) + "m";
                cv::putText(visualization_rgb, depthLabel, cv::Point(centerX + 10, centerY - 10),
                            cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
                cv::imshow("visualization", visualization_rgb);
            }
            image_l.release();
            image_r.release();
            disparity.release();
            disparity_right.release();
            filtered_disparity.release();
            depth.release();
        }
        cv::waitKey(1);
#else
        osalHandler->TaskSleepMs(1000);
        USER_LOG_WARN("Please install opencv to run this stereo image display sample.");
#endif
    }
}
Agent comment from Leon in Zendesk ticket #116330:
Dear developer,
Hello, thank you for contacting DJI.
Yes, they are rectified. Do you want to obtain depth information? For depth information, we recommend a specific process.
The usual processing steps are: RAW image -> camera intrinsic parameters (undistort to obtain the rectified image) -> compute the disparity map -> use the camera extrinsic parameters (compute the depth map).
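A minimal OpenCV sketch of those steps, reusing the parameter names from the code above (the matcher settings are illustrative, not prescriptive):

// 1) Undistort + rectify the RAW pair (skip if the stream is already rectified).
cv::Mat map1x, map1y, map2x, map2y, rect_l, rect_r;
cv::initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, image_size, CV_32FC1, map1x, map1y);
cv::initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, image_size, CV_32FC1, map2x, map2y);
cv::remap(image_l, rect_l, map1x, map1y, cv::INTER_LINEAR);
cv::remap(image_r, rect_r, map2x, map2y, cv::INTER_LINEAR);

// 2) Compute the disparity map on the rectified pair.
cv::Ptr<cv::StereoBM> bm = cv::StereoBM::create(128, 19);
cv::Mat disp16, disp32;
bm->compute(rect_l, rect_r, disp16);
disp16.convertTo(disp32, CV_32F, 1.0 / 16.0); // StereoBM output is CV_16S, 16x fixed-point

// 3) Reproject to depth using the extrinsics baked into Q by cv::stereoRectify.
cv::Mat xyz;
cv::reprojectImageTo3D(disp32, xyz, Q, true);
// Channel 2 of xyz is Z, in the same units as translationLeftInRight.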
You can refer to the early OSDK Sample: https://github.com/dji-sdk/Onboard-SDK/blob/master/sample/platform/linux/advanced-sensing/stereo_vision_depth_perception_sample/stereo_vision_depth_perception_sample.cpp
Thank you for your support of DJI products! Wish you all the best!
Best Regards,
DJI SDK Technical Support