accelerated_features Image registration based on xfeat c++

Thanks to @acai66 for the reference code. it has helped alot. There code has been modifed a bit

For the followings i had help from stackoverflow

Step1: Install onnxruntime

mkdir /tmp/onnxInstall cd /tmp/onnxInstall wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0 unzip onnx_archive.nupkg cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/ cp -r build/native/include/ ~/.local/include/onnxruntime/

Step2: Cmake

Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:

create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:

set(PACKAGE_VERSION "1.7.0") if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_EXACT TRUE) endif() endif()

Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake

include(FindPackageHandleStandardArgs)

///////////// Assume we are in /share/cmake/onnxruntime/onnxruntimeConfig.cmake get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include) set(onnxruntime_LIBRARIES onnxruntime) set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib" )

add_library(onnxruntime SHARED IMPORTED) set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}") set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}") set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)

Step4: Project Cmakelist.txt

cmake_minimum_required(VERSION 3.0) project(YourProjectName) #set(OpenCV_DIR "path/to/opencv/build") include_directories("~/.local/include/onnxruntime/") find_package(OpenCV REQUIRED) find_package(onnxruntime) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED True)

include_directories(${ONNXRUNTIME_INCLUDE_DIR}) include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(ContourDetection demo.cpp) target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")

Step5: Create a symlink between onnxruntime shared library

ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/onnxruntime.so.1.7.0

The modified c++ code

#include <chrono>
#include <iostream>
#include "omp.h"

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


// for onnx model path
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
	const char* CStr = s.c_str();
	size_t len = strlen(CStr) + 1;
	size_t converted = 0;
	wchar_t* WStr;
	WStr = (wchar_t*)malloc(len * sizeof(wchar_t));
	mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE);

	return WStr;
#else
	return s.c_str();
#endif // _WIN32
}

bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
	const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);

	bool sessionIsAvailable = false;
	/* 
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			// try Tensorrt 
			OrtTensorRTProviderOptions trtOptions{};
			trtOptions.device_id = gpuId;
			trtOptions.trt_fp16_enable = 1;
			trtOptions.trt_engine_cache_enable = 1;
			trtOptions.trt_engine_cache_path = "./trt_engine_cache";


			trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;

			session_options.AppendExecutionProvider_TensorRT(trtOptions);

			session = Ort::Session(env, ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: Tensorrt" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	*/

	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			OrtCUDAProviderOptions cuda0ptions;
			cuda0ptions.device_id = gpuId;
			cuda0ptions.cuda_mem_limit = 4 << 30;

			session_options.AppendExecutionProvider_CUDA(cuda0ptions);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CUDA" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CPU" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CPU accelerator." << std::endl;
			sessionIsAvailable = false;
		}
	}

	if (sessionIsAvailable == true)
	{
		Ort::AllocatorWithDefaultOptions allocator;
		// Get input layers count
		size_t num_input_nodes = session.GetInputCount();

		// Get input layer type, shape, name
		for (int i = 0; i < num_input_nodes; i++)
		{
			
			// Name
			std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get());

			std::cout << "Input " << i << ": " << input_name << ", shape: (";

			// Type
			Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// Shape
			std::vector<int64_t> input_node_dims = tensor_info.GetShape();

			for (int j = 0; j < input_node_dims.size(); j++) {
				std::cout << input_node_dims[j];
				if (j == input_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}

		// Get output layers count
		size_t num_output_nodes = session.GetOutputCount();

		// Get output layer type, shape, name
		for (int i = 0; i < num_output_nodes; i++) {
			// Name
			std::string output_name =  session.GetOutputName(i, allocator);
			std::cout << "Output " << i << ": " << output_name << ", shape: (";

			// type
			Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// shape
			std::vector<int64_t> output_node_dims = tensor_info.GetShape();
			for (int j = 0; j < output_node_dims.size(); j++) {
				std::cout << output_node_dims[j];
				if (j == output_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}
		
	}
	else
	{
		std::cout << modelPath << " is invalid model." << std::endl;
	}

	return sessionIsAvailable;
}


class XFeat
{
public:
	XFeat(std::string &xfeatModelPath, std::string& matchingModelPath);
	int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc);
	int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes);

	~XFeat();

	// gpu id
	int gpuId_ = 0;

	// onnxruntime
	Ort::Env env_{ nullptr };
	Ort::Session xfeatSession_{ nullptr };
	Ort::Session matchingSession_{ nullptr };
	Ort::AllocatorWithDefaultOptions allocator;

	//
	std::vector<const char*> xfeatInputNames = { "images" };
	std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
	std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
	std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

	bool initFinishedFlag_ = false;
};

XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
	const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath);
	const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath);

	env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };  //  ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL

	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	std::cout << "All available accelerators:" << std::endl;
	for (int i = 0; i < availableProviders.size(); i++)
	{
		std::cout << "  " << i + 1 << ". " << availableProviders[i] << std::endl;
	}
	// init sessions
	initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
	initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
}

XFeat::~XFeat()
{
	env_.release();
	xfeatSession_.release();
	matchingSession_.release();
}

int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
	// Pre process
	cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3);
	int stride = preProcessedImage.rows * preProcessedImage.cols;
#pragma omp parallel for 
	for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
	{
		*((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2);
		*((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1);
		*((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3);
	}

	// Create input tensor
	int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3;
	std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size());
	assert(input_tensor.IsTensor());


	// Run sessionn
	auto output_tensors =
		xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
			&input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());
	assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor());

	// Get outputs
	auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	int dim1 = static_cast<int>(mkptsShape[0]); // 1
	int dim2 = static_cast<int>(mkptsShape[1]); // 4800
	int dim3 = static_cast<int>(mkptsShape[2]); // 2
	float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
	// To cv::Mat
	mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

	auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(featsShape[0]); // 1
	dim2 = static_cast<int>(featsShape[1]); // 4800
	dim3 = static_cast<int>(featsShape[2]); // 64
	float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
	feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

	auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(scShape[0]); // 1
	dim2 = static_cast<int>(scShape[1]); // 4800
	float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
	sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();

	return 0;
}

int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize();
    std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size());

    int64_t feats0_size = feats0.total() * feats0.elemSize();
    std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size());

    int64_t sc0_size = sc0.total() * sc0.elemSize();
    std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size());

    int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize();
    std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size());

    int64_t feats1_size = feats1.total() * feats1.elemSize();
    std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };
    Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size());

    // Create input tensors
    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(mkpts0_tensor));
    input_tensors.push_back(std::move(feats0_tensor));
    input_tensors.push_back(std::move(sc0_tensor));
    input_tensors.push_back(std::move(mkpts1_tensor));
    input_tensors.push_back(std::move(feats1_tensor));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());
    
    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    int dim1 = static_cast<int>(matchesShape[0]); // num
    int dim2 = static_cast<int>(matchesShape[1]); // 4
    // To cv::Mat
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    dim1 = static_cast<int>(batch_indexesShape[0]); // num

    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();

    return 0;
}

cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
	const cv::Mat& img1, const cv::Mat& img2)
{
	// Step 1: Calculate the Homography matrix and mask
	cv::Mat mask;
	cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
	mask = mask.reshape(1, mask.total());  // Flatten the mask

	// Step 2: Get corners of the first image (img1)
	std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
											cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
	std::vector<cv::Point2f> warpedCorners(4);

	// Step 3: Warp corners to the second image (img2) space
	cv::perspectiveTransform(cornersImg1, warpedCorners, H);

	// Step 4: Draw the warped corners in image2
	cv::Mat img2WithCorners = img2.clone();
	for (size_t i = 0; i < warpedCorners.size(); i++) {
		cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
	}

	// Step 5: Prepare keypoints and matches for drawMatches function
	std::vector<cv::KeyPoint> keypoints1, keypoints2;
	std::vector<cv::DMatch> matches;
	for (size_t i = 0; i < refPoints.size(); i++) {
		if (mask.at<uchar>(i)) {  // Only consider inliers
			keypoints1.emplace_back(refPoints[i], 5);
			keypoints2.emplace_back(dstPoints[i], 5);
		}
	}
	for (size_t i = 0; i < keypoints1.size(); i++) {
		matches.emplace_back(i, i, 0);
	}

	// Draw inlier matches
	cv::Mat imgMatches;
	cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);

	return imgMatches;
}

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.rows; ++i) {
        cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}


int main()
{
    std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
    std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
    cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
    cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
    cv::Mat mkpts0, feats0, sc0;
    cv::Mat mkpts1, feats1, sc1;
    cv::Mat matches, batch_indexes;

    // Init xfeat object
    XFeat xfeat(xfeatModelPath, matchingModelPath);

    // Extract features
    xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
    xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);

    // Matching and refine
    xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);

    // Print results
    std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
    std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;

    // Get points
    std::vector<cv::Point2f> points0, points1;
    for (int i = 0; i < matches.rows; i++) {
        points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
        points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
    }
    
    cv::Mat homography, transformed_img;
    homography = cv::findHomography(points0, points1, cv::RANSAC);
                
    if(homography.empty())
    {
        std::cout << "Homography image empty" << std::endl;
    }

    cv::warpPerspective(image0, transformed_img, homography, image1.size());
    

    // Visualization
    cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
    

    // Display images
    //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
    //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
    //cv::imshow("Matches", drawImage);
    cv::imshow("Registered", transformed_img);
    cv::waitKey();

    return 0;
}

Change the model and image path accordingly. Download only the models from this link Xfeat

Just take the models from the above link. The provided script here is modified. Once again thanks to the reference code

May 17 '24 14:05 federista

Hi @federista, thank you for your interest in our work! It's great that you were able to run the provided example. We hope it will assist others interested in using XFeat with C++!

May 28 '24 00:05 guipotje

Hi, @guipotje congratulations on the article presented at the CVPR. @federista I have followed your instructions, and it worked perfectly in my SLAM system. Thanks for your contribution and the detailed steps. Only two minor changes were required for me.

In my case, the symlink between the onnxruntime shared library requires pointing to libonnxruntime.so.1.7.0 instead of onnxruntime.so.1.7.0. This could be related to standard naming conventions for different OS. Just posting here in case someone faces the same issue in their OS.
On the other hand, I think you have a bug in the drawKeypoints function. Next, you can see the function that worked for me.

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.cols; ++i) {
        cv::Vec2f pt = mkpts.at<cv::Vec2f>(i);
        cv::circle(imgWithKeypoints, cv::Point2f(pt[0], pt[1]), 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}

Jul 10 '24 14:07 joanpepcompany

Are you sure that you have followed the first 3 steps carefully?

Jul 29 '24 08:07 federista

"I added a set command, and successfully located ONNX. May I ask what version of OpenCV you are using?"

Jul 29 '24 08:07 zw-92

I am using 4.2.0 but i highly doubt it has anything to do with finding the package configuration

Jul 29 '24 08:07 federista

I am using 4.2.0 but i highly doubt it has anything to do with finding the package configuration

In the same case of using only the CPU, 100 loops are executed. The original execution speed is 20s, and the execution speed using your code is 50s. Do you know the reason?

Jul 29 '24 09:07 zw-92

Thanks to @acai66 for the reference code. it has helped alot. There code has been modifed a bit

For the followings i had help from stackoverflow

Step1: Install onnxruntime

mkdir /tmp/onnxInstall cd /tmp/onnxInstall wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0 unzip onnx_archive.nupkg cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/ cp -r build/native/include/ ~/.local/include/onnxruntime/

Step2: Cmake

Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:

create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:

set(PACKAGE_VERSION "1.7.0") if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_EXACT TRUE) endif() endif()

Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake

include(FindPackageHandleStandardArgs)

///////////// Assume we are in /share/cmake/onnxruntime/onnxruntimeConfig.cmake get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include) set(onnxruntime_LIBRARIES onnxruntime) set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib" )

add_library(onnxruntime SHARED IMPORTED) set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}") set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}") set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)

Step4: Project Cmakelist.txt

cmake_minimum_required(VERSION 3.0) project(YourProjectName) #set(OpenCV_DIR "path/to/opencv/build") include_directories("~/.local/include/onnxruntime/") find_package(OpenCV REQUIRED) find_package(onnxruntime) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED True)

include_directories(${ONNXRUNTIME_INCLUDE_DIR}) include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(ContourDetection demo.cpp) target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")

Step5: Create a symlink between onnxruntime shared library

ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/onnxruntime.so.1.7.0

The modified c++ code

#include <chrono>
#include <iostream>
#include "omp.h"

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


// for onnx model path
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
	const char* CStr = s.c_str();
	size_t len = strlen(CStr) + 1;
	size_t converted = 0;
	wchar_t* WStr;
	WStr = (wchar_t*)malloc(len * sizeof(wchar_t));
	mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE);

	return WStr;
#else
	return s.c_str();
#endif // _WIN32
}

bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
	const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);

	bool sessionIsAvailable = false;
	/* 
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			// try Tensorrt 
			OrtTensorRTProviderOptions trtOptions{};
			trtOptions.device_id = gpuId;
			trtOptions.trt_fp16_enable = 1;
			trtOptions.trt_engine_cache_enable = 1;
			trtOptions.trt_engine_cache_path = "./trt_engine_cache";


			trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;

			session_options.AppendExecutionProvider_TensorRT(trtOptions);

			session = Ort::Session(env, ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: Tensorrt" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	*/

	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			OrtCUDAProviderOptions cuda0ptions;
			cuda0ptions.device_id = gpuId;
			cuda0ptions.cuda_mem_limit = 4 << 30;

			session_options.AppendExecutionProvider_CUDA(cuda0ptions);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CUDA" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CPU" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CPU accelerator." << std::endl;
			sessionIsAvailable = false;
		}
	}

	if (sessionIsAvailable == true)
	{
		Ort::AllocatorWithDefaultOptions allocator;
		// Get input layers count
		size_t num_input_nodes = session.GetInputCount();

		// Get input layer type, shape, name
		for (int i = 0; i < num_input_nodes; i++)
		{
			
			// Name
			std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get());

			std::cout << "Input " << i << ": " << input_name << ", shape: (";

			// Type
			Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// Shape
			std::vector<int64_t> input_node_dims = tensor_info.GetShape();

			for (int j = 0; j < input_node_dims.size(); j++) {
				std::cout << input_node_dims[j];
				if (j == input_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}

		// Get output layers count
		size_t num_output_nodes = session.GetOutputCount();

		// Get output layer type, shape, name
		for (int i = 0; i < num_output_nodes; i++) {
			// Name
			std::string output_name =  session.GetOutputName(i, allocator);
			std::cout << "Output " << i << ": " << output_name << ", shape: (";

			// type
			Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// shape
			std::vector<int64_t> output_node_dims = tensor_info.GetShape();
			for (int j = 0; j < output_node_dims.size(); j++) {
				std::cout << output_node_dims[j];
				if (j == output_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}
		
	}
	else
	{
		std::cout << modelPath << " is invalid model." << std::endl;
	}

	return sessionIsAvailable;
}


class XFeat
{
public:
	XFeat(std::string &xfeatModelPath, std::string& matchingModelPath);
	int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc);
	int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes);

	~XFeat();

	// gpu id
	int gpuId_ = 0;

	// onnxruntime
	Ort::Env env_{ nullptr };
	Ort::Session xfeatSession_{ nullptr };
	Ort::Session matchingSession_{ nullptr };
	Ort::AllocatorWithDefaultOptions allocator;

	//
	std::vector<const char*> xfeatInputNames = { "images" };
	std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
	std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
	std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

	bool initFinishedFlag_ = false;
};

XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
	const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath);
	const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath);

	env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };  //  ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL

	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	std::cout << "All available accelerators:" << std::endl;
	for (int i = 0; i < availableProviders.size(); i++)
	{
		std::cout << "  " << i + 1 << ". " << availableProviders[i] << std::endl;
	}
	// init sessions
	initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
	initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
}

XFeat::~XFeat()
{
	env_.release();
	xfeatSession_.release();
	matchingSession_.release();
}

int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
	// Pre process
	cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3);
	int stride = preProcessedImage.rows * preProcessedImage.cols;
#pragma omp parallel for 
	for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
	{
		*((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2);
		*((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1);
		*((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3);
	}

	// Create input tensor
	int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3;
	std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size());
	assert(input_tensor.IsTensor());


	// Run sessionn
	auto output_tensors =
		xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
			&input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());
	assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor());

	// Get outputs
	auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	int dim1 = static_cast<int>(mkptsShape[0]); // 1
	int dim2 = static_cast<int>(mkptsShape[1]); // 4800
	int dim3 = static_cast<int>(mkptsShape[2]); // 2
	float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
	// To cv::Mat
	mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

	auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(featsShape[0]); // 1
	dim2 = static_cast<int>(featsShape[1]); // 4800
	dim3 = static_cast<int>(featsShape[2]); // 64
	float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
	feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

	auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(scShape[0]); // 1
	dim2 = static_cast<int>(scShape[1]); // 4800
	float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
	sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();

	return 0;
}

int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize();
    std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size());

    int64_t feats0_size = feats0.total() * feats0.elemSize();
    std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size());

    int64_t sc0_size = sc0.total() * sc0.elemSize();
    std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size());

    int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize();
    std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size());

    int64_t feats1_size = feats1.total() * feats1.elemSize();
    std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };
    Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size());

    // Create input tensors
    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(mkpts0_tensor));
    input_tensors.push_back(std::move(feats0_tensor));
    input_tensors.push_back(std::move(sc0_tensor));
    input_tensors.push_back(std::move(mkpts1_tensor));
    input_tensors.push_back(std::move(feats1_tensor));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());
    
    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    int dim1 = static_cast<int>(matchesShape[0]); // num
    int dim2 = static_cast<int>(matchesShape[1]); // 4
    // To cv::Mat
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    dim1 = static_cast<int>(batch_indexesShape[0]); // num

    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();

    return 0;
}

cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
	const cv::Mat& img1, const cv::Mat& img2)
{
	// Step 1: Calculate the Homography matrix and mask
	cv::Mat mask;
	cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
	mask = mask.reshape(1, mask.total());  // Flatten the mask

	// Step 2: Get corners of the first image (img1)
	std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
											cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
	std::vector<cv::Point2f> warpedCorners(4);

	// Step 3: Warp corners to the second image (img2) space
	cv::perspectiveTransform(cornersImg1, warpedCorners, H);

	// Step 4: Draw the warped corners in image2
	cv::Mat img2WithCorners = img2.clone();
	for (size_t i = 0; i < warpedCorners.size(); i++) {
		cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
	}

	// Step 5: Prepare keypoints and matches for drawMatches function
	std::vector<cv::KeyPoint> keypoints1, keypoints2;
	std::vector<cv::DMatch> matches;
	for (size_t i = 0; i < refPoints.size(); i++) {
		if (mask.at<uchar>(i)) {  // Only consider inliers
			keypoints1.emplace_back(refPoints[i], 5);
			keypoints2.emplace_back(dstPoints[i], 5);
		}
	}
	for (size_t i = 0; i < keypoints1.size(); i++) {
		matches.emplace_back(i, i, 0);
	}

	// Draw inlier matches
	cv::Mat imgMatches;
	cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);

	return imgMatches;
}

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.rows; ++i) {
        cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}


int main()
{
    std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
    std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
    cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
    cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
    cv::Mat mkpts0, feats0, sc0;
    cv::Mat mkpts1, feats1, sc1;
    cv::Mat matches, batch_indexes;

    // Init xfeat object
    XFeat xfeat(xfeatModelPath, matchingModelPath);

    // Extract features
    xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
    xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);

    // Matching and refine
    xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);

    // Print results
    std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
    std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;

    // Get points
    std::vector<cv::Point2f> points0, points1;
    for (int i = 0; i < matches.rows; i++) {
        points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
        points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
    }
    
    cv::Mat homography, transformed_img;
    homography = cv::findHomography(points0, points1, cv::RANSAC);
                
    if(homography.empty())
    {
        std::cout << "Homography image empty" << std::endl;
    }

    cv::warpPerspective(image0, transformed_img, homography, image1.size());
    

    // Visualization
    cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
    

    // Display images
    //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
    //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
    //cv::imshow("Matches", drawImage);
    cv::imshow("Registered", transformed_img);
    cv::waitKey();

    return 0;
}

Change the model and image path accordingly. Download only the models from this link Xfeat

Just take the models from the above link. The provided script here is modified. Once again thanks to the reference code

Thanks to @acai66 for the reference code. it has helped alot. There code has been modifed a bit

For the followings i had help from stackoverflow

Step1: Install onnxruntime

mkdir /tmp/onnxInstall cd /tmp/onnxInstall wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0 unzip onnx_archive.nupkg cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/ cp -r build/native/include/ ~/.local/include/onnxruntime/

Step2: Cmake

Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:

create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:

set(PACKAGE_VERSION "1.7.0") if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_COMPATIBLE FALSE) else() set(PACKAGE_VERSION_COMPATIBLE TRUE) if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") set(PACKAGE_VERSION_EXACT TRUE) endif() endif()

Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake

include(FindPackageHandleStandardArgs)

///////////// Assume we are in /share/cmake/onnxruntime/onnxruntimeConfig.cmake get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include) set(onnxruntime_LIBRARIES onnxruntime) set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib" )

add_library(onnxruntime SHARED IMPORTED) set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}") set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}") set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)

Step4: Project Cmakelist.txt

cmake_minimum_required(VERSION 3.0) project(YourProjectName) #set(OpenCV_DIR "path/to/opencv/build") include_directories("~/.local/include/onnxruntime/") find_package(OpenCV REQUIRED) find_package(onnxruntime) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED True)

include_directories(${ONNXRUNTIME_INCLUDE_DIR}) include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(ContourDetection demo.cpp) target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS}) target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")

Step5: Create a symlink between onnxruntime shared library

ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/onnxruntime.so.1.7.0

The modified c++ code

#include <chrono>
#include <iostream>
#include "omp.h"

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>


// for onnx model path
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
	const char* CStr = s.c_str();
	size_t len = strlen(CStr) + 1;
	size_t converted = 0;
	wchar_t* WStr;
	WStr = (wchar_t*)malloc(len * sizeof(wchar_t));
	mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE);

	return WStr;
#else
	return s.c_str();
#endif // _WIN32
}

bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
	const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);

	bool sessionIsAvailable = false;
	/* 
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			// try Tensorrt 
			OrtTensorRTProviderOptions trtOptions{};
			trtOptions.device_id = gpuId;
			trtOptions.trt_fp16_enable = 1;
			trtOptions.trt_engine_cache_enable = 1;
			trtOptions.trt_engine_cache_path = "./trt_engine_cache";


			trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;

			session_options.AppendExecutionProvider_TensorRT(trtOptions);

			session = Ort::Session(env, ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: Tensorrt" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	*/

	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			OrtCUDAProviderOptions cuda0ptions;
			cuda0ptions.device_id = gpuId;
			cuda0ptions.cuda_mem_limit = 4 << 30;

			session_options.AppendExecutionProvider_CUDA(cuda0ptions);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CUDA" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
	}
	if (sessionIsAvailable == false)
	{
		try
		{
			Ort::SessionOptions session_options;
			session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

			session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
			
			sessionIsAvailable = true;
			std::cout << "Using accelerator: CPU" << std::endl;
		}
		catch (Ort::Exception e)
		{
			std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
			std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl;
			sessionIsAvailable = false;
		}
		catch (...)
		{
			std::cout << "Failed to init CPU accelerator." << std::endl;
			sessionIsAvailable = false;
		}
	}

	if (sessionIsAvailable == true)
	{
		Ort::AllocatorWithDefaultOptions allocator;
		// Get input layers count
		size_t num_input_nodes = session.GetInputCount();

		// Get input layer type, shape, name
		for (int i = 0; i < num_input_nodes; i++)
		{
			
			// Name
			std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get());

			std::cout << "Input " << i << ": " << input_name << ", shape: (";

			// Type
			Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// Shape
			std::vector<int64_t> input_node_dims = tensor_info.GetShape();

			for (int j = 0; j < input_node_dims.size(); j++) {
				std::cout << input_node_dims[j];
				if (j == input_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}

		// Get output layers count
		size_t num_output_nodes = session.GetOutputCount();

		// Get output layer type, shape, name
		for (int i = 0; i < num_output_nodes; i++) {
			// Name
			std::string output_name =  session.GetOutputName(i, allocator);
			std::cout << "Output " << i << ": " << output_name << ", shape: (";

			// type
			Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
			auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

			ONNXTensorElementDataType type = tensor_info.GetElementType();

			// shape
			std::vector<int64_t> output_node_dims = tensor_info.GetShape();
			for (int j = 0; j < output_node_dims.size(); j++) {
				std::cout << output_node_dims[j];
				if (j == output_node_dims.size() - 1)
				{
					std::cout << ")" << std::endl; 
				}
				else
				{
					std::cout << ", ";
				}
			}
		}
		
	}
	else
	{
		std::cout << modelPath << " is invalid model." << std::endl;
	}

	return sessionIsAvailable;
}


class XFeat
{
public:
	XFeat(std::string &xfeatModelPath, std::string& matchingModelPath);
	int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc);
	int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes);

	~XFeat();

	// gpu id
	int gpuId_ = 0;

	// onnxruntime
	Ort::Env env_{ nullptr };
	Ort::Session xfeatSession_{ nullptr };
	Ort::Session matchingSession_{ nullptr };
	Ort::AllocatorWithDefaultOptions allocator;

	//
	std::vector<const char*> xfeatInputNames = { "images" };
	std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
	std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
	std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

	bool initFinishedFlag_ = false;
};

XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
	const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath);
	const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath);

	env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };  //  ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL

	std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
	std::cout << "All available accelerators:" << std::endl;
	for (int i = 0; i < availableProviders.size(); i++)
	{
		std::cout << "  " << i + 1 << ". " << availableProviders[i] << std::endl;
	}
	// init sessions
	initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
	initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
}

XFeat::~XFeat()
{
	env_.release();
	xfeatSession_.release();
	matchingSession_.release();
}

int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
	// Pre process
	cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3);
	int stride = preProcessedImage.rows * preProcessedImage.cols;
#pragma omp parallel for 
	for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
	{
		*((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2);
		*((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1);
		*((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3);
	}

	// Create input tensor
	int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3;
	std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size());
	assert(input_tensor.IsTensor());


	// Run sessionn
	auto output_tensors =
		xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
			&input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());
	assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor());

	// Get outputs
	auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	int dim1 = static_cast<int>(mkptsShape[0]); // 1
	int dim2 = static_cast<int>(mkptsShape[1]); // 4800
	int dim3 = static_cast<int>(mkptsShape[2]); // 2
	float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
	// To cv::Mat
	mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

	auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(featsShape[0]); // 1
	dim2 = static_cast<int>(featsShape[1]); // 4800
	dim3 = static_cast<int>(featsShape[2]); // 64
	float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
	feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

	auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
	dim1 = static_cast<int>(scShape[0]); // 1
	dim2 = static_cast<int>(scShape[1]); // 4800
	float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
	sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();

	return 0;
}

int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize();
    std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size());

    int64_t feats0_size = feats0.total() * feats0.elemSize();
    std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size());

    int64_t sc0_size = sc0.total() * sc0.elemSize();
    std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size());

    int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize();
    std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size());

    int64_t feats1_size = feats1.total() * feats1.elemSize();
    std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };
    Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size());

    // Create input tensors
    std::vector<Ort::Value> input_tensors;
    input_tensors.push_back(std::move(mkpts0_tensor));
    input_tensors.push_back(std::move(feats0_tensor));
    input_tensors.push_back(std::move(sc0_tensor));
    input_tensors.push_back(std::move(mkpts1_tensor));
    input_tensors.push_back(std::move(feats1_tensor));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());
    
    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    int dim1 = static_cast<int>(matchesShape[0]); // num
    int dim2 = static_cast<int>(matchesShape[1]); // 4
    // To cv::Mat
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    dim1 = static_cast<int>(batch_indexesShape[0]); // num

    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();

    return 0;
}

cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
	const cv::Mat& img1, const cv::Mat& img2)
{
	// Step 1: Calculate the Homography matrix and mask
	cv::Mat mask;
	cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
	mask = mask.reshape(1, mask.total());  // Flatten the mask

	// Step 2: Get corners of the first image (img1)
	std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
											cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
	std::vector<cv::Point2f> warpedCorners(4);

	// Step 3: Warp corners to the second image (img2) space
	cv::perspectiveTransform(cornersImg1, warpedCorners, H);

	// Step 4: Draw the warped corners in image2
	cv::Mat img2WithCorners = img2.clone();
	for (size_t i = 0; i < warpedCorners.size(); i++) {
		cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
	}

	// Step 5: Prepare keypoints and matches for drawMatches function
	std::vector<cv::KeyPoint> keypoints1, keypoints2;
	std::vector<cv::DMatch> matches;
	for (size_t i = 0; i < refPoints.size(); i++) {
		if (mask.at<uchar>(i)) {  // Only consider inliers
			keypoints1.emplace_back(refPoints[i], 5);
			keypoints2.emplace_back(dstPoints[i], 5);
		}
	}
	for (size_t i = 0; i < keypoints1.size(); i++) {
		matches.emplace_back(i, i, 0);
	}

	// Draw inlier matches
	cv::Mat imgMatches;
	cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);

	return imgMatches;
}

// Helper function to draw keypoints
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();
    for (int i = 0; i < mkpts.rows; ++i) {
        cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}


int main()
{
    std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
    std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
    cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
    cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
    cv::Mat mkpts0, feats0, sc0;
    cv::Mat mkpts1, feats1, sc1;
    cv::Mat matches, batch_indexes;

    // Init xfeat object
    XFeat xfeat(xfeatModelPath, matchingModelPath);

    // Extract features
    xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
    xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);

    // Matching and refine
    xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);

    // Print results
    std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
    std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;

    // Get points
    std::vector<cv::Point2f> points0, points1;
    for (int i = 0; i < matches.rows; i++) {
        points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
        points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
    }
    
    cv::Mat homography, transformed_img;
    homography = cv::findHomography(points0, points1, cv::RANSAC);
                
    if(homography.empty())
    {
        std::cout << "Homography image empty" << std::endl;
    }

    cv::warpPerspective(image0, transformed_img, homography, image1.size());
    

    // Visualization
    cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
    

    // Display images
    //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
    //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
    //cv::imshow("Matches", drawImage);
    cv::imshow("Registered", transformed_img);
    cv::waitKey();

    return 0;
}

Change the model and image path accordingly. Download only the models from this link Xfeat

Just take the models from the above link. The provided script here is modified. Once again thanks to the reference code

Hello, can you provide the complete code before you switch to onnx? Thank you very much. You can put it on your github homepage and I will download it myself.

Jul 31 '24 03:07 zw-92

I don't have ownership of this code, for which i cant upload it on GitHub. Consult @acai66. I have just modified the existing code. Any queries about the modified code is welcome. Thank you

Jul 31 '24 08:07 federista