Image registration based on xfeat c++
Thanks to @acai66 for the reference code; it has helped a lot. Their code has been modified a bit.
For the following steps I had help from Stack Overflow.
Step1: Install onnxruntime
mkdir /tmp/onnxInstall
cd /tmp/onnxInstall
wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0
unzip onnx_archive.nupkg
cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/
cp -r build/native/include/ ~/.local/include/onnxruntime/
Step2: Cmake
Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:
create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:
set(PACKAGE_VERSION "1.7.0")
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()
Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake
include(FindPackageHandleStandardArgs)
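# Assume we are in <install-prefix>/share/cmake/onnxruntime/onnxruntimeConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)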
set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include)
set(onnxruntime_LIBRARIES onnxruntime)
set(onnxruntime_CXX_FLAGS "") # no flags needed
find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib" )
add_library(onnxruntime SHARED IMPORTED)
set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")
find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)
Step4: Project CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(YourProjectName)
#set(OpenCV_DIR "path/to/opencv/build")
include_directories("~/.local/include/onnxruntime/")
find_package(OpenCV REQUIRED)
find_package(onnxruntime)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)
include_directories(${ONNXRUNTIME_INCLUDE_DIR})
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(ContourDetection demo.cpp)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")
Step5: Create a symlink between onnxruntime shared library
ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/onnxruntime.so.1.7.0
The modified c++ code
#include <chrono>
#include <iostream>
#include "omp.h"
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
// Converts a model path held in a std::string into the character type used
// for ONNX Runtime path arguments (ORTCHAR_T).
//
// On Windows the path is converted to a freshly malloc'ed wide-character
// buffer; this function never frees it, so ownership passes to the caller.
// On every other platform the returned pointer is simply s.c_str(), which is
// only valid while `s` is alive and unmodified.
const ORTCHAR_T* stringToOrtchar_t(std::string const& s)
{
#ifdef _WIN32
    const char* narrowPath = s.c_str();
    // +1 so the terminating NUL fits in the destination buffer.
    const size_t bufferLength = strlen(narrowPath) + 1;
    size_t convertedCount = 0;
    wchar_t* widePath = static_cast<wchar_t*>(malloc(bufferLength * sizeof(wchar_t)));
    mbstowcs_s(&convertedCount, widePath, bufferLength, narrowPath, _TRUNCATE);
    return widePath;
#else
    return s.c_str();
#endif // _WIN32
}
// Prints the dimensions of a tensor shape separated by ", " and always closes
// the parenthesis opened by the caller, even when the shape has no dimensions.
static void printShapeTail(const std::vector<int64_t>& dims)
{
    for (size_t j = 0; j < dims.size(); ++j)
    {
        if (j > 0)
        {
            std::cout << ", ";
        }
        std::cout << dims[j];
    }
    std::cout << ")" << std::endl;
}

// Creates an ONNX Runtime session for `modelPath` and stores it in `session`.
//
// The CUDA execution provider is tried first on device `gpuId`; if that
// throws, a plain CPU session is created instead. On success the name and
// shape of every model input and output are printed to stdout.
//
// @param env        ONNX Runtime environment the session is created in.
// @param session    Receives the created session (left untouched on failure).
// @param modelPath  Path of the .onnx file.
// @param gpuId      CUDA device index used for the CUDA provider.
// @return true when a session was created, false when both attempts failed.
bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0)
{
    const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath);
    bool sessionIsAvailable = false;
    /*
    if (sessionIsAvailable == false)
    {
        try
        {
            Ort::SessionOptions session_options;
            session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
            // try Tensorrt
            OrtTensorRTProviderOptions trtOptions{};
            trtOptions.device_id = gpuId;
            trtOptions.trt_fp16_enable = 1;
            trtOptions.trt_engine_cache_enable = 1;
            trtOptions.trt_engine_cache_path = "./trt_engine_cache";
            trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024;
            session_options.AppendExecutionProvider_TensorRT(trtOptions);
            session = Ort::Session(env, ortModelPath, session_options);
            sessionIsAvailable = true;
            std::cout << "Using accelerator: Tensorrt" << std::endl;
        }
        catch (const Ort::Exception& e)
        {
            std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
            std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
            sessionIsAvailable = false;
        }
        catch (...)
        {
            std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl;
            sessionIsAvailable = false;
        }
    }
    */

    // Attempt 1: CUDA execution provider.
    if (!sessionIsAvailable)
    {
        try
        {
            Ort::SessionOptions session_options;
            session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
            // Value-initialize so no field is left with an indeterminate value.
            OrtCUDAProviderOptions cudaOptions{};
            cudaOptions.device_id = gpuId;
            // 4 GiB. The shift must be done in size_t: `4 << 30` overflows int.
            cudaOptions.cuda_mem_limit = static_cast<size_t>(4) << 30;
            // Keep copies on the default stream (the setting ONNX Runtime documents
            // as recommended) instead of whatever value-initialization leaves there.
            cudaOptions.do_copy_in_default_stream = 1;
            session_options.AppendExecutionProvider_CUDA(cudaOptions);
            // NOTE(review): the const_cast is presumably needed because the targeted
            // ONNX Runtime release declares Session(Env&, ...) - confirm before removing.
            session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
            sessionIsAvailable = true;
            std::cout << "Using accelerator: CUDA" << std::endl;
        }
        catch (const Ort::Exception& e)
        {
            std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
            std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
            sessionIsAvailable = false;
        }
        catch (...)
        {
            std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl;
            sessionIsAvailable = false;
        }
    }

    // Attempt 2: default (CPU) execution provider. This is the last fallback.
    if (!sessionIsAvailable)
    {
        try
        {
            Ort::SessionOptions session_options;
            session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
            session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options);
            sessionIsAvailable = true;
            std::cout << "Using accelerator: CPU" << std::endl;
        }
        catch (const Ort::Exception& e)
        {
            std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl;
            // There is no further accelerator to try after the CPU.
            std::cout << "Failed to init CPU accelerator." << std::endl;
            sessionIsAvailable = false;
        }
        catch (...)
        {
            std::cout << "Failed to init CPU accelerator." << std::endl;
            sessionIsAvailable = false;
        }
    }

    if (sessionIsAvailable)
    {
        Ort::AllocatorWithDefaultOptions allocator;

        // Describe every model input: index, name and shape.
        const size_t num_input_nodes = session.GetInputCount();
        for (size_t i = 0; i < num_input_nodes; i++)
        {
            // The name buffer is allocated by `allocator`; copy it, then hand it back.
            char* rawName = session.GetInputName(i, allocator);
            const std::string input_name = rawName;
            allocator.Free(rawName);
            std::cout << "Input " << i << ": " << input_name << ", shape: (";

            Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
            auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
            printShapeTail(tensor_info.GetShape());
        }

        // Describe every model output the same way.
        const size_t num_output_nodes = session.GetOutputCount();
        for (size_t i = 0; i < num_output_nodes; i++)
        {
            char* rawName = session.GetOutputName(i, allocator);
            const std::string output_name = rawName;
            allocator.Free(rawName);
            std::cout << "Output " << i << ": " << output_name << ", shape: (";

            Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
            auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
            printShapeTail(tensor_info.GetShape());
        }
    }
    else
    {
        std::cout << modelPath << " is invalid model." << std::endl;
    }

#ifdef _WIN32
    // On Windows stringToOrtchar_t returns a malloc'ed wide-string copy that is
    // only needed while the session is being created.
    free(const_cast<ORTCHAR_T*>(ortModelPath));
#endif // _WIN32

    return sessionIsAvailable;
}
// Bundles the two ONNX Runtime sessions used by this demo: one that extracts
// keypoints/descriptors from an image and one that matches two such sets.
// Every member is public.
class XFeat
{
public:
    // Takes the paths of the feature-extraction model and the matching model.
    XFeat(std::string& xfeatModelPath, std::string& matchingModelPath);

    // Runs the feature model on `image` and fills `mkpts`, `feats` and `sc`
    // with copies of its three outputs.
    int detectAndCompute(const cv::Mat& image,
                         cv::Mat& mkpts,
                         cv::Mat& feats,
                         cv::Mat& sc);

    // Runs the matching model on two feature sets and fills `matches` and
    // `batch_indexes` with copies of its two outputs.
    int matchStar(const cv::Mat& mkpts0,
                  const cv::Mat& feats0,
                  const cv::Mat& sc0,
                  const cv::Mat& mkpts1,
                  const cv::Mat& feats1,
                  cv::Mat& matches,
                  cv::Mat& batch_indexes);

    ~XFeat();

    // Device index handed to initOrtSession for both sessions.
    int gpuId_{ 0 };

    // ONNX Runtime objects. They start out empty (nullptr) and are assigned
    // in the constructor. Declaration order is significant: members are
    // destroyed in reverse order, so do not reorder these.
    Ort::Env env_{ nullptr };
    Ort::Session xfeatSession_{ nullptr };
    Ort::Session matchingSession_{ nullptr };
    Ort::AllocatorWithDefaultOptions allocator;

    // Tensor names passed to Session::Run for each model.
    std::vector<const char*> xfeatInputNames = { "images" };
    std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" };
    std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"};
    std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" };

    bool initFinishedFlag_{ false };
};
// Creates the ONNX Runtime environment, lists the execution providers that
// this ONNX Runtime build reports, and opens one session per model.
//
// @param xfeatModelPath     Path of the feature-extraction model.
// @param matchingModelPath  Path of the matching model.
//
// The constructor does not throw when a model fails to load; instead
// initFinishedFlag_ is true only if both sessions were created.
XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath)
{
    // Only fatal messages are logged; switch to ORT_LOGGING_LEVEL_VERBOSE for debugging.
    env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" };

    const std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
    std::cout << "All available accelerators:" << std::endl;
    for (size_t i = 0; i < availableProviders.size(); i++)
    {
        std::cout << " " << i + 1 << ". " << availableProviders[i] << std::endl;
    }

    // init sessions
    // Both calls are made unconditionally so the diagnostics for each model are
    // printed even when the first one fails.
    const bool xfeatReady = initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_);
    const bool matchingReady = initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_);
    initFinishedFlag_ = xfeatReady && matchingReady;
}
// The Ort::Env and Ort::Session members own their underlying handles and free
// them in their own destructors. Calling release() on them, as this destructor
// used to do, only gives up ownership without destroying anything, which leaks
// the environment and both sessions. Members are destroyed in reverse
// declaration order, so both sessions go away before the environment.
XFeat::~XFeat() = default;
// Runs the feature-extraction model on one image.
//
// @param image  8-bit, 3-channel image (the layout cv::imread produces for a
//               colour image); converted to a planar float tensor here.
// @param mkpts  Receives a copy of output 0 as a dim0 x dim1 Mat with dim2 float channels.
// @param feats  Receives a copy of output 1 as a dim0 x dim1 Mat with dim2 float channels.
// @param sc     Receives a copy of output 2 as a dim0 x dim1 single-channel float Mat.
// @return 0 on success, -1 when the input image or the model outputs are unusable.
int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc)
{
    // The conversion below reads three bytes per pixel; anything else would read
    // out of bounds or produce garbage.
    if (image.empty() || image.type() != CV_8UC3)
    {
        std::cerr << "Error: detectAndCompute expects a non-empty 8-bit 3-channel image." << std::endl;
        return -1;
    }
    // The pixels are addressed as one flat array, so the rows must be contiguous
    // (not the case for e.g. a region-of-interest view).
    const cv::Mat src = image.isContinuous() ? image : image.clone();

    // Pre process
    // The CV_32FC3 Mat is only used as a float buffer of rows * cols * 3 elements;
    // every element is written by the loop below.
    cv::Mat preProcessedImage(src.rows, src.cols, CV_32FC3);
    const int stride = src.rows * src.cols;
    const uchar* srcData = src.ptr<uchar>();
    float* dstData = preProcessedImage.ptr<float>();
#pragma omp parallel for
    for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB
    {
        dstData[i] = static_cast<float>(srcData[i * 3 + 2]);
        dstData[i + stride] = static_cast<float>(srcData[i * 3 + 1]);
        dstData[i + stride * 2] = static_cast<float>(srcData[i * 3]);
    }

    // Create input tensor (it borrows preProcessedImage's memory, no copy is made)
    const size_t input_size = static_cast<size_t>(stride) * 3;
    std::vector<int64_t> input_node_dims = { 1, 3, src.rows, src.cols };
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, dstData, input_size, input_node_dims.data(), input_node_dims.size());

    // Run session
    auto output_tensors =
        xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(),
            &input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size());

    // Checked at run time (not with assert) so release builds are covered too.
    if (output_tensors.size() != xfeatOutputNames.size() || !output_tensors.front().IsTensor())
    {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    const auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    const auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    const auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape();
    if (mkptsShape.size() < 3 || featsShape.size() < 3 || scShape.size() < 2)
    {
        std::cerr << "Error: Unexpected output tensor rank." << std::endl;
        return -1;
    }

    // To cv::Mat. clone() is required because the tensor memory is released when
    // output_tensors goes out of scope.
    int dim1 = static_cast<int>(mkptsShape[0]); // 1
    int dim2 = static_cast<int>(mkptsShape[1]); // 4800
    int dim3 = static_cast<int>(mkptsShape[2]); // 2
    float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>();
    mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), mkptsDataPtr).clone();

    dim1 = static_cast<int>(featsShape[0]); // 1
    dim2 = static_cast<int>(featsShape[1]); // 4800
    dim3 = static_cast<int>(featsShape[2]); // 64
    float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>();
    feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone();

    dim1 = static_cast<int>(scShape[0]); // 1
    dim2 = static_cast<int>(scShape[1]); // 4800
    float* scDataPtr = output_tensors[2].GetTensorMutableData<float>();
    sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone();
    return 0;
}
// Runs the matching model on two feature sets.
//
// @param mkpts0, feats0, sc0  Keypoints, descriptors and a third per-keypoint
//                             output of the first image, as filled in by
//                             detectAndCompute.
// @param mkpts1, feats1       Keypoints and descriptors of the second image.
// @param matches        Receives a copy of output 0 as a dim0 x dim1 float Mat.
// @param batch_indexes  Receives a copy of output 1 as a dim0 x 1 float Mat.
//                       NOTE(review): the data is read as float - confirm the
//                       model's element type for this output.
// @return 0 on success, -1 when an input or the model output is unusable.
int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes)
{
    // The tensors below alias the Mat buffers directly, so each Mat has to be
    // non-empty, hold floats and be stored contiguously.
    const auto isUsable = [](const cv::Mat& m) {
        return !m.empty() && m.depth() == CV_32F && m.isContinuous();
    };
    if (!isUsable(mkpts0) || !isUsable(feats0) || !isUsable(sc0) || !isUsable(mkpts1) || !isUsable(feats1))
    {
        std::cerr << "Error: matchStar inputs must be non-empty, continuous float matrices." << std::endl;
        return -1;
    }

    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);

    // Wraps a Mat's buffer in a float tensor without copying. CreateTensor<float>
    // expects the number of float elements, not the number of bytes.
    const auto wrapAsTensor = [&memory_info](const cv::Mat& m, const std::vector<int64_t>& dims) {
        const size_t elementCount = m.total() * static_cast<size_t>(m.channels());
        // The API takes a mutable pointer; the buffers are only used as inputs here.
        float* data = const_cast<float*>(m.ptr<float>());
        return Ort::Value::CreateTensor<float>(memory_info, data, elementCount, dims.data(), dims.size());
    };

    // The shape vectors stay alive until after Run().
    const std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() };
    const std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() };
    const std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols };
    const std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() };
    const std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() };

    // Create input tensors, in the same order as matchingInputNames
    std::vector<Ort::Value> input_tensors;
    input_tensors.reserve(5);
    input_tensors.push_back(wrapAsTensor(mkpts0, mkpts0_dims));
    input_tensors.push_back(wrapAsTensor(feats0, feats0_dims));
    input_tensors.push_back(wrapAsTensor(sc0, sc0_dims));
    input_tensors.push_back(wrapAsTensor(mkpts1, mkpts1_dims));
    input_tensors.push_back(wrapAsTensor(feats1, feats1_dims));

    // Run session
    auto output_tensors =
        matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(),
            input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size());

    // Check output tensors
    if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) {
        std::cerr << "Error: Output tensor size mismatch or output is not a tensor." << std::endl;
        return -1;
    }

    // Get outputs
    const auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    const auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    if (matchesShape.size() < 2 || batch_indexesShape.empty()) {
        std::cerr << "Error: Unexpected output tensor rank." << std::endl;
        return -1;
    }

    // To cv::Mat. clone() detaches the result from the tensor memory, which is
    // released when output_tensors goes out of scope.
    int dim1 = static_cast<int>(matchesShape[0]); // num
    const int dim2 = static_cast<int>(matchesShape[1]); // 4
    float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>();
    matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone();

    dim1 = static_cast<int>(batch_indexesShape[0]); // num
    float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>();
    batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone();
    return 0;
}
// Builds a side-by-side visualisation of two images and their RANSAC-inlier
// point matches.
//
// A homography is estimated from refPoints -> dstPoints. When one is found, the
// outline of img1 warped into img2 is drawn on a copy of img2 and every inlier
// pair is drawn as a match. When no homography can be estimated (fewer than
// four pairs, mismatched vector sizes, or estimation failure) the two images
// are returned side by side without any overlay instead of raising an OpenCV
// error.
//
// @param refPoints  Points in img1.
// @param dstPoints  Corresponding points in img2 (same order as refPoints).
// @param img1       First image.
// @param img2       Second image.
// @return The image produced by cv::drawMatches.
cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints,
    const cv::Mat& img1, const cv::Mat& img2)
{
    cv::Mat img2WithCorners = img2.clone();
    std::vector<cv::KeyPoint> keypoints1, keypoints2;
    std::vector<cv::DMatch> matches;

    // Step 1: Calculate the Homography matrix and mask
    cv::Mat mask;
    cv::Mat H;
    const bool enoughPoints = refPoints.size() >= 4 && refPoints.size() == dstPoints.size();
    if (enoughPoints) {
        H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999);
    }

    if (!H.empty() && mask.total() == refPoints.size()) {
        mask = mask.reshape(1, static_cast<int>(mask.total())); // Flatten the mask

        // Step 2: Get corners of the first image (img1)
        const std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0),
            cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) };
        std::vector<cv::Point2f> warpedCorners(4);

        // Step 3: Warp corners to the second image (img2) space
        cv::perspectiveTransform(cornersImg1, warpedCorners, H);

        // Step 4: Draw the warped corners in image2 as a closed quadrilateral
        for (size_t i = 0; i < warpedCorners.size(); i++) {
            cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4);
        }

        // Step 5: Prepare keypoints and matches for drawMatches function
        for (size_t i = 0; i < refPoints.size(); i++) {
            if (mask.at<uchar>(static_cast<int>(i))) { // Only consider inliers
                keypoints1.emplace_back(refPoints[i], 5);
                keypoints2.emplace_back(dstPoints[i], 5);
            }
        }
        // Inlier k in keypoints1 corresponds to inlier k in keypoints2.
        for (size_t i = 0; i < keypoints1.size(); i++) {
            const int idx = static_cast<int>(i);
            matches.emplace_back(idx, idx, 0);
        }
    }

    // Draw inlier matches
    cv::Mat imgMatches;
    cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
    return imgMatches;
}
// Helper function to draw keypoints
//
// Returns a copy of `img` with a filled red circle (radius 5) at every keypoint.
//
// `mkpts` is interpreted as a flat list of float (x, y) pairs, so both layouts
// used in this file work: the 1 x N two-channel Mat that detectAndCompute
// builds, and an N x 2 single-channel Mat. Iterating over mkpts.rows, as this
// helper used to do, draws at most one point for the 1 x N layout.
// A Mat that is empty, not float, or holds an odd number of values is ignored
// and the unmodified copy is returned.
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat imgWithKeypoints = img.clone();

    const size_t scalarCount = mkpts.total() * static_cast<size_t>(mkpts.channels());
    if (mkpts.empty() || mkpts.depth() != CV_32F || scalarCount % 2 != 0) {
        return imgWithKeypoints;
    }

    // reshape() needs contiguous storage; afterwards every row is one (x, y) pair.
    const cv::Mat contiguous = mkpts.isContinuous() ? mkpts : mkpts.clone();
    const cv::Mat xy = contiguous.reshape(1, static_cast<int>(scalarCount / 2));

    for (int i = 0; i < xy.rows; ++i) {
        const cv::Point2f pt(xy.at<float>(i, 0), xy.at<float>(i, 1));
        cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1);
    }
    return imgWithKeypoints;
}
int main()
{
std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx";
std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx";
cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg");
cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg");
cv::Mat mkpts0, feats0, sc0;
cv::Mat mkpts1, feats1, sc1;
cv::Mat matches, batch_indexes;
// Init xfeat object
XFeat xfeat(xfeatModelPath, matchingModelPath);
// Extract features
xfeat.detectAndCompute(image0, mkpts0, feats0, sc0);
xfeat.detectAndCompute(image1, mkpts1, feats1, sc1);
// Matching and refine
xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes);
// Print results
std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl;
std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl;
// Get points
std::vector<cv::Point2f> points0, points1;
for (int i = 0; i < matches.rows; i++) {
points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1)));
points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3)));
}
cv::Mat homography, transformed_img;
homography = cv::findHomography(points0, points1, cv::RANSAC);
if(homography.empty())
{
std::cout << "Homography image empty" << std::endl;
}
cv::warpPerspective(image0, transformed_img, homography, image1.size());
// Visualization
cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1);
// Display images
//cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0));
//cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1));
//cv::imshow("Matches", drawImage);
cv::imshow("Registered", transformed_img);
cv::waitKey();
return 0;
}
Change the model and image paths accordingly. Download only the models from this link: Xfeat
Just take the models from the above link; the script provided here is a modified version. Once again, thanks for the reference code.
Hi @federista, thank you for your interest in our work! It's great that you were able to run the provided example. We hope it will assist others interested in using XFeat with C++!
Hi, @guipotje congratulations on the article presented at the CVPR. @federista I have followed your instructions, and it worked perfectly in my SLAM system. Thanks for your contribution and the detailed steps. Only two minor changes were required for me.
- In my case, the symlink between the onnxruntime shared library requires pointing to libonnxruntime.so.1.7.0 instead of onnxruntime.so.1.7.0. This could be related to standard naming conventions for different OS. Just posting here in case someone faces the same issue in their OS.
- On the other hand, I think you have a bug in the drawKeypoints function. Next, you can see the function that worked for me.
// Helper function to draw keypoints
// Returns a copy of `img` with one filled red circle (radius 5) per keypoint.
// One cv::Vec2f (x, y) is read for every column index of `mkpts` through the
// single-index form of cv::Mat::at, which matches the 1 x N two-channel
// layout that detectAndCompute builds.
cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) {
    cv::Mat canvas = img.clone();
    const cv::Scalar red(0, 0, 255);
    for (int col = 0; col < mkpts.cols; ++col) {
        const cv::Vec2f xy = mkpts.at<cv::Vec2f>(col);
        const cv::Point2f center(xy[0], xy[1]);
        cv::circle(canvas, center, 5, red, -1);
    }
    return canvas;
}
Are you sure that you have followed the first 3 steps carefully?
"I added a set command, and successfully located ONNX. May I ask what version of OpenCV you are using?"
I am using 4.2.0, but I highly doubt it has anything to do with finding the package configuration.
I am using 4.2.0, but I highly doubt it has anything to do with finding the package configuration.
Under the same conditions (CPU only, 100 loop iterations), the original code takes 20 s, while your code takes 50 s. Do you know the reason?
Thanks to @acai66 for the reference code; it has helped a lot. Their code has been modified a bit.
For the following steps I had help from Stack Overflow.
Step1: Install onnxruntime
mkdir /tmp/onnxInstall
cd /tmp/onnxInstall
wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0
unzip onnx_archive.nupkg
cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/
cp -r build/native/include/ ~/.local/include/onnxruntime/
Step2: Cmake
Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:
create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:
set(PACKAGE_VERSION "1.7.0")
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
endif()
Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake
include(FindPackageHandleStandardArgs)
# Assume we are in <install-prefix>/share/cmake/onnxruntime/onnxruntimeConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include)
set(onnxruntime_LIBRARIES onnxruntime)
set(onnxruntime_CXX_FLAGS "") # no flags needed
find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib" )
add_library(onnxruntime SHARED IMPORTED)
set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}")
set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")
find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)
Step4: Project CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(YourProjectName)
#set(OpenCV_DIR "path/to/opencv/build")
include_directories("~/.local/include/onnxruntime/")
find_package(OpenCV REQUIRED)
find_package(onnxruntime)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)
include_directories(${ONNXRUNTIME_INCLUDE_DIR})
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(ContourDetection demo.cpp)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME} "~/.local/lib/libonnxruntime.so")
Step5: Create a symlink between onnxruntime shared library
ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/onnxruntime.so.1.7.0
The modified c++ code
#include <chrono> #include <iostream> #include "omp.h" #include <opencv2/opencv.hpp> #include <onnxruntime_cxx_api.h> // for onnx model path const ORTCHAR_T* stringToOrtchar_t(std::string const& s) { #ifdef _WIN32 const char* CStr = s.c_str(); size_t len = strlen(CStr) + 1; size_t converted = 0; wchar_t* WStr; WStr = (wchar_t*)malloc(len * sizeof(wchar_t)); mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE); return WStr; #else return s.c_str(); #endif // _WIN32 } bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0) { const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath); bool sessionIsAvailable = false; /* if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); // try Tensorrt OrtTensorRTProviderOptions trtOptions{}; trtOptions.device_id = gpuId; trtOptions.trt_fp16_enable = 1; trtOptions.trt_engine_cache_enable = 1; trtOptions.trt_engine_cache_path = "./trt_engine_cache"; trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024; session_options.AppendExecutionProvider_TensorRT(trtOptions); session = Ort::Session(env, ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: Tensorrt" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." 
<< std::endl; sessionIsAvailable = false; } } */ if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); OrtCUDAProviderOptions cuda0ptions; cuda0ptions.device_id = gpuId; cuda0ptions.cuda_mem_limit = 4 << 30; session_options.AppendExecutionProvider_CUDA(cuda0ptions); session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: CUDA" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } } if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: CPU" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init CPU accelerator." 
<< std::endl; sessionIsAvailable = false; } } if (sessionIsAvailable == true) { Ort::AllocatorWithDefaultOptions allocator; // Get input layers count size_t num_input_nodes = session.GetInputCount(); // Get input layer type, shape, name for (int i = 0; i < num_input_nodes; i++) { // Name std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get()); std::cout << "Input " << i << ": " << input_name << ", shape: ("; // Type Ort::TypeInfo type_info = session.GetInputTypeInfo(i); auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); ONNXTensorElementDataType type = tensor_info.GetElementType(); // Shape std::vector<int64_t> input_node_dims = tensor_info.GetShape(); for (int j = 0; j < input_node_dims.size(); j++) { std::cout << input_node_dims[j]; if (j == input_node_dims.size() - 1) { std::cout << ")" << std::endl; } else { std::cout << ", "; } } } // Get output layers count size_t num_output_nodes = session.GetOutputCount(); // Get output layer type, shape, name for (int i = 0; i < num_output_nodes; i++) { // Name std::string output_name = session.GetOutputName(i, allocator); std::cout << "Output " << i << ": " << output_name << ", shape: ("; // type Ort::TypeInfo type_info = session.GetOutputTypeInfo(i); auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); ONNXTensorElementDataType type = tensor_info.GetElementType(); // shape std::vector<int64_t> output_node_dims = tensor_info.GetShape(); for (int j = 0; j < output_node_dims.size(); j++) { std::cout << output_node_dims[j]; if (j == output_node_dims.size() - 1) { std::cout << ")" << std::endl; } else { std::cout << ", "; } } } } else { std::cout << modelPath << " is invalid model." 
<< std::endl; } return sessionIsAvailable; } class XFeat { public: XFeat(std::string &xfeatModelPath, std::string& matchingModelPath); int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc); int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes); ~XFeat(); // gpu id int gpuId_ = 0; // onnxruntime Ort::Env env_{ nullptr }; Ort::Session xfeatSession_{ nullptr }; Ort::Session matchingSession_{ nullptr }; Ort::AllocatorWithDefaultOptions allocator; // std::vector<const char*> xfeatInputNames = { "images" }; std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" }; std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"}; std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" }; bool initFinishedFlag_ = false; }; XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath) { const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath); const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath); env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" }; // ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL std::vector<std::string> availableProviders = Ort::GetAvailableProviders(); std::cout << "All available accelerators:" << std::endl; for (int i = 0; i < availableProviders.size(); i++) { std::cout << " " << i + 1 << ". 
" << availableProviders[i] << std::endl; } // init sessions initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_); initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_); } XFeat::~XFeat() { env_.release(); xfeatSession_.release(); matchingSession_.release(); } int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc) { // Pre process cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3); int stride = preProcessedImage.rows * preProcessedImage.cols; #pragma omp parallel for for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB { *((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2); *((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1); *((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3); } // Create input tensor int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3; std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols }; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size()); assert(input_tensor.IsTensor()); // Run sessionn auto output_tensors = xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(), &input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size()); assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor()); // Get outputs auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); int dim1 = static_cast<int>(mkptsShape[0]); // 1 int dim2 = static_cast<int>(mkptsShape[1]); // 4800 int dim3 = static_cast<int>(mkptsShape[2]); // 2 float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>(); // To cv::Mat mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), 
mkptsDataPtr).clone(); auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(featsShape[0]); // 1 dim2 = static_cast<int>(featsShape[1]); // 4800 dim3 = static_cast<int>(featsShape[2]); // 64 float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>(); feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone(); auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(scShape[0]); // 1 dim2 = static_cast<int>(scShape[1]); // 4800 float* scDataPtr = output_tensors[2].GetTensorMutableData<float>(); sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone(); return 0; } int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes) { auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize(); std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() }; Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size()); int64_t feats0_size = feats0.total() * feats0.elemSize(); std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() }; Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size()); int64_t sc0_size = sc0.total() * sc0.elemSize(); std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols }; Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size()); int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize(); std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() }; Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), 
mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size()); int64_t feats1_size = feats1.total() * feats1.elemSize(); std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() }; Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size()); // Create input tensors std::vector<Ort::Value> input_tensors; input_tensors.push_back(std::move(mkpts0_tensor)); input_tensors.push_back(std::move(feats0_tensor)); input_tensors.push_back(std::move(sc0_tensor)); input_tensors.push_back(std::move(mkpts1_tensor)); input_tensors.push_back(std::move(feats1_tensor)); // Run session auto output_tensors = matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(), input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size()); // Check output tensors if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) { std::cerr << "Error: Output tensor size mismatch or output is not a tensor." 
<< std::endl; return -1; } // Get outputs auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); int dim1 = static_cast<int>(matchesShape[0]); // num int dim2 = static_cast<int>(matchesShape[1]); // 4 // To cv::Mat float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>(); matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone(); auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(batch_indexesShape[0]); // num float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>(); batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone(); return 0; } cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints, const cv::Mat& img1, const cv::Mat& img2) { // Step 1: Calculate the Homography matrix and mask cv::Mat mask; cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999); mask = mask.reshape(1, mask.total()); // Flatten the mask // Step 2: Get corners of the first image (img1) std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0), cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) }; std::vector<cv::Point2f> warpedCorners(4); // Step 3: Warp corners to the second image (img2) space cv::perspectiveTransform(cornersImg1, warpedCorners, H); // Step 4: Draw the warped corners in image2 cv::Mat img2WithCorners = img2.clone(); for (size_t i = 0; i < warpedCorners.size(); i++) { cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4); } // Step 5: Prepare keypoints and matches for drawMatches function std::vector<cv::KeyPoint> keypoints1, keypoints2; std::vector<cv::DMatch> matches; for (size_t i = 0; i < refPoints.size(); i++) { if (mask.at<uchar>(i)) { // Only consider inliers keypoints1.emplace_back(refPoints[i], 5); keypoints2.emplace_back(dstPoints[i], 5); } } 
for (size_t i = 0; i < keypoints1.size(); i++) { matches.emplace_back(i, i, 0); } // Draw inlier matches cv::Mat imgMatches; cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); return imgMatches; } // Helper function to draw keypoints cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) { cv::Mat imgWithKeypoints = img.clone(); for (int i = 0; i < mkpts.rows; ++i) { cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1)); cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1); } return imgWithKeypoints; } int main() { std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx"; std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx"; cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg"); cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg"); cv::Mat mkpts0, feats0, sc0; cv::Mat mkpts1, feats1, sc1; cv::Mat matches, batch_indexes; // Init xfeat object XFeat xfeat(xfeatModelPath, matchingModelPath); // Extract features xfeat.detectAndCompute(image0, mkpts0, feats0, sc0); xfeat.detectAndCompute(image1, mkpts1, feats1, sc1); // Matching and refine xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes); // Print results std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl; std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl; // Get points std::vector<cv::Point2f> points0, points1; for (int i = 0; i < matches.rows; i++) { points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1))); points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3))); } cv::Mat homography, transformed_img; 
homography = cv::findHomography(points0, points1, cv::RANSAC); if(homography.empty()) { std::cout << "Homography image empty" << std::endl; } cv::warpPerspective(image0, transformed_img, homography, image1.size()); // Visualization cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1); // Display images //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0)); //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1)); //cv::imshow("Matches", drawImage); cv::imshow("Registered", transformed_img); cv::waitKey(); return 0; }Change the model and image path accordingly. Download only the models from this link Xfeat
Just take the models from the above link. The script provided here is modified. Once again, thanks to the authors of the reference code.
Thanks to @acai66 for the reference code. It has helped a lot. Their code has been modified a bit.
For the following steps I had help from Stack Overflow.
Step1: Install onnxruntime
# Download the prebuilt ONNX Runtime 1.7.0 NuGet package and copy the shared
# library and the headers into ~/.local. One command per line.
mkdir -p /tmp/onnxInstall
cd /tmp/onnxInstall
wget -O onnx_archive.nupkg https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.7.0
unzip onnx_archive.nupkg
# The copy targets' parent directories must exist before cp runs.
mkdir -p ~/.local/lib ~/.local/include
cp runtimes/linux-x64/native/libonnxruntime.so ~/.local/lib/
cp -r build/native/include/ ~/.local/include/onnxruntime/
Step2: Cmake
Now if you want to be able to find_package(onnxruntime) from your Cmake package, I suggest you place my self-created onnx cmake files in ~/.local/share/cmake/onnxruntime. The files are:
create a ~/.local/share/cmake/onnxruntime/onnxruntimeVersion.cmake:
# onnxruntimeVersion.cmake
# Version file consulted by find_package(onnxruntime [version]).
# Each CMake command must sit on its own line.
set(PACKAGE_VERSION "1.7.0")

if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
    # The installed package is older than the requested version.
    set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
    set(PACKAGE_VERSION_COMPATIBLE TRUE)
    if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
        set(PACKAGE_VERSION_EXACT TRUE)
    endif()
endif()
Step3: create a file ~/.local/share/cmake/onnxruntime/onnxruntimeConfig.cmake
# onnxruntimeConfig.cmake
# Hand-written package configuration so that find_package(onnxruntime) can
# locate the prebuilt library and headers copied in Step 1.
include(FindPackageHandleStandardArgs)

# Assume we are in <install-prefix>/share/cmake/onnxruntime/onnxruntimeConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(onnxruntime_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)

set(onnxruntime_INCLUDE_DIRS ${onnxruntime_INSTALL_PREFIX}/include)
set(onnxruntime_LIBRARIES onnxruntime)
set(onnxruntime_CXX_FLAGS "") # no flags needed

find_library(onnxruntime_LIBRARY onnxruntime
    PATHS "${onnxruntime_INSTALL_PREFIX}/lib"
)

# Guard the imported target so that calling find_package(onnxruntime) more
# than once in the same project does not fail with "target already exists".
if(NOT TARGET onnxruntime)
    add_library(onnxruntime SHARED IMPORTED)
    set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION "${onnxruntime_LIBRARY}")
    set_property(TARGET onnxruntime PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIRS}")
    set_property(TARGET onnxruntime PROPERTY INTERFACE_COMPILE_OPTIONS "${onnxruntime_CXX_FLAGS}")
endif()

find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_LIBRARY onnxruntime_INCLUDE_DIRS)
Step4: Project CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(YourProjectName)

#set(OpenCV_DIR "path/to/opencv/build")

# CMake does not expand "~", so the home directory is taken from the environment.
include_directories("$ENV{HOME}/.local/include/onnxruntime/")

find_package(OpenCV REQUIRED)
find_package(onnxruntime)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# onnxruntimeConfig.cmake exports onnxruntime_INCLUDE_DIRS (this exact spelling).
include_directories(${onnxruntime_INCLUDE_DIRS})
include_directories(${OpenCV_INCLUDE_DIRS})

# The executable target and the target passed to target_link_libraries()
# must be the same name; both use the project name here.
add_executable(${PROJECT_NAME} demo.cpp)
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME} "$ENV{HOME}/.local/lib/libonnxruntime.so")
Step5: Create a symlink to the onnxruntime shared library
# The link name needs the "lib" prefix so the dynamic loader can resolve it
# (this assumes the library's SONAME is libonnxruntime.so.1.7.0 - verify with
# `readelf -d ~/.local/lib/libonnxruntime.so | grep SONAME`).
# Writing to /usr/local/lib usually requires root; prefix with sudo if needed.
ln -s ~/.local/lib/libonnxruntime.so /usr/local/lib/libonnxruntime.so.1.7.0
The modified c++ code
#include <chrono> #include <iostream> #include "omp.h" #include <opencv2/opencv.hpp> #include <onnxruntime_cxx_api.h> // for onnx model path const ORTCHAR_T* stringToOrtchar_t(std::string const& s) { #ifdef _WIN32 const char* CStr = s.c_str(); size_t len = strlen(CStr) + 1; size_t converted = 0; wchar_t* WStr; WStr = (wchar_t*)malloc(len * sizeof(wchar_t)); mbstowcs_s(&converted, WStr, len, CStr, _TRUNCATE); return WStr; #else return s.c_str(); #endif // _WIN32 } bool initOrtSession(const Ort::Env& env, Ort::Session& session, std::string& modelPath, const int& gpuId = 0) { const ORTCHAR_T* ortModelPath = stringToOrtchar_t(modelPath); bool sessionIsAvailable = false; /* if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); // try Tensorrt OrtTensorRTProviderOptions trtOptions{}; trtOptions.device_id = gpuId; trtOptions.trt_fp16_enable = 1; trtOptions.trt_engine_cache_enable = 1; trtOptions.trt_engine_cache_path = "./trt_engine_cache"; trtOptions.trt_max_workspace_size = (size_t)4 * 1024 * 1024 * 1024; session_options.AppendExecutionProvider_TensorRT(trtOptions); session = Ort::Session(env, ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: Tensorrt" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init Tensorrt accelerator, Trying another accelerator..." 
<< std::endl; sessionIsAvailable = false; } } */ if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); OrtCUDAProviderOptions cuda0ptions; cuda0ptions.device_id = gpuId; cuda0ptions.cuda_mem_limit = 4 << 30; session_options.AppendExecutionProvider_CUDA(cuda0ptions); session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: CUDA" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init CUDA accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } } if (sessionIsAvailable == false) { try { Ort::SessionOptions session_options; session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); session = Ort::Session(const_cast<Ort::Env&>(env), ortModelPath, session_options); sessionIsAvailable = true; std::cout << "Using accelerator: CPU" << std::endl; } catch (Ort::Exception e) { std::cout << "Exception code: " << e.GetOrtErrorCode() << ", exception: " << e.what() << std::endl; std::cout << "Failed to init CPU accelerator, Trying another accelerator..." << std::endl; sessionIsAvailable = false; } catch (...) { std::cout << "Failed to init CPU accelerator." 
<< std::endl; sessionIsAvailable = false; } } if (sessionIsAvailable == true) { Ort::AllocatorWithDefaultOptions allocator; // Get input layers count size_t num_input_nodes = session.GetInputCount(); // Get input layer type, shape, name for (int i = 0; i < num_input_nodes; i++) { // Name std::string input_name = session.GetInputName(i, allocator); //std::string(session.GetInputName(i, allocator).get()); std::cout << "Input " << i << ": " << input_name << ", shape: ("; // Type Ort::TypeInfo type_info = session.GetInputTypeInfo(i); auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); ONNXTensorElementDataType type = tensor_info.GetElementType(); // Shape std::vector<int64_t> input_node_dims = tensor_info.GetShape(); for (int j = 0; j < input_node_dims.size(); j++) { std::cout << input_node_dims[j]; if (j == input_node_dims.size() - 1) { std::cout << ")" << std::endl; } else { std::cout << ", "; } } } // Get output layers count size_t num_output_nodes = session.GetOutputCount(); // Get output layer type, shape, name for (int i = 0; i < num_output_nodes; i++) { // Name std::string output_name = session.GetOutputName(i, allocator); std::cout << "Output " << i << ": " << output_name << ", shape: ("; // type Ort::TypeInfo type_info = session.GetOutputTypeInfo(i); auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); ONNXTensorElementDataType type = tensor_info.GetElementType(); // shape std::vector<int64_t> output_node_dims = tensor_info.GetShape(); for (int j = 0; j < output_node_dims.size(); j++) { std::cout << output_node_dims[j]; if (j == output_node_dims.size() - 1) { std::cout << ")" << std::endl; } else { std::cout << ", "; } } } } else { std::cout << modelPath << " is invalid model." 
<< std::endl; } return sessionIsAvailable; } class XFeat { public: XFeat(std::string &xfeatModelPath, std::string& matchingModelPath); int detectAndCompute(const cv::Mat &image, cv::Mat &mkpts, cv::Mat& feats, cv::Mat& sc); int matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes); ~XFeat(); // gpu id int gpuId_ = 0; // onnxruntime Ort::Env env_{ nullptr }; Ort::Session xfeatSession_{ nullptr }; Ort::Session matchingSession_{ nullptr }; Ort::AllocatorWithDefaultOptions allocator; // std::vector<const char*> xfeatInputNames = { "images" }; std::vector<const char*> xfeatOutputNames = { "mkpts", "feats", "sc" }; std::vector<const char*> matchingInputNames = { "mkpts0", "feats0", "sc0", "mkpts1", "feats1"}; std::vector<const char*> matchingOutputNames = { "matches", "batch_indexes" }; bool initFinishedFlag_ = false; }; XFeat::XFeat(std::string& xfeatModelPath, std::string& matchingModelPath) { const ORTCHAR_T* ortXfeatModelPath = stringToOrtchar_t(xfeatModelPath); const ORTCHAR_T* ortMatchingModelPath = stringToOrtchar_t(matchingModelPath); env_ = Ort::Env{ OrtLoggingLevel::ORT_LOGGING_LEVEL_FATAL, "xfeat_demo" }; // ORT_LOGGING_LEVEL_VERBOSE, ORT_LOGGING_LEVEL_FATAL std::vector<std::string> availableProviders = Ort::GetAvailableProviders(); std::cout << "All available accelerators:" << std::endl; for (int i = 0; i < availableProviders.size(); i++) { std::cout << " " << i + 1 << ". 
" << availableProviders[i] << std::endl; } // init sessions initOrtSession(env_, xfeatSession_, xfeatModelPath, gpuId_); initOrtSession(env_, matchingSession_, matchingModelPath, gpuId_); } XFeat::~XFeat() { env_.release(); xfeatSession_.release(); matchingSession_.release(); } int XFeat::detectAndCompute(const cv::Mat& image, cv::Mat& mkpts, cv::Mat& feats, cv::Mat& sc) { // Pre process cv::Mat preProcessedImage = cv::Mat::zeros(image.rows, image.cols, CV_32FC3); int stride = preProcessedImage.rows * preProcessedImage.cols; #pragma omp parallel for for (int i = 0; i < stride; i++) // HWC -> CHW, BGR -> RGB { *((float*)preProcessedImage.data + i) = (float)*(image.data + i * 3 + 2); *((float*)preProcessedImage.data + i + stride) = (float)*(image.data + i * 3 + 1); *((float*)preProcessedImage.data + i + stride * 2) = (float)*(image.data + i * 3); } // Create input tensor int64_t input_size = preProcessedImage.rows * preProcessedImage.cols * 3; std::vector<int64_t> input_node_dims = { 1, 3, preProcessedImage.rows , preProcessedImage.cols }; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(preProcessedImage.data), input_size, input_node_dims.data(), input_node_dims.size()); assert(input_tensor.IsTensor()); // Run sessionn auto output_tensors = xfeatSession_.Run(Ort::RunOptions{ nullptr }, xfeatInputNames.data(), &input_tensor, xfeatInputNames.size(), xfeatOutputNames.data(), xfeatOutputNames.size()); assert(output_tensors.size() == xfeatOutputNames.size() && output_tensors.front().IsTensor()); // Get outputs auto mkptsShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); int dim1 = static_cast<int>(mkptsShape[0]); // 1 int dim2 = static_cast<int>(mkptsShape[1]); // 4800 int dim3 = static_cast<int>(mkptsShape[2]); // 2 float* mkptsDataPtr = output_tensors[0].GetTensorMutableData<float>(); // To cv::Mat mkpts = cv::Mat(dim1, dim2, CV_32FC(dim3), 
mkptsDataPtr).clone(); auto featsShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(featsShape[0]); // 1 dim2 = static_cast<int>(featsShape[1]); // 4800 dim3 = static_cast<int>(featsShape[2]); // 64 float* featsDataPtr = output_tensors[1].GetTensorMutableData<float>(); feats = cv::Mat(dim1, dim2, CV_32FC(dim3), featsDataPtr).clone(); auto scShape = output_tensors[2].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(scShape[0]); // 1 dim2 = static_cast<int>(scShape[1]); // 4800 float* scDataPtr = output_tensors[2].GetTensorMutableData<float>(); sc = cv::Mat(dim1, dim2, CV_32F, scDataPtr).clone(); return 0; } int XFeat::matchStar(const cv::Mat& mkpts0, const cv::Mat& feats0, const cv::Mat& sc0, const cv::Mat& mkpts1, const cv::Mat& feats1, cv::Mat& matches, cv::Mat& batch_indexes) { auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); int64_t mkpts0_size = mkpts0.total() * mkpts0.elemSize(); std::vector<int64_t> mkpts0_dims = { mkpts0.rows, mkpts0.cols, mkpts0.channels() }; Ort::Value mkpts0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts0.data), mkpts0_size, mkpts0_dims.data(), mkpts0_dims.size()); int64_t feats0_size = feats0.total() * feats0.elemSize(); std::vector<int64_t> feats0_dims = { feats0.rows, feats0.cols, feats0.channels() }; Ort::Value feats0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats0.data), feats0_size, feats0_dims.data(), feats0_dims.size()); int64_t sc0_size = sc0.total() * sc0.elemSize(); std::vector<int64_t> sc0_dims = { sc0.rows, sc0.cols }; Ort::Value sc0_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(sc0.data), sc0_size, sc0_dims.data(), sc0_dims.size()); int64_t mkpts1_size = mkpts1.total() * mkpts1.elemSize(); std::vector<int64_t> mkpts1_dims = { mkpts1.rows, mkpts1.cols, mkpts1.channels() }; Ort::Value mkpts1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(mkpts1.data), 
mkpts1_size, mkpts1_dims.data(), mkpts1_dims.size()); int64_t feats1_size = feats1.total() * feats1.elemSize(); std::vector<int64_t> feats1_dims = { feats1.rows, feats1.cols, feats1.channels() }; Ort::Value feats1_tensor = Ort::Value::CreateTensor<float>(memory_info, (float*)(feats1.data), feats1_size, feats1_dims.data(), feats1_dims.size()); // Create input tensors std::vector<Ort::Value> input_tensors; input_tensors.push_back(std::move(mkpts0_tensor)); input_tensors.push_back(std::move(feats0_tensor)); input_tensors.push_back(std::move(sc0_tensor)); input_tensors.push_back(std::move(mkpts1_tensor)); input_tensors.push_back(std::move(feats1_tensor)); // Run session auto output_tensors = matchingSession_.Run(Ort::RunOptions{ nullptr }, matchingInputNames.data(), input_tensors.data(), input_tensors.size(), matchingOutputNames.data(), matchingOutputNames.size()); // Check output tensors if (output_tensors.size() != matchingOutputNames.size() || !output_tensors.front().IsTensor()) { std::cerr << "Error: Output tensor size mismatch or output is not a tensor." 
<< std::endl; return -1; } // Get outputs auto matchesShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape(); int dim1 = static_cast<int>(matchesShape[0]); // num int dim2 = static_cast<int>(matchesShape[1]); // 4 // To cv::Mat float* matchesDataPtr = output_tensors[0].GetTensorMutableData<float>(); matches = cv::Mat(dim1, dim2, CV_32F, matchesDataPtr).clone(); auto batch_indexesShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape(); dim1 = static_cast<int>(batch_indexesShape[0]); // num float* batch_indexesDataPtr = output_tensors[1].GetTensorMutableData<float>(); batch_indexes = cv::Mat(dim1, 1, CV_32F, batch_indexesDataPtr).clone(); return 0; } cv::Mat warpCornersAndDrawMatches(const std::vector<cv::Point2f>& refPoints, const std::vector<cv::Point2f>& dstPoints, const cv::Mat& img1, const cv::Mat& img2) { // Step 1: Calculate the Homography matrix and mask cv::Mat mask; cv::Mat H = cv::findHomography(refPoints, dstPoints, cv::RANSAC, 3.5, mask, 1000, 0.999); mask = mask.reshape(1, mask.total()); // Flatten the mask // Step 2: Get corners of the first image (img1) std::vector<cv::Point2f> cornersImg1 = { cv::Point2f(0, 0), cv::Point2f(img1.cols - 1, 0), cv::Point2f(img1.cols - 1, img1.rows - 1), cv::Point2f(0, img1.rows - 1) }; std::vector<cv::Point2f> warpedCorners(4); // Step 3: Warp corners to the second image (img2) space cv::perspectiveTransform(cornersImg1, warpedCorners, H); // Step 4: Draw the warped corners in image2 cv::Mat img2WithCorners = img2.clone(); for (size_t i = 0; i < warpedCorners.size(); i++) { cv::line(img2WithCorners, warpedCorners[i], warpedCorners[(i + 1) % 4], cv::Scalar(0, 255, 0), 4); } // Step 5: Prepare keypoints and matches for drawMatches function std::vector<cv::KeyPoint> keypoints1, keypoints2; std::vector<cv::DMatch> matches; for (size_t i = 0; i < refPoints.size(); i++) { if (mask.at<uchar>(i)) { // Only consider inliers keypoints1.emplace_back(refPoints[i], 5); keypoints2.emplace_back(dstPoints[i], 5); } } 
for (size_t i = 0; i < keypoints1.size(); i++) { matches.emplace_back(i, i, 0); } // Draw inlier matches cv::Mat imgMatches; cv::drawMatches(img1, keypoints1, img2WithCorners, keypoints2, matches, imgMatches, cv::Scalar(0, 255, 0), cv::Scalar::all(-1), std::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); return imgMatches; } // Helper function to draw keypoints cv::Mat drawKeypoints(const cv::Mat& img, const cv::Mat& mkpts) { cv::Mat imgWithKeypoints = img.clone(); for (int i = 0; i < mkpts.rows; ++i) { cv::Point2f pt(mkpts.at<float>(i, 0), mkpts.at<float>(i, 1)); cv::circle(imgWithKeypoints, pt, 5, cv::Scalar(0, 0, 255), -1); } return imgWithKeypoints; } int main() { std::string xfeatModelPath = "/home/rack_dl/image_registration/xfeat/xfeat_dualscale.onnx"; std::string matchingModelPath = "/home/rack_dl/image_registration/xfeat/matching.onnx"; cv::Mat image0 = cv::imread("/home/rack_dl/register/xfeat/6.jpg"); cv::Mat image1 = cv::imread("/home/rack_dl/register/xfeat/5.jpg"); cv::Mat mkpts0, feats0, sc0; cv::Mat mkpts1, feats1, sc1; cv::Mat matches, batch_indexes; // Init xfeat object XFeat xfeat(xfeatModelPath, matchingModelPath); // Extract features xfeat.detectAndCompute(image0, mkpts0, feats0, sc0); xfeat.detectAndCompute(image1, mkpts1, feats1, sc1); // Matching and refine xfeat.matchStar(mkpts0, feats0, sc0, mkpts1, feats1, matches, batch_indexes); // Print results std::cout << "matches: " << matches.rows << "x" << matches.cols << "x" << matches.channels() << std::endl; std::cout << "batch_indexes: " << batch_indexes.rows << "x" << batch_indexes.cols << "x" << batch_indexes.channels() << std::endl; // Get points std::vector<cv::Point2f> points0, points1; for (int i = 0; i < matches.rows; i++) { points0.push_back(cv::Point2f(*((float*)matches.data + i * 4), *((float*)matches.data + i * 4 + 1))); points1.push_back(cv::Point2f(*((float*)matches.data + i * 4 + 2), *((float*)matches.data + i * 4 + 3))); } cv::Mat homography, transformed_img; 
homography = cv::findHomography(points0, points1, cv::RANSAC); if(homography.empty()) { std::cout << "Homography image empty" << std::endl; } cv::warpPerspective(image0, transformed_img, homography, image1.size()); // Visualization cv::Mat drawImage = warpCornersAndDrawMatches(points0, points1, image0, image1); // Display images //cv::imshow("Detected Keypoints Image0", drawKeypoints(image0, mkpts0)); //cv::imshow("Detected Keypoints Image1", drawKeypoints(image1, mkpts1)); //cv::imshow("Matches", drawImage); cv::imshow("Registered", transformed_img); cv::waitKey(); return 0; }Change the model and image path accordingly. Download only the models from this link Xfeat
Take only the models from the link above; the script provided here is a modified version. Once again, thanks to the author of the reference code.
Hello, can you provide the complete code before you switch to onnx? Thank you very much. You can put it on your github homepage and I will download it myself.
I don't own this code, so I can't upload it to GitHub; please consult @acai66. I have only modified the existing code. Any queries about the modified code are welcome. Thank you.