libfacedetection
libfacedetection copied to clipboard
How to predict face landmarks with Caffe model?
The following code detects faces on camera with yufacedetectnet-open-v2.prototxt and yufacedetectnet-open-v2.caffemodel, but how can I predict face landmarks with this Caffe model?
main.cpp:
#include <chrono>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
// Model input dimensions. In the code below, width and height appear only in
// the commented-out cv::resize call inside main(); the channel count is not
// referenced by the visible code.
#define MODEL_INPUT_WIDTH 320
#define MODEL_INPUT_HEIGHT 240
#define MODEL_INPUT_CHANNEL 3
// Key code compared against cv::waitKey() to leave the detection loop (27 is ASCII ESC).
#define ESC_KEY_CODE 27
/**
 * Face detection demo: loads a Caffe model through OpenCV's DNN module, grabs
 * frames from a camera, runs the "detection_out" layer on each frame and draws
 * every detection with confidence >= 0.5 onto the frame.
 *
 * Command line:
 *   argv[1]  path to the model .prototxt
 *   argv[2]  path to the model .caffemodel
 *   argv[3]  optional camera index (defaults to -1)
 *
 * Returns EXIT_SUCCESS when the user presses ESC, EXIT_FAILURE on bad
 * arguments, model loading failure, camera open failure or an empty frame.
 */
int main(int argc, char* argv[])
{
    //----------------------------------------------------------------------------------------------------
    if ((argc < 3) || (argc > 4))
    {
        std::cout << "Usage: ./face-detector <model-prototxt> <model-caffemodel> [<camera-index>]" << std::endl;
        std::cout << "Example: ./face-detector yufacedetectnet-open-v2.prototxt yufacedetectnet-open-v2.caffemodel -1" << std::endl;
        return EXIT_FAILURE;
    }
    //----------------------------------------------------------------------------------------------------
    std::string prototxtFileName = argv[1];
    std::string caffemodelFileName = argv[2];
    int cameraIndex = -1;
    if (argc == 4)
    {
        // std::stoi throws std::invalid_argument / std::out_of_range on bad
        // input; report it instead of letting the exception terminate the program.
        try
        {
            cameraIndex = std::stoi(argv[3]);
        }
        catch (const std::exception&)
        {
            std::cout << "Error: Invalid camera index: " << argv[3] << std::endl;
            return EXIT_FAILURE;
        }
    }
    //----------------------------------------------------------------------------------------------------
    std::cout << "Loading model..." << std::endl;
    cv::dnn::Net net;
    // OpenCV reports load failures (e.g. missing or unreadable model files) by
    // throwing cv::Exception, which derives from std::exception.
    try
    {
        net = cv::dnn::readNetFromCaffe(prototxtFileName, caffemodelFileName);
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
    catch (const std::exception& e)
    {
        std::cout << "Error: Could not load model: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << std::endl;
    //----------------------------------------------------------------------------------------------------
    std::cout << "Opening camera..." << std::endl;
    cv::VideoCapture videoCapture;
    if (!videoCapture.open(cameraIndex))
    {
        std::cout << "Error: Could not open camera: " << cameraIndex << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << std::endl;
    //----------------------------------------------------------------------------------------------------
    std::cout << "Detecting..." << std::endl;
    while (true)
    {
        cv::Mat frame;
        videoCapture >> frame;
        if (frame.empty())
        {
            std::cout << "Error: Could not read camera frame." << std::endl;
            return EXIT_FAILURE;
        }
        //cv::resize(frame, frame, cv::Size(MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT));

        // Time only blob creation plus the forward pass.
        const auto beginTime = std::chrono::steady_clock::now();
        auto input = cv::dnn::blobFromImage(frame, 1.0, cv::Size(), cv::Scalar(), true);
        net.setInput(input, "data");
        auto output = net.forward("detection_out");
        const auto endTime = std::chrono::steady_clock::now();
        // Keep the native count type instead of narrowing it to int.
        const auto timespan = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - beginTime).count();
        std::cout << "Detection time = " << timespan << "ms" << std::endl;

        // output.size = 1 x 1 x 50 x 7
        // detectionMat.size = 50 x 7
        // detectionMat is a 2-D view over output's buffer (no copy), so output
        // must stay alive while detectionMat is in use.
        cv::Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>());
        for (int i = 0; i < detectionMat.rows; i++)
        {
            // Column 2 is the confidence; columns 3..6 are scaled by the frame
            // size below to obtain the two box corners (x1, y1, x2, y2).
            const float confidence = detectionMat.at<float>(i, 2);
            if (confidence < 0.5)
            {
                continue;
            }
            const int x = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
            const int y = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
            const int width = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols + 0.5f) - x;
            const int height = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows + 0.5f) - y;
            cv::putText(frame, std::to_string(confidence), cv::Point(x, y - 3), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);
            cv::rectangle(frame, cv::Rect(x, y, width, height), cv::Scalar(0, 255, 0), 2);
        }
        //cv::resize(frame, frame, cv::Size(640, 480));
        cv::imshow("Face Detection", frame);
        if (cv::waitKey(1) == ESC_KEY_CODE)
        {
            break;
        }
    }
    std::cout << std::endl;
    //----------------------------------------------------------------------------------------------------
    std::cout << "Releasing camera..." << std::endl;
    videoCapture.release();
    std::cout << std::endl;
    //----------------------------------------------------------------------------------------------------
    return EXIT_SUCCESS;
    //----------------------------------------------------------------------------------------------------
}
CMakeLists.txt:
# Build script for the face-detector demo (single translation unit: main.cpp).
cmake_minimum_required(VERSION 3.5)
project(face-detector LANGUAGES CXX)

# The executable is named after the project.
set(APP_NAME "${PROJECT_NAME}")

# OpenCV is mandatory; configuration fails if it cannot be found.
find_package(OpenCV REQUIRED)

add_executable(${APP_NAME} main.cpp)

# Require strict C++11 with compiler extensions disabled.
set_target_properties(${APP_NAME} PROPERTIES
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF
)

target_link_libraries(${APP_NAME} PRIVATE ${OpenCV_LIBS})
Facial landmark detection was added in the v3 model, as the ChangeLog says. If you need the model in Caffe format, you can use a converter to convert our PyTorch model to Caffe format.