FastDeploy
FastDeploy copied to clipboard
测试了rvm和scrfd都比lite.ai.toolkit里的慢
rvm在这里用时大概 660ms到750ms lite.ai.toolkit 大概 115ms到230ms
scrfd这里大概11ms到30ms lite.ai.toolkit 大概 8ms到20ms
环境是MacBook Pro 2.6 GHz 六核Intel Core i7
@chfeizy 您好,请问可以提供一下您的测试代码吗?
// Returns a monotonic timestamp in milliseconds, intended for measuring
// elapsed time (end - start). steady_clock is used instead of system_clock:
// system_clock can jump backwards (NTP/DST adjustments), which would make
// benchmark intervals negative or wildly wrong.
long getCurrentTime()
{
  return std::chrono::duration_cast<std::chrono::milliseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}
// Benchmarks FastDeploy RobustVideoMatting on live camera frames: runs
// Predict() on each frame, shows the visualized matting result, and prints
// the per-frame latency in milliseconds.
int main()
{
  std::string onnx_path = "/Users/also/www/FastDeploy/examples/vision/matting/rvm/cpp/rvm_mobilenetv3_fp32.onnx";
  auto option = fastdeploy::RuntimeOption();
  auto model = fastdeploy::vision::matting::RobustVideoMatting(onnx_path, "", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return 0;
  }
  fastdeploy::vision::MattingResult res;
  cv::VideoCapture cap;
  cv::Mat im;
  cap.open(0);
  if (!cap.isOpened()) {
    std::cerr << "Cannot open the camera." << std::endl;
    return 0;
  }
  // The early return above guarantees the camera is open here; the original
  // re-checked isOpened() and also read CAP_PROP_FRAME_WIDTH/HEIGHT into
  // unused locals every iteration — both removed.
  while (true) {
    cap >> im;
    if (im.empty())  // camera unplugged / read failure
      break;
    std::cout << "Image size: " << im.rows << "X" << im.cols << std::endl;
    long start = getCurrentTime();
    if (!model.Predict(&im, &res)) {
      std::cerr << "Failed to predict." << std::endl;
      return 0;
    }
    cv::Mat img_bgr = im.clone();
    cv::Mat vis_im = fastdeploy::vision::VisMatting(img_bgr, res);
    cv::imshow("result", vis_im);
    long end = getCurrentTime();
    // NOTE(review): this interval covers clone + VisMatting + imshow in
    // addition to Predict, so it is end-to-end latency, not pure inference.
    std::cout << "time: " << end - start << std::endl;
    if ((cv::waitKey(2) & 0xFF) == 'q')
      break;
  }
  return 0;
}
以上是 rvm 的测试代码
@chfeizy lite.ai.toolkit的测试代码有吗?方便也提供一份不?
// Monotonic millisecond timestamp for elapsed-time measurement.
// Uses steady_clock rather than system_clock: wall-clock time may be
// adjusted (NTP, manual changes) mid-run, corrupting timing deltas,
// while steady_clock is guaranteed never to go backwards.
long getCurrentTime()
{
  auto since_start = std::chrono::steady_clock::now().time_since_epoch();
  return std::chrono::duration_cast<std::chrono::milliseconds>(since_start).count();
}
// Benchmarks lite.ai.toolkit RobustVideoMatting on live camera frames:
// runs detect() per frame, shows the merged matting result, and prints
// the per-frame latency in milliseconds.
int main(__unused int argc, __unused char *argv[])
{
  // test_lite();
  std::string onnx_path = "/Users/also/www/lite.ai.toolkit/hub/onnx/cv/rvm_mobilenetv3_fp32_2.onnx";
  // std::string onnx_path = "/Users/also/www/lite.ai.toolkit/hub/onnx/cv/rvm_resnet50_fp32.onnx";
  // Stack allocation: the original `new` had no matching `delete` (leak).
  // Removed unused locals: video_path, output_path, background(+its imread),
  // det_input, and the per-frame CAP_PROP_FRAME_WIDTH/HEIGHT reads.
  lite::cv::matting::RobustVideoMatting rvm(onnx_path, 4); // 4 threads
  lite::types::MattingContent content;
  cv::VideoCapture cap;
  cv::Mat im;
  cap.open(0);
  if (!cap.isOpened()) {
    std::cerr << "Cannot open the camera." << std::endl;
    return 0;
  }
  while (true) {
    cap >> im;
    if (im.empty())  // camera unplugged / read failure
      break;
    long start = getCurrentTime();
    cv::Mat img_bgr = im.clone();
    // 0.4f = downsample_ratio; video mode on, remove noise on, minimum mode off.
    // NOTE(review): flag meanings assumed from call site — confirm against
    // lite.ai.toolkit's RobustVideoMatting::detect signature.
    rvm.detect(img_bgr, content, 0.4f, true, true, false);
    if (content.flag)
    {
      if (!content.merge_mat.empty()) {
        cv::imshow("result", content.merge_mat);
      }
      std::cout << "Default Version MobileHumanMatting Done!" << std::endl;
    }
    long end = getCurrentTime();
    // NOTE(review): interval covers clone + imshow as well as inference,
    // i.e. end-to-end latency, matching the FastDeploy measurement above.
    std::cout << "time: " << end - start << std::endl;
    if ((cv::waitKey(2) & 0xFF) == 'q')
      break;
  }
  return 0;
}
参数配置的0.4
感谢您的反馈哈~ 我们的同学正在排查具体的原因
@chfeizy 这里有两点需要对齐下
- lite.ai.toolkit默认CPU后端为ONNXRuntime
- lite.ai.toolkit线程数使用为4个线程
所以先对齐一下,在FastDeploy 模型初始化阶段使用如下代码
auto option = fastdeploy::RuntimeOption();
option.UseOrtBackend();
option.SetCpuThreadNum(4);
auto model = fastdeploy::vision::matting::RobustVideoMatting(onnx_path, "", option);
![image](https://user-images.githubusercontent.com/6427629/203719386-9d1a2537-511d-4af3-8e9d-985a999d08ff.png)
基本上还是有 280到360ms左右,还是慢的
@chfeizy 目前RVM前后处理做了优化,可以参考这两个PR: https://github.com/PaddlePaddle/FastDeploy/pull/658 https://github.com/PaddlePaddle/FastDeploy/pull/679
我这边自测了下,在我自己的Mac机器中:
CPU:Intel(R) Core(TM) i5-8257U CPU @ 1.40GHz Python:3.7 Torch:1.12.1 共100次测速,前20次warmup
版本 | Runtime(ms) | End2End(ms) |
---|---|---|
RVM TorchScript | 2247.62 | 2277.16 |
FastDeploy 优化前 | 103.07 | 238.79 |
FastDeploy 优化后 | 102.79 | 113.78 |
可以看最后一行End2End,优化较明显
你好,我测试了SCRFD,结果如下 硬件环境:Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz 线程:4 Backend: OnnxRuntime 循环次数:100 FastDeploy: 20ms/pic Lite: 24ms/pic 如果您希望进一步提高SCRFD的在FastDeploy的性能,可以参考RVM的优化方案。
rvm 测试还是200到240ms感觉还是差点
测试的是 C++ 版本吗?截图看起来好像是 Python 的。
@chfeizy 拉取最新的代码,重新编译再试试,测试为 C++ 版本
就是拉取的最新版本测试的时间是有提升但是还是不及lite.ai.toolkit
@chfeizy 使用如下C++ benchmark代码重测一下,我这边不会有这种现象
// Benchmarks FastDeploy RobustVideoMatting on a single image with the
// ONNXRuntime CPU backend: kTotalIters predictions, the first kWarmupIters
// excluded from the average; prints per-iteration and mean end-to-end
// latency (ms), then writes the visualized matting / background-swap images.
void CpuInfer(const std::string& model_file, const std::string& image_file,
              const std::string& background_file) {
  auto option = fastdeploy::RuntimeOption();
  option.UseOrtBackend();
  auto model = fastdeploy::vision::matting::RobustVideoMatting(model_file, "", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return;
  }
  auto im = cv::imread(image_file);
  auto im_bak = im.clone();
  cv::Mat bg = cv::imread(background_file);
  fastdeploy::vision::MattingResult res;
  model.EnableRecordTimeOfRuntime();
  // Named constants replace the magic 100 / 20 / 80 trio; the averaging
  // divisor is now derived from them instead of hard-coded.
  constexpr int kTotalIters = 100;
  constexpr int kWarmupIters = 20;
  double time_sum_ms = 0.0;
  // model.video_mode = false;
  for (int i = 0; i < kTotalIters; ++i) {
    // steady_clock: monotonic, immune to wall-clock adjustments mid-benchmark.
    auto begin = std::chrono::steady_clock::now();
    if (!model.Predict(&im, &res)) {
      std::cerr << "Failed to predict." << std::endl;
      return;
    }
    auto end = std::chrono::steady_clock::now();
    // duration<double, milli> replaces the manual period::num/den * 1000 math.
    double elapsed_ms = std::chrono::duration<double, std::milli>(end - begin).count();
    if (i >= kWarmupIters) {
      time_sum_ms += elapsed_ms;
    }
    std::cout << "time(ms) = " << elapsed_ms << std::endl;
  }
  model.PrintStatisInfoOfRuntime();
  std::cout << "average time(ms):" << time_sum_ms / (kTotalIters - kWarmupIters)
            << std::endl;
  auto vis_im = fastdeploy::vision::VisMatting(im_bak, res);
  auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im_bak, bg, res);
  cv::imwrite("visualized_result.jpg", vis_im_with_bg);
  cv::imwrite("visualized_result_fg.jpg", vis_im);
  std::cout << "Visualized result save in ./visualized_result.jpg "
               "and ./visualized_result_fg.jpg"
            << std::endl;
}
然后log信息截图看下吧~
你这个是图片吗,我是直接使用摄像头的
此ISSUE由于一年未更新,将会关闭处理,如有需要,可再次更新打开。