Paddle-Lite
Paddle-Lite copied to clipboard
Output data is null
Hello everyone. I am running Paddle-Lite on Windows with the C++ example, using the PP-OCRv3 detection model: https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb
The code I am running is shown below:
#include <malloc.h>
#include <time.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include "paddle_api.h" // NOLINT
#include <opencv2/opencv.hpp>
// If this demo is linked to static library:libpaddle_api_light_bundled.a
// , you should include `paddle_use_ops.h` and `paddle_use_kernels.h` to
// avoid linking errors such as `unsupport ops or kernels`.
/////////////////////////////////////////////////////////////////////////
// #include "paddle_use_kernels.h" // NOLINT
// #include "paddle_use_ops.h" // NOLINT
using namespace paddle::lite_api; // NOLINT
// using namespace paddle::lite::utils::cv_p;
using namespace cv;
int64_t ShapeProduction(const shape_t& shape) {
int64_t res = 1;
for (auto i : shape) res *= i;
return res;
}
// Renders a list of shapes as text. Every dimension is followed by ",", and
// every shape is followed by " : " except the final one when the list holds
// more than one shape (a single shape therefore still gets the " : " suffix).
std::string ShapePrint(const std::vector<shape_t>& shapes) {
  std::string joined;
  const size_t count = shapes.size();
  for (size_t idx = 0; idx < count; ++idx) {
    for (const auto dim : shapes[idx]) {
      joined += std::to_string(dim);
      joined += ",";
    }
    const bool is_last_of_many = (idx != 0) && (idx + 1 == count);
    if (!is_last_of_many) {
      joined += " : ";
    }
  }
  return joined;
}
std::string ShapePrint(const shape_t& shape) {
std::string shape_str{""};
for (auto i : shape) {
shape_str += std::to_string(i) + " ";
}
return shape_str;
}
// Splits `str_in` on ':' and returns the pieces in order. Empty pieces in
// the middle are kept; an empty piece after the final ':' is dropped, and an
// empty input yields an empty vector.
std::vector<std::string> split_string(const std::string& str_in) {
  std::vector<std::string> pieces;
  size_t cursor = 0;
  while (cursor < str_in.size()) {
    const size_t colon = str_in.find(":", cursor);
    if (colon == std::string::npos) {
      // No further separator: the remainder is the last piece.
      pieces.push_back(str_in.substr(cursor));
      break;
    }
    pieces.push_back(str_in.substr(cursor, colon - cursor));
    cursor = colon + 1;
  }
  return pieces;
}
// Parses a comma-separated list of integers (e.g. "1,3,224,224") into a
// shape vector.
//
// Each field is parsed with std::strtoll, so values wider than `int` are
// kept intact instead of being truncated. A field that does not start with
// a number parses as 0. An empty field after the final ',' is dropped, and
// an empty input yields an empty vector.
std::vector<int64_t> get_shape(const std::string& str_shape) {
  std::vector<int64_t> shape;
  size_t cursor = 0;
  while (cursor < str_shape.size()) {
    // c_str() is NUL-terminated, so strtoll stops at the ',' or string end.
    const long long dim = std::strtoll(str_shape.c_str() + cursor, nullptr, 10);
    shape.push_back(static_cast<int64_t>(dim));
    const size_t comma = str_shape.find(',', cursor);
    if (comma == std::string::npos) {
      break;
    }
    cursor = comma + 1;
  }
  return shape;
}
// Returns the arithmetic mean of the first `length` elements of `in`,
// accumulated in double precision.
//
// Returns 0 when `in` is null or `length` is 0, so callers that pass an
// unavailable buffer get a defined result instead of a null dereference or
// a division by zero.
template <typename T>
double compute_mean(const T* in, const size_t length) {
  if (in == nullptr || length == 0) {
    return 0.;
  }
  double sum = 0.;
  for (size_t i = 0; i < length; ++i) {
    sum += in[i];
  }
  return sum / length;
}
// Returns the population standard deviation (divides by `length`, not
// `length - 1`) of the first `length` elements of `in`.
//
// `has_mean`: when true, `mean` is taken as the precomputed mean of the
//             data; when false, `mean` is ignored and recomputed here.
// Returns 0 when `in` is null or `length` is 0.
template <typename T>
double compute_standard_deviation(const T* in,
                                  const size_t length,
                                  bool has_mean = false,
                                  double mean = 10000) {
  if (in == nullptr || length == 0) {
    return 0.;
  }
  if (!has_mean) {
    double sum = 0.;
    for (size_t i = 0; i < length; ++i) {
      sum += in[i];
    }
    mean = sum / length;
  }
  double squared_sum = 0.;
  for (size_t i = 0; i < length; ++i) {
    const double diff = in[i] - mean;
    squared_sum += diff * diff;  // plain multiply instead of pow(diff, 2)
  }
  return std::sqrt(squared_sum / length);
}
// Copies the pixels of `input` into a newly allocated byte array laid out
// row by row with three interleaved bytes per pixel.
//
// Pixels are read as cv::Vec3b, so `input` is expected to be an 8-bit,
// 3-channel image. The buffer comes from new[]; the caller must release it
// with delete[].
uint8_t* Mat_to_array(const cv::Mat input)
{
    // Flatten the Mat image into an in-memory array.
    const int rows = input.rows;
    const int cols = input.cols;
    uint8_t* buffer = new uint8_t[rows * cols * 3];
    uint8_t* cursor = buffer;
    for (int r = 0; r < rows; ++r)
    {
        for (int c = 0; c < cols; ++c)
        {
            const cv::Vec3b& pixel = input.at<cv::Vec3b>(r, c);
            *cursor++ = pixel[0];
            *cursor++ = pixel[1];
            *cursor++ = pixel[2];
        }
    }
    return buffer;
}
// Builds a single-channel 8-bit image of `row` x `col` pixels from the
// row-major bytes in `array`.
//
// The pixel values are copied unchanged into storage owned by the returned
// Mat, so the result stays valid after the caller releases `array`.
// Returns an empty Mat when `array` is null or either dimension is not
// positive.
Mat Array2Mat(uint8_t *array, int row, int col)
{
    if (array == nullptr || row <= 0 || col <= 0)
    {
        return Mat();
    }
    // This constructor only wraps the caller's memory; clone() makes the
    // returned image independent of that buffer's lifetime.
    const Mat view(row, col, CV_8UC1, array);
    return view.clone();
}
// Converts an interleaved 8-bit image (HWC) into a planar float buffer (CHW)
// and normalizes every value as (pixel / 255 - mean[c]) / std[c].
//
// Supports 1 to 3 channels of 8-bit data, because the mean/std tables hold
// three entries. Rows are read through their own row pointer, so
// non-continuous Mats (e.g. ROIs) are handled as well.
//
// Returns a malloc()-allocated buffer of channels * rows * cols floats that
// the caller must release with free(). Returns nullptr when the image is
// empty, is not 8-bit, has more than 3 channels, or allocation fails.
float* Mat_to_nchw(cv::Mat& img){
    const int channels = img.channels();
    const int height = img.rows;
    const int width = img.cols;
    if (img.empty() || img.depth() != CV_8U || channels > 3)
    {
        return nullptr;
    }
    // Per-channel normalization constants, indexed by channel.
    const float mean_rgb[3] = {0.485, 0.456, 0.406};
    const float std_rgb[3] = {0.229, 0.224, 0.225};
    const size_t area = static_cast<size_t>(height) * static_cast<size_t>(width);
    float* nchwMat = static_cast<float*>(malloc(area * channels * sizeof(float)));
    if (nchwMat == nullptr)
    {
        return nullptr;
    }
    // Convert HWC to CHW and normalize. Every element is written exactly
    // once, so the buffer needs no prior zero-fill.
    for (int h = 0; h < height; ++h)
    {
        const uint8_t* row = img.ptr<uint8_t>(h);
        for (int w = 0; w < width; ++w)
        {
            const size_t pixel = static_cast<size_t>(h) * width + w;
            for (int c = 0; c < channels; ++c)
            {
                const float value = row[w * channels + c] / 255.0f;
                nchwMat[c * area + pixel] = (value - mean_rgb[c]) / std_rgb[c];
            }
        }
    }
    return nchwMat;
}
void RunModel(std::string model_dir,
const std::vector<shape_t>& input_shapes,
float * img,
size_t repeats,
size_t warmup,
size_t power_mode,
size_t thread_num,
size_t accelerate_opencl,
size_t print_output_elem) {
// 1. Set MobileConfig
MobileConfig config;
config.set_model_from_file(model_dir);
#ifdef METAL
std::string metal_lib_path = "../../../metal/lite.metallib";
config.set_metal_lib_path(metal_lib_path);
config.set_metal_use_mps(true);
#else
// NOTE: Use android gpu with opencl, you should ensure:
// first, [compile **cpu+opencl** paddlelite
// lib](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/opencl.md);
// second, [convert and use opencl nb
// model](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/user_guides/opt/opt_bin.md).
bool is_opencl_backend_valid =
::IsOpenCLBackendValid(/*check_fp16_valid = false*/);
std::cout << "is_opencl_backend_valid:"
<< (is_opencl_backend_valid ? "true" : "false") << std::endl;
if (is_opencl_backend_valid) {
if (accelerate_opencl != 0) {
// Set opencl kernel binary.
// Large addtitional prepare time is cost due to algorithm selecting and
// building kernel from source code.
// Prepare time can be reduced dramitically after building algorithm file
// and OpenCL kernel binary on the first running.
// The 1st running time will be a bit longer due to the compiling time if
// you don't call `set_opencl_binary_path_name` explicitly.
// So call `set_opencl_binary_path_name` explicitly is strongly
// recommended.
// Make sure you have write permission of the binary path.
// We strongly recommend each model has a unique binary name.
const std::string bin_path = "/data/local/tmp/";
const std::string bin_name = "lite_opencl_kernel.bin";
config.set_opencl_binary_path_name(bin_path, bin_name);
// opencl tune option
// CL_TUNE_NONE: 0
// CL_TUNE_RAPID: 1
// CL_TUNE_NORMAL: 2
// CL_TUNE_EXHAUSTIVE: 3
const std::string tuned_path = "/data/local/tmp/";
const std::string tuned_name = "lite_opencl_tuned.bin";
config.set_opencl_tune(CL_TUNE_NORMAL, tuned_path, tuned_name);
// opencl precision option
// CL_PRECISION_AUTO: 0, first fp16 if valid, default
// CL_PRECISION_FP32: 1, force fp32
// CL_PRECISION_FP16: 2, force fp16
config.set_opencl_precision(CL_PRECISION_FP16);
}
} else {
std::cout << "*** nb model will be running on cpu. ***" << std::endl;
// you can give backup cpu nb model instead
// config.set_model_from_file(cpu_nb_model_dir);
}
#endif
// NOTE: To load model transformed by model_optimize_tool before
// release/v2.3.0, plese use `set_model_dir` API as listed below.
// config.set_model_dir(model_dir);
config.set_power_mode(static_cast<paddle::lite_api::PowerMode>(power_mode));
config.set_threads(thread_num);
// 2. Create PaddlePredictor by MobileConfig
std::shared_ptr<PaddlePredictor> predictor =
CreatePaddlePredictor<MobileConfig>(config);
// 3. Prepare input data
std::cout << "input_shapes.size():" << input_shapes.size() << std::endl; //一张图片 ,input_shape 是一个二维数组,每一行是[1 3 h w]
for (int j = 0; j < input_shapes.size(); ++j) {
auto input_tensor = predictor->GetInput(j);
input_tensor->Resize(input_shapes[j]);
auto input_data = input_tensor->mutable_data<float>();
int input_num = 1;
for (int i = 0; i < input_shapes[j].size(); ++i) { //input_shape[j].size():维度
input_num *= input_shapes[j][i];
}
// 赋值
// for (int i = 0; i < input_num; ++i) {
// input_data[i] = 1.f;
// }
for (int i = 0;i < input_num;i ++){
input_data[i] = img[i];
}
}
predictor->Run();
// 5. Get output
std::cout << "\n====== output summary ====== " << std::endl;
size_t output_tensor_num = predictor->GetOutputNames().size();
std::cout << "output tensor num:" << output_tensor_num << std::endl;
for (size_t tidx = 0; tidx < output_tensor_num; ++tidx) { // 这是有几个预测输出
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
predictor->GetOutput(tidx);
std::cout << "\n--- output tensor " << tidx << " ---" << std::endl;
auto out_shape = output_tensor->shape();
auto out_data = output_tensor->data<float>();
if(!out_data) {
std::cout<<"================= outdata null"<<std::endl;
}
else {
std::cout<<"================= outdata not null"<<std::endl;
}
auto out_mean = compute_mean<float>(out_data, ShapeProduction(out_shape));
auto out_std_dev = compute_standard_deviation<float>(
out_data, ShapeProduction(out_shape), true, out_mean);
std::cout << "output shape(NCHW):" << ShapePrint(out_shape) << std::endl; // 转字符串
std::cout << "output tensor " << tidx
<< " elem num:" << ShapeProduction(out_shape) << std::endl; // nchw总数
std::cout << "output tensor " << tidx
<< " standard deviation:" << out_std_dev << std::endl; // tidx 第i个输入, 平均值标准差
std::cout << "output tensor " << tidx << " mean value:" << out_mean
<< std::endl;
// print output,是否打印,
int h = out_shape[1]; // out_shape ={ 1, h, w}
int w = out_shape[2];
uint8_t* dst = (uint8_t*)malloc(h*w*sizeof(uint8_t));
if (print_output_elem) {
for (int i = 0; i < ShapeProduction(out_shape); ++i) {
std::cout << "out[" << tidx << "][" << i
<< "]:" << output_tensor->data<float>()[i] << std::endl;
// int r = i / w;
// int c = i % w;
dst[i] = output_tensor->data<float>()[i];
}
Mat savemat = Array2Mat(dst, h, w);
cv::imwrite("./mask.jpg", savemat);
}
}
}
// Demo entry point: reads an image, halves its size, converts it to a
// normalized CHW float buffer, runs the detection model once and prints the
// elapsed CPU time. Returns 1 when the image cannot be read, is too small
// to halve, or cannot be converted; 0 otherwise.
int main(int argc, char** argv) {
  // argc: number of arguments passed in
  // argv: array of argument strings
  std::cout<<"参数的数目:"<<argc<<std::endl;
  std::cout<<"参数的地址:"<<argv<<std::endl;
  const size_t repeats = 100;
  const size_t warmup = 10;
  // set arm power mode:
  // 0 for big cluster, high performance
  // 1 for little cluster
  // 2 for all cores
  // 3 for no bind
  const size_t power_mode = 3;
  const size_t thread_num = 1;
  const size_t accelerate_opencl = 0;
  const size_t print_output_elem = 0;
  std::string img_path = "D:\\Test\\testOCR\\1.png";
  std::string model_dir = "D:\\ProjectQt\\coldchain\\3.Sources\\momavi\\bin\\model\\textDetectionModel\\ch_PP-OCRv3_det_infer\\ch_PP-OCRv3_det_infer.nb";
  std::cout<<"......opencv 读取图片...。。。"<<std::endl;
  Mat img_mat = cv::imread(img_path);
  if (img_mat.empty()) {
    printf("读取图片失败\n");
    // Continuing would hand an empty image to cv::resize.
    return 1;
  }
  if (img_mat.cols < 2 || img_mat.rows < 2) {
    std::cerr << "image is too small to be halved" << std::endl;
    return 1;
  }
  // NOTE(review): PaddleOCR's own detection pre-processing reportedly resizes
  // to multiples of 32 - confirm whether this model needs that here.
  cv::resize(img_mat,img_mat,Size(img_mat.cols/2, img_mat.rows/2));
  cv::cvtColor(img_mat, img_mat, cv::COLOR_BGR2RGB);
  std::cout<<"......输出图片信息...。。。"<<std::endl;
  std::cout <<"rows:"<<img_mat.rows <<"cols:"<<img_mat.cols<<"channels:"<<img_mat.channels()<<"数据格式:"<<img_mat.type()<<std::endl;
  cv::imwrite("./img_rgb.jpg", img_mat);
  /******** convert HWC to CHW *****************/
  float * src1 = Mat_to_nchw(img_mat);
  if (src1 == nullptr) {
    std::cerr << "failed to convert the image to an NCHW buffer" << std::endl;
    return 1;
  }
  // Print the first few converted values as a sanity check.
  const size_t total = static_cast<size_t>(img_mat.channels()) *
                       static_cast<size_t>(img_mat.rows) *
                       static_cast<size_t>(img_mat.cols);
  for (size_t i = 0; i < total && i < 10; ++i) {
    std::cout<<src1[i]<<std::endl;
  }
  std::cout<<".........run model........."<<std::endl;
  // shape_t ==> std::vector<int64_t>
  std::vector<shape_t> input_shapes{{1, 3, img_mat.rows, img_mat.cols}};
  clock_t start = clock();
  RunModel(model_dir,
           input_shapes,
           src1,
           repeats,
           warmup,
           power_mode,
           thread_num,
           accelerate_opencl,
           print_output_elem);
  clock_t end = clock();
  // Mat_to_nchw allocates with malloc, so release with free.
  free(src1);
  auto gap = static_cast<double>(end - start) / CLOCKS_PER_SEC;
  std::cout<<"运行时间,单位/s"<<gap<<std::endl;
  return 0;
}
My problem is that the output data is always null. Can someone help me?
可以检查下导出 nb 模型用的 paddlelite 版本是不是和推理版本是一致的