
Output data is null

reddevil1310 opened this issue 11 months ago · 1 comment

Hello everyone. I'm running Paddle-Lite on Windows with the C++ example, using the PP-OCRv3 detection model: https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb

The code I run is below:

#include <malloc.h>
#include <time.h>
#include <cmath>
#include <cstdlib>   // atoi, malloc/free
#include <cstring>   // memset
#include <iostream>
#include <string>
#include <vector>
#include "paddle_api.h"  // NOLINT
#include <opencv2/opencv.hpp>
// If this demo is linked to the static library libpaddle_api_light_bundled.a,
// you should include `paddle_use_ops.h` and `paddle_use_kernels.h` to
// avoid linking errors such as `unsupport ops or kernels`.
/////////////////////////////////////////////////////////////////////////
// #include "paddle_use_kernels.h"  // NOLINT
// #include "paddle_use_ops.h"      // NOLINT

using namespace paddle::lite_api;  // NOLINT
// using namespace paddle::lite::utils::cv_p;
using namespace cv;
int64_t ShapeProduction(const shape_t& shape) {
 int64_t res = 1;
 for (auto i : shape) res *= i;
 return res;
}

std::string ShapePrint(const std::vector<shape_t>& shapes) {
 std::string shapes_str{""};
 for (size_t shape_idx = 0; shape_idx < shapes.size(); ++shape_idx) {
   auto shape = shapes[shape_idx];
   std::string shape_str;
   for (auto i : shape) {
     shape_str += std::to_string(i) + ",";
   }
   shapes_str += shape_str;
   shapes_str +=
       (shape_idx != 0 && shape_idx == shapes.size() - 1) ? "" : " : ";
 }
 return shapes_str;
}

std::string ShapePrint(const shape_t& shape) {
 std::string shape_str{""};
 for (auto i : shape) {
   shape_str += std::to_string(i) + " ";
 }
 return shape_str;
}

std::vector<std::string> split_string(const std::string& str_in) {
 std::vector<std::string> str_out;
 std::string tmp_str = str_in;
 while (!tmp_str.empty()) {
   size_t next_offset = tmp_str.find(":");
   str_out.push_back(tmp_str.substr(0, next_offset));
   if (next_offset == std::string::npos) {
     break;
   } else {
     tmp_str = tmp_str.substr(next_offset + 1);
   }
 }
 return str_out;
}

std::vector<int64_t> get_shape(const std::string& str_shape) {
 std::vector<int64_t> shape;
 std::string tmp_str = str_shape;
 while (!tmp_str.empty()) {
   int dim = atoi(tmp_str.data());
   shape.push_back(dim);
   size_t next_offset = tmp_str.find(",");
   if (next_offset == std::string::npos) {
     break;
   } else {
     tmp_str = tmp_str.substr(next_offset + 1);
   }
 }
 return shape;
}

template <typename T>
double compute_mean(const T* in, const size_t length) {
 double sum = 0.;
 for (size_t i = 0; i < length; ++i) {
   sum += in[i];
 }
 return sum / length;
}

template <typename T>
double compute_standard_deviation(const T* in,
                                 const size_t length,
                                 bool has_mean = false,
                                 double mean = 10000) {
 if (!has_mean) {
   mean = compute_mean<T>(in, length);
 }

 double variance = 0.;
 for (size_t i = 0; i < length; ++i) {
   variance += pow((in[i] - mean), 2);
 }
 variance /= length;
 return sqrt(variance);
}

uint8_t* Mat_to_array(const cv::Mat input)
{
 // Copy the Mat image into a raw interleaved (HWC) byte array.
   int height = input.rows;
   int width = input.cols;
   uint8_t* pRgb = new uint8_t[height * width * 3];
   for (int i = 0; i < height; i++)
   {
    for (int j = 0; j < width; j++)
    {
        for (int k = 0; k < 3; k++)
        {
            pRgb[i * width * 3 + j * 3 + k] = input.at<cv::Vec3b>(i, j)[k];
        }
    }
   }
   return pRgb;
}
Mat Array2Mat(uint8_t* array, int row, int col)
{
   // Wrap the single-channel buffer in a Mat. The caller is expected to
   // have already scaled the values into the 0..255 range. Clone so the
   // returned Mat owns its own memory and `array` can be freed.
   Mat img(row, col, CV_8UC1, array);
   return img.clone();
}
float* Mat_to_nchw(cv::Mat& img) {
 const int channels = img.channels();
 const int height = img.rows;
 const int width = img.cols;
 float* nchwMat = (float*)malloc(channels * height * width * sizeof(float));
 memset(nchwMat, 0, channels * height * width * sizeof(float));

 // Convert HWC to CHW and normalize with the ImageNet mean/std (RGB order).
 float mean_rgb[3] = {0.485, 0.456, 0.406};
 float std_rgb[3]  = {0.229, 0.224, 0.225};
 uint8_t* ptMat = img.ptr<uint8_t>(0);
 int area = height * width;
 for (int h = 0; h < height; ++h)
 {
   for (int w = 0; w < width; ++w)
   {
       for (int c = 0; c < channels; ++c)
       {
           int srcIdx = channels * width * h + channels * w + c;
           int pixelIdx = srcIdx / channels;  // pixel index: h * width + w
           nchwMat[pixelIdx + c * area] =
               (ptMat[srcIdx] / 255.0f - mean_rgb[c]) / std_rgb[c];
       }
   }
 }
 return nchwMat;
}


void RunModel(std::string model_dir,
             const std::vector<shape_t>& input_shapes,
             float* img,
             size_t repeats,   // currently unused
             size_t warmup,    // currently unused
             size_t power_mode,
             size_t thread_num,
             size_t accelerate_opencl,
             size_t print_output_elem) {
 // 1. Set MobileConfig
 MobileConfig config;
 config.set_model_from_file(model_dir);

#ifdef METAL
 std::string metal_lib_path = "../../../metal/lite.metallib";
 config.set_metal_lib_path(metal_lib_path);
 config.set_metal_use_mps(true);
#else
 // NOTE: To use the Android GPU with OpenCL, you should ensure:
 //  first, [compile the **cpu+opencl** Paddle-Lite
 //    lib](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/opencl.md);
 //  second, [convert and use an opencl nb
 //    model](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/user_guides/opt/opt_bin.md).

 bool is_opencl_backend_valid =
     ::IsOpenCLBackendValid(/*check_fp16_valid = false*/);
 std::cout << "is_opencl_backend_valid:"
           << (is_opencl_backend_valid ? "true" : "false") << std::endl;
 if (is_opencl_backend_valid) {
   if (accelerate_opencl != 0) {
     // Set opencl kernel binary.
     // A large additional preparation cost is incurred by algorithm
     // selection and by building kernels from source code.
     // The preparation time can be reduced dramatically after the algorithm
     // file and the OpenCL kernel binary are built on the first run.
     // The first run will be a bit longer due to compilation time if you
     // don't call `set_opencl_binary_path_name` explicitly, so calling
     // `set_opencl_binary_path_name` explicitly is strongly recommended.
     // Make sure you have write permission for the binary path.
     // We strongly recommend that each model have a unique binary name.
     const std::string bin_path = "/data/local/tmp/";
     const std::string bin_name = "lite_opencl_kernel.bin";
     config.set_opencl_binary_path_name(bin_path, bin_name);

     // opencl tune option
     // CL_TUNE_NONE: 0
     // CL_TUNE_RAPID: 1
     // CL_TUNE_NORMAL: 2
     // CL_TUNE_EXHAUSTIVE: 3
     const std::string tuned_path = "/data/local/tmp/";
     const std::string tuned_name = "lite_opencl_tuned.bin";
     config.set_opencl_tune(CL_TUNE_NORMAL, tuned_path, tuned_name);

     // opencl precision option
     // CL_PRECISION_AUTO: 0, first fp16 if valid, default
     // CL_PRECISION_FP32: 1, force fp32
     // CL_PRECISION_FP16: 2, force fp16
     config.set_opencl_precision(CL_PRECISION_FP16);
   }
 } else {
   std::cout << "*** nb model will be running on cpu. ***" << std::endl;
   // you can give backup cpu nb model instead
   // config.set_model_from_file(cpu_nb_model_dir);
 }
#endif

 // NOTE: To load a model transformed by model_optimize_tool before
 // release/v2.3.0, please use the `set_model_dir` API as listed below.
 // config.set_model_dir(model_dir);
 config.set_power_mode(static_cast<paddle::lite_api::PowerMode>(power_mode));
 config.set_threads(thread_num);
 // 2. Create PaddlePredictor by MobileConfig
 std::shared_ptr<PaddlePredictor> predictor =
     CreatePaddlePredictor<MobileConfig>(config);

 // 3. Prepare input data
 std::cout << "input_shapes.size():" << input_shapes.size() << std::endl; //一张图片 ,input_shape 是一个二维数组,每一行是[1 3 h w]
 for (int j = 0; j < input_shapes.size(); ++j) {
   auto input_tensor = predictor->GetInput(j);
   input_tensor->Resize(input_shapes[j]);
   auto input_data = input_tensor->mutable_data<float>();
   int input_num = 1;
   for (int i = 0; i < input_shapes[j].size(); ++i) { // input_shapes[j].size(): number of dimensions
     input_num *= input_shapes[j][i];
   }
   // Copy the preprocessed image into the input tensor.
   for (int i = 0; i < input_num; i++) {
     input_data[i] = img[i];
   }
 }

 // 4. Run the predictor
 predictor->Run();

 // 5. Get output
 std::cout << "\n====== output summary ====== " << std::endl;
 size_t output_tensor_num = predictor->GetOutputNames().size();
 std::cout << "output tensor num:" << output_tensor_num << std::endl;

 for (size_t tidx = 0; tidx < output_tensor_num; ++tidx) { // iterate over each output tensor
   std::unique_ptr<const paddle::lite_api::Tensor> output_tensor =
       predictor->GetOutput(tidx);
   std::cout << "\n--- output tensor " << tidx << " ---" << std::endl;
   auto out_shape = output_tensor->shape();
   auto out_data = output_tensor->data<float>();
   if (!out_data) {
       std::cout << "================= output data is null" << std::endl;
       continue;  // skip the statistics below; they would dereference a null pointer
   }
   std::cout << "================= output data is not null" << std::endl;

   auto out_mean = compute_mean<float>(out_data, ShapeProduction(out_shape));

   auto out_std_dev = compute_standard_deviation<float>(
       out_data, ShapeProduction(out_shape), true, out_mean);

   std::cout << "output shape(NCHW):" << ShapePrint(out_shape) << std::endl; // 转字符串
   std::cout << "output tensor " << tidx
             << " elem num:" << ShapeProduction(out_shape) << std::endl; // nchw总数
   std::cout << "output tensor " << tidx
             << " standard deviation:" << out_std_dev << std::endl; // tidx 第i个输入, 平均值标准差
   std::cout << "output tensor " << tidx << " mean value:" << out_mean
             << std::endl;

   // print output,是否打印,
   int h = out_shape[1];  // out_shape ={ 1, h, w}
   int w = out_shape[2];
   uint8_t* dst = (uint8_t*)malloc(h*w*sizeof(uint8_t));


   if (print_output_elem) {
     for (int i = 0; i < ShapeProduction(out_shape); ++i) {
       std::cout << "out[" << tidx << "][" << i
                 << "]:" << output_tensor->data<float>()[i] << std::endl;
       // int r = i / w;
       // int c = i % w;
       dst[i] = output_tensor->data<float>()[i];
     }
     Mat savemat = Array2Mat(dst, h, w);
     cv::imwrite("./mask.jpg", savemat);
   }

 }
}

int main(int argc, char** argv) {
 // argc: number of command-line arguments
 // argv: array of argument strings
 std::cout << "argument count: " << argc << std::endl;
 std::cout << "argv address: " << argv << std::endl;


 int repeats = 10;
 int warmup = 10;
 // set arm power mode:
 // 0 for big cluster, high performance
 // 1 for little cluster
 // 2 for all cores
 // 3 for no bind
 size_t power_mode = 0;
 size_t thread_num = 1;
 int accelerate_opencl = 1;
 int print_output_elem = 0;

 std::string img_path = "D:\\Test\\testOCR\\1.png";
 std::string model_dir = "D:\\ProjectQt\\coldchain\\3.Sources\\momavi\\bin\\model\\textDetectionModel\\ch_PP-OCRv3_det_infer\\ch_PP-OCRv3_det_infer.nb";
 std::cout << "...... reading the image with OpenCV ......" << std::endl;
 Mat img_mat = cv::imread(img_path);
 if (img_mat.empty()) {
   printf("failed to read the image\n");
   return -1;  // nothing to do without input data
 }
 cv::resize(img_mat, img_mat, Size(img_mat.cols / 2, img_mat.rows / 2));
 cv::cvtColor(img_mat, img_mat, cv::COLOR_BGR2RGB);
 std::cout << "...... image info ......" << std::endl;
 std::cout << "rows:" << img_mat.rows << " cols:" << img_mat.cols
           << " channels:" << img_mat.channels()
           << " type:" << img_mat.type() << std::endl;
 cv::imwrite("./img_rgb.jpg", img_mat);

/******** HWC -> CHW conversion ********/
 float* src1 = Mat_to_nchw(img_mat);
 // Print the first few normalized values as a sanity check.
 for (int i = 0; i < 10; i++) {
   std::cout << src1[i] << std::endl;
 }

 repeats = 100;
 warmup = 10;
 power_mode = 3;  // no core binding
 thread_num = 1;
 accelerate_opencl = 0;
 print_output_elem = 0;
 std::cout<<".........run model........."<<std::endl;
 std::vector<shape_t> input_shapes{{1, 3, img_mat.rows, img_mat.cols}};  // shape_t ==> std::vector<int64_t>
 clock_t start = clock();
 RunModel(model_dir,
          input_shapes,
          src1,
          repeats,
          warmup,
          power_mode,
          thread_num,
          accelerate_opencl,
          print_output_elem);
 clock_t end = clock();
 auto gap = (double)(end - start) / CLOCKS_PER_SEC;
 std::cout << "elapsed time (s): " << gap << std::endl;
 free(src1);  // allocated by Mat_to_nchw
 return 0;
}

My problem is that the output data is always null. Can someone help me?
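
In my local build I also added a small guard so the statistics helpers never see a null pointer. This is just a sketch on top of the standard Paddle-Lite C++ API (`GetOutputNames`, `GetOutput`, `data<float>()`); the function name is my own:

#include <iostream>
#include <memory>
#include "paddle_api.h"  // NOLINT

// Sketch: inspect every output tensor and skip the ones whose buffer is
// null, so compute_mean / compute_standard_deviation never dereference a
// null pointer.
void DumpOutputs(
    const std::shared_ptr<paddle::lite_api::PaddlePredictor>& predictor) {
  const size_t num_outputs = predictor->GetOutputNames().size();
  for (size_t tidx = 0; tidx < num_outputs; ++tidx) {
    auto tensor = predictor->GetOutput(tidx);
    const float* data = tensor->data<float>();
    if (data == nullptr) {
      std::cerr << "output tensor " << tidx << " has a null buffer"
                << std::endl;
      continue;
    }
    std::cout << "output tensor " << tidx << " looks valid" << std::endl;
  }
}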

reddevil1310 · Jul 11 '23

You could check whether the Paddle-Lite version used to export the .nb model matches the version used for inference.
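
For example, a minimal sketch for printing the runtime-side version, assuming your Paddle-Lite build exposes `PaddlePredictor::GetVersion()`; compare its output against the version of the opt tool that exported the .nb file:

#include <iostream>
#include <memory>
#include "paddle_api.h"  // NOLINT

// Sketch: print the Paddle-Lite runtime version so it can be compared
// with the version of the opt tool that produced the .nb model.
void PrintRuntimeVersion(
    const std::shared_ptr<paddle::lite_api::PaddlePredictor>& predictor) {
  std::cout << "Paddle-Lite runtime version: " << predictor->GetVersion()
            << std::endl;
}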

zhupengyang · Feb 05 '24