RT-DETR icon indicating copy to clipboard operation
RT-DETR copied to clipboard

pth2onnx和onnx2trt是没有问题,但是trt推理出现box坐标是特别大的值,标签和得分是正确的

Open Kingxudong opened this issue 5 months ago • 5 comments

pth转换为onnx,用官方的代码转,并且测试是正确。但是onnx转换为trt,用的是tensorRT8.6.1,转换没有问题,但是推理出现box坐标是特别大的值,标签和得分是正确的。 D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\trtexec.exe --onnx=model.onnx --workspace=4096 --avgRuns=100 --shapes=images:1x3x640x640 --saveEngine=model.trt

我的C++推理代码如下

#include #include #include #include <windows.h> #include #include #include <opencv2/opencv.hpp> #include "NvInfer.h" #include "NvInferRuntimeCommon.h" #include #include <opencv2/core/utils/filesystem.hpp>

using namespace nvinfer1; using namespace std;

#define CUDA_CHECK(call)
do {
cudaError_t status = call;
if (status != cudaSuccess) {
fprintf(stderr, "CUDA Error in file '%s' in line %d: %s\n",
FILE, LINE, cudaGetErrorString(status));
exit(EXIT_FAILURE);
}
} while (0)

class Logger : public ILogger { public: void log(Severity severity, const char* msg) noexcept override { if (severity != Severity::kINFO) std::cout << msg << std::endl; } } gLogger; class TensorRTInference { public: TensorRTInference(const std::string& enginePath); ~TensorRTInference(); void doInference(const std::vectorcv::String& image_paths);

private: IRuntime* runtime; ICudaEngine* engine_; IExecutionContext* context_; void* buffers_[5]; int inputIndex1_; int inputIndex2_; int outputIndex1_; int outputIndex2_; int outputIndex3_; int batchSize_; int inputSize1_; int inputSize2_; int outputSize1_; int outputSize2_; int outputSize3_;

void allocateBuffers();
void preprocess(const cv::Mat& image, float* buffer1, float* buffer2);
void postprocess(float* output1, float* output2, float* output3, cv::Mat& image);

};

int volume(const Dims& dims) { int vol = 1; for (int i = 0; i < dims.nbDims; ++i) { vol *= dims.d[i]; } return vol; } TensorRTInference::TensorRTInference(const std::string& enginePath) : engine_(nullptr), context_(nullptr) { std::ifstream engineFile(enginePath, std::ios::binary); if (!engineFile.good()) { std::cerr << "Error opening engine file: " << enginePath << std::endl; return; }

std::stringstream engineStream;
engineStream << engineFile.rdbuf();
engineFile.close();

runtime = createInferRuntime(gLogger);
if (!runtime) {
    std::cerr << "Error creating InferRuntime" << std::endl;
    return;
}

engine_ = runtime->deserializeCudaEngine(engineStream.str().data(), engineStream.str().size(), nullptr);
if (engine_ == nullptr) {
    std::cerr << "Error deserializing the engine file: " << enginePath << std::endl;
    return;
}

context_ = engine_->createExecutionContext();
if (!context_) {
    std::cerr << "Error creating ExecutionContext" << std::endl;
    return;
}

inputIndex1_ = engine_->getBindingIndex("images");
inputIndex2_ = engine_->getBindingIndex("orig_target_sizes");
outputIndex1_ = engine_->getBindingIndex("labels");
outputIndex2_ = engine_->getBindingIndex("boxes");
outputIndex3_ = engine_->getBindingIndex("scores");

// Get input sizes
const Dims& inputDims1 = engine_->getBindingDimensions(inputIndex1_);
const Dims& inputDims2 = engine_->getBindingDimensions(inputIndex2_);
for (int i = 0; i < inputDims2.nbDims; ++i) {
    std::cout << "inputDims2[" << i << "]: " << inputDims2.d[i] << std::endl;
}
const Dims& outputDims1 = engine_->getBindingDimensions(outputIndex1_);
const Dims& outputDims2 = engine_->getBindingDimensions(outputIndex2_);

const Dims& outputDims3 = engine_->getBindingDimensions(outputIndex3_);

batchSize_ = 1; 


inputSize1_ = volume(inputDims1) * batchSize_ * sizeof(float);
inputSize2_ = volume(inputDims2) * sizeof(float);
outputSize1_ = volume(outputDims1) * sizeof(float); 
outputSize2_ = volume(outputDims2) * sizeof(float); 
outputSize3_ = volume(outputDims3) * sizeof(float); 

cout << inputSize2_ << endl;

allocateBuffers();

}

std::vector ToTensor(cv::Mat image) { if (image.empty()) { std::cerr << "Error: Empty image" << std::endl; return {}; }

if (image.channels() != 3) {
    std::cerr << "Error: Image must have 3 channels" << std::endl;
    return {};
}

image.convertTo(image, CV_32FC3, 1.0f / 255.0f);

// Convert image to tensor
std::vector<cv::Mat> channels(3);
cv::split(image, channels);

std::vector<float> tensor(image.total() * image.channels());
int index = 0;


for (int c = 0; c < 3; ++c) {
    for (int i = 0; i < channels[c].rows; ++i) {
        for (int j = 0; j < channels[c].cols; ++j) {
            tensor[index++] = channels[c].at<float>(i, j);
        }
    }
}

return tensor;

} std::vector ToTensorAndNormalize(cv::Mat image) { if (image.empty()) { std::cerr << "Error: Empty image" << std::endl; return {}; }

if (image.channels() != 3) {
    std::cerr << "Error: Image must have 3 channels" << std::endl;
    return {};
}
cv::resize(image, image, cv::Size(640.f, 640.f));
image.convertTo(image, CV_32FC3, 1.0f / 255.0f);

const float mean[3] = { 0.485f, 0.456f, 0.406f };
const float std[3] = { 0.229f, 0.224f, 0.225f };

std::vector<cv::Mat> channels(3);
cv::split(image, channels);

std::vector<float> input_tensor;
input_tensor.reserve(640 * 640 * 3);  


for (int c = 0; c < 3; ++c) {
    for (int i = 0; i < channels[c].rows; ++i) {
        for (int j = 0; j < channels[c].cols; ++j) {
            float pixel = (channels[c].at<float>(i, j) - mean[c]) / std[c];
            input_tensor.push_back(pixel);
        }
    }
}

return input_tensor; 

}

TensorRTInference::~TensorRTInference() { if (context_) { context_->destroy(); } if (engine_) { engine_->destroy(); } if (runtime) { runtime->destroy(); } for (int i = 0; i < 5; ++i) { if (buffers_[i]) cudaFree(buffers_[i]); } }

void TensorRTInference::preprocess(const cv::Mat& image, float* buffer1, float* buffer2) { cv::Mat rgb_image;

if (image.channels() == 1) {
    cv::cvtColor(image, rgb_image, cv::COLOR_GRAY2RGB);
}
else if (image.channels() == 4) {
    cv::cvtColor(image, rgb_image, cv::COLOR_BGRA2RGB);
}
else if (image.channels() == 3) {
    cv::cvtColor(image, rgb_image, cv::COLOR_BGR2RGB);
}
else {
    rgb_image = image;  
}

std::vector<float> tensor1 = ToTensorAndNormalize(rgb_image);

//float tensor[1][2] = { {1, 1} };
//float orig_target_sizes[2] = { 640.0f, 640.0f };
float orig_target_sizes[2] = { static_cast<float>(image.cols), static_cast<float>(image.rows) };
std::cout << "Original target sizes: " << orig_target_sizes[0] << ", " << orig_target_sizes[1] << std::endl;

std::cout << "Input tensor size: " << tensor1.size() << std::endl;
//std::cout << "Input tensor size: " << tensor1.size() << std::endl;

std::memcpy(buffer1, tensor1.data(), inputSize1_);

std::memcpy(buffer2, orig_target_sizes, inputSize2_);

}

void TensorRTInference::postprocess(float* output1, float* output2, float* output3, cv::Mat& image) { float numDetections = outputSize2_ / (4 * sizeof(float)); float confThreshold = 0.5f;

std::cout << "Box " << (output2 + 1 * 4)[0] <<  std::endl;

for (int i = 0; i < numDetections; ++i) {

    float* bbox = output2 + i * 4;
    float labels = output1[i];

    int x1 = static_cast<float>(bbox[0]);
    int y1 = static_cast<float>(bbox[1]);
    int x2 = static_cast<float>(bbox[2]);
    int y2 = static_cast<float>(bbox[3]);
    
    // Draw bounding box
    cv::rectangle(image, cv::Point(x1, y1), cv::Point(x2, y2), cv::Scalar(0, 255, 0), 2);
    std::string label = "label: " + std::to_string(labels);
    cv::putText(image, label, cv::Point(x1, y1 - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 1);

}

float* additionalData = output3;

std::cout << "First values of output3:" << std::endl;
for (int i = 0; i < 50; ++i) {
    std::cout << additionalData[i] << " ";
}
std::cout << std::endl;

}

std::string replaceFolderName(const std::string& path, const std::string& oldFolder, const std::string& newFolder) { size_t pos = path.find(oldFolder); if (pos != std::string::npos) { std::string newPath = path.substr(0, pos) + newFolder + path.substr(pos + oldFolder.length()); return newPath; } else { return path; } }

void softmax(float* output, std::vector& probs) { probs.clear(); float sum = 0.0f; for (int i = 0; i < 2; ++i) { probs.push_back(std::exp(output[i])); sum += probs.back(); } for (int i = 0; i < 2; ++i) { probs[i] /= sum; } } void TensorRTInference::allocateBuffers() { std::cout << "Allocating buffers..." << std::endl; CUDA_CHECK(cudaMalloc(&buffers_[inputIndex1_], inputSize1_)); CUDA_CHECK(cudaMalloc(&buffers_[inputIndex2_], inputSize2_)); CUDA_CHECK(cudaMalloc(&buffers_[outputIndex1_], outputSize1_)); CUDA_CHECK(cudaMalloc(&buffers_[outputIndex2_], outputSize2_)); CUDA_CHECK(cudaMalloc(&buffers_[outputIndex3_], outputSize3_)); std::cout << "Buffers allocated successfully." << std::endl; } void TensorRTInference::doInference(const std::vectorcv::String& image_paths) { float* inputBuffer1 = new float[inputSize1_ / sizeof(float)]; float* inputBuffer2 = new float[inputSize2_ / sizeof(float)];

float* outputBuffer1 = new float[outputSize1_ / sizeof(float)];
float* outputBuffer2 = new float[outputSize2_ / sizeof(float)];
float* outputBuffer3 = new float[outputSize3_ / sizeof(float)];

for (const auto& filename : image_paths) {
    std::cout << "Processing image: " << filename << std::endl;
    clock_t start = clock();
    cv::Mat image = cv::imread(filename);
    int height = image.rows;
    int width = image.cols;
    int channels = image.channels();

    // Print the shape of the image
    std::cout << "Image shape: (" << height << ", " << width << ", " << channels << ")" << std::endl;

    if (image.empty()) {
        std::cerr << "Error loading image: " << filename << std::endl;
        continue;
    }

    preprocess(image, inputBuffer1, inputBuffer2);


    clock_t gpuStart = clock();
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex1_], inputBuffer1, inputSize1_, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(buffers_[inputIndex2_], inputBuffer2, inputSize2_, cudaMemcpyHostToDevice));

    bool success = context_->executeV2(buffers_);
    if (!success) {
        std::cerr << "TensorRT execution failed." << std::endl;
        continue;
    }

    CUDA_CHECK(cudaMemcpy(outputBuffer1, buffers_[outputIndex1_], outputSize1_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer2, buffers_[outputIndex2_], outputSize2_, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(outputBuffer3, buffers_[outputIndex3_], outputSize3_, cudaMemcpyDeviceToHost));

    clock_t gpuEnd = clock();
    std::cout << "GPU inference time: " << (gpuEnd - gpuStart) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;

    postprocess(outputBuffer1, outputBuffer2, outputBuffer3, image);
    std::cout << "First values of outputBuffer1:" << std::endl;
    for (int i = 0; i < 10; ++i) {
        std::cout << outputBuffer2[i] << " ";
    }
    std::cout << std::endl;
    std::string output_path = replaceFolderName(filename, "debug", "debug_out");
    cv::imwrite(output_path, image);

}

delete[] inputBuffer1;
delete[] inputBuffer2;
delete[] outputBuffer1;
delete[] outputBuffer2;
delete[] outputBuffer3;

}

int main(int argc, char** argv) { try { cudaSetDevice(0); TensorRTInference inference("D:\tool\TensorRT-8.6.1.6.Windows10.x86_64.cuda-11.8\TensorRT-8.6.1.6\bin\cfg_model.trt"); std::string img_dir = "E:\YOLOv8-main\val2017\debug"; std::vectorcv::String image_paths; cv::utils::fs::glob(img_dir, "*.png", image_paths); inference.doInference(image_paths); } catch (const std::exception& e) { std::cerr << "Exception: " << e.what() << std::endl; return EXIT_FAILURE; } return EXIT_SUCCESS; } 能帮忙检查一下吗

Kingxudong avatar Sep 14 '24 01:09 Kingxudong