
Segmentation fault when using CustomGeluPluginDynamic

oreo-lp opened this issue 3 years ago · 5 comments

Description

When I build my network with the TensorRT C++ API and the LayerNorm plugin, I get a segmentation fault. I debugged the plugin; the crash is in this code: https://github.com/NVIDIA/TensorRT/blob/87f3394404ff9f9ec92c906cd4c39b5562aea42e/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp#L430 and the same problem occurs in CustomGeluPlugin: https://github.com/NVIDIA/TensorRT/blob/87f3394404ff9f9ec92c906cd4c39b5562aea42e/plugin/geluPlugin/geluPlugin.cpp#L297 How can this bug be solved?
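For reference, the code at both linked lines follows roughly this pattern (paraphrased, not verbatim; the exact code depends on the TensorRT version):

    // Rough paraphrase of the createPlugin() loop at the linked lines: it walks
    // the incoming PluginFieldCollection and dereferences each field's data
    // pointer. If the data pointers were never filled in (e.g. the collection
    // came straight from getFieldNames()), the dereference segfaults.
    for (int32_t i = 0; i < fc->nbFields; i++)
    {
        std::string fieldName(fc->fields[i].name);
        if (fieldName == "type_id")
        {
            typeId = static_cast<int32_t const*>(fc->fields[i].data)[0]; // crash site
        }
    }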

Environment

TensorRT Version: 8.4.0
NVIDIA GPU: T4
CUDA Version: 10.2
CUDNN Version: 8.3.2
Operating System: Linux

oreo-lp commented on Sep 07 '22

Can you provide a repro for this error, or your ONNX model? I guess it's due to some attribute issue in your model.

zerollzeng commented on Sep 07 '22

Below is a network with only one GELU layer (CustomGeluPluginDynamic). When I run this code, the "segmentation fault" error occurs. Maybe CustomGeluPluginDynamic has some bugs? My env: Linux, TensorRT 8.4.0.6, CUDA 10.2, cuDNN 8.3.2, C++11.

(1) Build main.cpp. (2) Run ./main. (3) The "segmentation fault" error occurs.

Below is main.cpp

#include <iostream>
#include <string>
#include <vector>
#include <assert.h>
#include <fstream>

// cuda and tensorrt
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "common.h"


#define DEVICE 1
#define USE_FP16   

using namespace nvinfer1;
// using namespace bert;

// Path where the serialized engine is written
const std::string CONV_TRT_PATH = "../trt/w2v_conv.trt";
const char* INPUT_BLOB_NAME = "input";
const char* OUTPUT_BLOB_NAME = "output";

Logger gLogger(Severity::kVERBOSE);

IPluginV2* get_gelu_plugin(){
    int32_t num = 0;
    auto pluginLst = getPluginRegistry()->getPluginCreatorList(&num);
    std::cout << "all registered plugins: " << std::endl;
    // Iterate over the number of creators actually returned, not a fixed 100.
    for(int i = 0; i < num; ++i){
        std::cout << "[" << i << "]: " << pluginLst[i]->getPluginName() << std::endl;
    }
    std::cout << "show registered plugins done!" << std::endl;
    IPluginCreator* creator_gelu = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1");
    assert(creator_gelu != nullptr);
    const PluginFieldCollection* pluginData = creator_gelu->getFieldNames();
    assert(pluginData != nullptr);
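    // NOTE: pluginData is the creator's field-name template from getFieldNames();
    // its data pointers are not populated here, which the discussion below
    // identifies as the cause of the segfault inside createPlugin().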
    IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", pluginData);
    assert(plugin != nullptr);
    return plugin;
}

// Build the engine for the whole network
ICudaEngine* createEngine(unsigned int bs, IBuilder* builder, IBuilderConfig* config, DataType dtype){
    // (1) Create the network and load the weights
    // explicit batch flag: 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)
    INetworkDefinition* network = builder->createNetworkV2(1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH));
    // (2) Add the network input (BCHW)
    ITensor* input_data = network->addInput(INPUT_BLOB_NAME, dtype, Dims4{1, 1, 512, 16399});
    assert(input_data);
    // show_dims(tranpose_1, "transpose");
    // (3) Add the GELU plugin layer (this is where the segmentation fault occurs)
    ITensor* inputs_ln1[] = {input_data};
    auto ln_1 = network->addPluginV2(inputs_ln1, 1, *get_gelu_plugin());
    assert(ln_1 != nullptr);

    // (4) Mark the network output
    ln_1->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*ln_1->getOutput(0));
    builder->setMaxBatchSize(bs);
    config->setMaxWorkspaceSize(16 * (1 << 25));
    // Build with FP16 enabled
    #ifdef USE_FP16
        config->setFlag(BuilderFlag::kFP16);
    #endif
    std::cout << "Building engine, please wait for a while ..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;
    // Log whether explicit precision is set, then release the network
    std::cout << "explicit precision = " << network->hasExplicitPrecision() << std::endl;
    network->destroy();
    return engine;
}

// Build the whole network with the TensorRT API
void API2Model(unsigned int bs, IHostMemory** modelStream){
    // (1) Create the builder and config
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();
    // (2) Create the engine
    ICudaEngine* engine = createEngine(bs, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);
    // (3) Serialize the whole model
    (*modelStream) = engine->serialize();
    // We could consider using smart pointers later to help release this memory
    engine->destroy();
    config->destroy();
}

int main(int argc, char** argv){
    cudaSetDevice(DEVICE);
    char* trtModelStream{nullptr};
    size_t size{0};
    // Serialize the model (deserialization would go here too)
    std::cout << "begin to serialize engine ..." << std::endl;
    IHostMemory* modelStream{nullptr};
    API2Model(1, &modelStream);
    assert(modelStream != nullptr);
    std::ofstream p(CONV_TRT_PATH, std::ios::binary);
    if(!p){
        std::cerr << "could not open plan output file" << std::endl;
        return -1;
    }
    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
    modelStream->destroy();
    std::cout << "serialize engine done!" << std::endl;
    return 0;
}

Below is "common.h"

/*
 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INFER_C_COMMON_H
#define INFER_C_COMMON_H

#include <cuda_runtime_api.h>
#include <NvInfer.h>
#include <getopt.h>
#include <memory>
#include <vector>
#include <cassert>
#include <iostream>
#include <map>

using namespace nvinfer1;
using Severity = nvinfer1::ILogger::Severity;

class Logger : public ILogger
{
public:
    Logger(Severity severity)
        : mOstream(&std::cout)
        , mReportableSeverity(severity)
    {
    }

    template <typename T>
    Logger& operator<<(T const& obj)
    {
        if (mOstream != nullptr)
        {
            *mOstream << obj;
        }
        return *this;
    }

    Logger& report(Severity severity, const char* msg)
    {

        if (severity <= mReportableSeverity)
        {
            const std::map<Severity, std::string> prefixMapping = {{Severity::kINTERNAL_ERROR, "[DemoWav2vec][F] "},
                {Severity::kERROR, "[DemoWav2vec][E] "}, {Severity::kWARNING, "[DemoWav2vec][W] "},
                {Severity::kINFO, "[DemoWav2vec][I] "}, {Severity::kVERBOSE, "[DemoWav2vec][V] "}};

            assert(prefixMapping.find(severity) != prefixMapping.end());

            mOstream = &std::cout;

            *this << prefixMapping.at(severity) << msg;

            return *this;
        }
        mOstream = nullptr;
        return *this;
    }

private:
    void log(Severity severity, const char* msg) noexcept override
    {
        report(severity, msg) << "\n";
    }

    std::ostream* mOstream;
    Severity mReportableSeverity;
};

extern Logger gLogger;
#define gLogFatal gLogger.report(Severity::kINTERNAL_ERROR, "")
#define gLogError gLogger.report(Severity::kERROR, "")
#define gLogWarning gLogger.report(Severity::kWARNING, "")
#define gLogInfo gLogger.report(Severity::kINFO, "")
#define gLogVerbose gLogger.report(Severity::kVERBOSE, "")

// clang-format off
#define gpuErrChk(ans)                        \
    {                                         \
        gpuAssert((ans), __FILE__, __LINE__); \
    }
// clang-format on

inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        gLogError << "GPUassert: " << cudaGetErrorString(code) << " " << file << " " << line << "\n";
        if (abort)
        {
            exit(code);
        }
    }
}
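// Example usage of the gpuErrChk macro defined above:
//     gpuErrChk(cudaMemcpy(dst, src, nbBytes, cudaMemcpyHostToDevice));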

template <typename T>
struct TrtDestroyer{
    void operator()(T* t){
        t->destroy();
    }
};

template <typename T>
using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;

#endif // INFER_C_COMMON_H
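
As a side note, the TrtDestroyer/TrtUniquePtr helpers above could replace the manual destroy() calls in main.cpp. A minimal sketch (hypothetical refactor, not part of the repro):

    // Hypothetical: scoped ownership, so destroy() runs automatically.
    TrtUniquePtr<IBuilder> builder{createInferBuilder(gLogger)};
    TrtUniquePtr<IBuilderConfig> config{builder->createBuilderConfig()};
    TrtUniquePtr<ICudaEngine> engine{createEngine(1, builder.get(), config.get(), DataType::kFLOAT)};
    TrtUniquePtr<IHostMemory> modelStream{engine->serialize()};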

oreo-lp commented on Sep 13 '22

Have you tried debugging it with GDB? What does the stack trace look like?

zerollzeng commented on Sep 14 '22

The crash stops at IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", pluginData); in main.cpp. The GDB sessions are shown in the images below: [1] before executing IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", pluginData);

[GDB screenshot]

[2] after executing IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", pluginData);

[GDB screenshot]

oreo-lp commented on Sep 15 '22

Sorry for the late reply. I don't have much time to debug your code, but I think the way you add the input might be incorrect. You can refer to https://github.com/onnx/onnx-tensorrt/blob/9f82b2b6072be6c01f65306388e5c07621d3308f/builtin_op_importers.cpp#L4886 and https://github.com/onnx/onnx-tensorrt/blob/9f82b2b6072be6c01f65306388e5c07621d3308f/builtin_op_importers.cpp#L5197

@kevinch-nv may have some insight here.

zerollzeng commented on Sep 19 '22

    IPluginCreator* creator_gelu = getPluginRegistry()->getPluginCreator("CustomGeluPluginDynamic", "1");
    assert(creator_gelu != nullptr);
    const PluginFieldCollection* pluginData = creator_gelu->getFieldNames();
    assert(pluginData != nullptr);
    IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", pluginData);

It seems that you didn't set the plugin fields before creating the plugin...
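
For reference, a minimal sketch of filling in the fields first, assuming CustomGeluPluginDynamic v1 takes a "type_id" INT32 field selecting the precision (0 = FP32, 1 = FP16, 2 = INT8), as in TensorRT's geluPlugin.cpp; verify against your version:

    // Minimal sketch: populate the field data before calling createPlugin().
    std::vector<PluginField> fields;
    int32_t typeId = 0; // DataType::kFLOAT
    fields.emplace_back("type_id", &typeId, PluginFieldType::kINT32, 1);
    PluginFieldCollection fc{};
    fc.nbFields = static_cast<int32_t>(fields.size());
    fc.fields = fields.data();
    IPluginV2* plugin = creator_gelu->createPlugin("geluPlugin", &fc);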

nvpohanh commented on Dec 02 '22

Closing since there has been no activity for more than 3 weeks. Please reopen if you still have questions, thanks!

ttyio commented on Dec 27 '22