TensorRT
TensorRT copied to clipboard
Why do TRT 7234 and 8406 differ in a layer's reported output dimensions when constructing the network?
Description
Environment
TensorRT Version: NVIDIA GPU: NVIDIA Driver Version: CUDA Version: CUDNN Version: Operating System: Python Version (if applicable): Tensorflow Version (if applicable): PyTorch Version (if applicable): Baremetal or Container (if so, version):
Relevant Files
Steps To Reproduce
#include "/usr/local/tensorrt/include/NvInfer.h"
#include <vector>
#include <iostream>
// g++ A.cc -lnvinfer -lcudart -L /usr/local/tensorrt/lib/ -L/usr/local/cuda/lib64 -I /usr/local/cuda/include
// export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/tensorrt/lib:/usr/local/cuda/lib64
// Encode the TensorRT version as a single integer, e.g. 8.4.0.6 -> 8406.
// The expansion is parenthesized so the macro stays correct when used inside
// a larger expression (e.g. `2 * TRT_VERSION`); without the parentheses,
// operator precedence would only multiply the first term.
#define TRT_VERSION \
    (NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
     NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD)
#include "/usr/local/cuda/include/cuda_runtime.h"
class TensorrtLogger : public nvinfer1::ILogger {
nvinfer1::ILogger::Severity verbosity_;
public:
TensorrtLogger(Severity verbosity = Severity::kWARNING)
: verbosity_(verbosity) {}
void log(Severity severity, const char* msg) noexcept override {
if (severity <= verbosity_) {
printf("%s\n", msg);
}
}
};
// Repro for the 7234-vs-8406 difference: build a tiny dynamic-shape network
// (input -> shuffle whose reshape dims come from an IShapeLayer on the same
// input) and print the shuffle output dimensions both before and after the
// engine is built.
//
// Fixes vs. the original repro: every TensorRT object is now released
// (the original leaked builder, network, config, and engine), and the
// engine pointer is checked before use (buildEngineWithConfig returns
// nullptr on failure, which previously crashed the program).
int main()
{
    std::cout << TRT_VERSION << std::endl;

    static TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
    auto trt_builder = nvinfer1::createInferBuilder(trt_logger);
    if (trt_builder == nullptr) {
        std::cerr << "createInferBuilder failed" << std::endl;
        return 1;
    }

    const auto explicitBatch =
        1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto trt_network = trt_builder->createNetworkV2(explicitBatch);
    auto trt_config = trt_builder->createBuilderConfig();
    trt_config->setMaxWorkspaceSize(1 << 30);

    // Dynamic input: only the innermost dimension is fixed (32).
    nvinfer1::Dims a;
    a.nbDims = 4;
    a.d[0] = -1;
    a.d[1] = -1;
    a.d[2] = -1;
    a.d[3] = 32;
    trt_network->addInput("foo0", nvinfer1::DataType::kFLOAT, a);

    // Dynamic shapes require an optimization profile covering min/opt/max.
    nvinfer1::IOptimizationProfile* profile1 = trt_builder->createOptimizationProfile();
    profile1->setDimensions("foo0", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(2, 4, 4, 32));
    profile1->setDimensions("foo0", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(16, 64, 16, 32));
    profile1->setDimensions("foo0", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(128, 256, 64, 32));
    trt_config->addOptimizationProfile(profile1);

    // Shuffle layer whose reshape dimensions are the runtime shape of the
    // input itself (identity reshape via a shape tensor on input slot 1).
    auto x = trt_network->getInput(0);
    auto layer = trt_network->addShuffle(*x);
    layer->setInput(1, *(trt_network->addShape(*x)->getOutput(0)));

    // Dims reported at network-construction time: 7234 prints -1 -1 -1 -1,
    // 8406 prints -1 -1 -1 32 (shape inference runs as layers are added).
    for (int i = 0; i < layer->getOutput(0)->getDimensions().nbDims; i++)
    {
        std::cout << layer->getOutput(0)->getDimensions().d[i] << std::endl;
    }
    trt_network->markOutput(*layer->getOutput(0));

    auto engine = trt_builder->buildEngineWithConfig(*trt_network, *trt_config);
    if (engine == nullptr) {
        std::cerr << "buildEngineWithConfig failed" << std::endl;
        trt_config->destroy();
        trt_network->destroy();
        trt_builder->destroy();
        return 1;
    }

    // NOTE(review): binding 0 is the *input* "foo0"; if the marked output's
    // dims were intended, look up its index with getBindingIndex — confirm
    // which tensor the repro meant to inspect.
    auto engine_out_dims = engine->getBindingDimensions(0);
    std::cout << "after build" << std::endl;
    for (int i = 0; i < engine_out_dims.nbDims; i++)
    {
        std::cout << engine_out_dims.d[i] << std::endl;
    }

    // Release in reverse order of creation. destroy() is used (rather than
    // delete) so the repro still compiles against both TRT 7.x and 8.x;
    // the profile is owned by the builder and must not be destroyed here.
    engine->destroy();
    trt_config->destroy();
    trt_network->destroy();
    trt_builder->destroy();
    return 0;
}
- in 7234
7234
-1
-1
-1
-1
after build
-1
-1
-1
32
- in 8406
8406
-1
-1
-1
32
after build
-1
-1
-1
32
I think the second is good! It seems to work like this: in 7234, shape inference is carried out only when building the engine. In 8406, it seems that shape inference is carried out every time a layer is added. Is that right?
Yes, that was a bug already fixed in TRT 8.4.
Yes, that was a bug already fixed in TRT 8.4.
I think it was not a bug. It seems to work like this: in 7234, shape inference is carried out only when building the engine. In 8406, it seems that shape inference is carried out every time a layer is added. Is that right?
Yes, that was a bug already fixed in TRT 8.4.
I think it was not a bug. It seems to work like this: in 7234, shape inference is carried out only when building the engine. In 8406, it seems that shape inference is carried out every time a layer is added. Is that right?
Please reply to me.