ResidualMaskingNetwork icon indicating copy to clipboard operation
ResidualMaskingNetwork copied to clipboard

TensorRT conversion returns NaN tensors for pre-trained models

Open Muaz65 opened this issue 3 years ago • 0 comments

I am trying to convert this model to TensorRT. First, I convert the model to ONNX:

```python

import torch
from models import resmasking_dropout1

# Checkpoint holding the trained weights that are exported below.
model_path = '/media/soccer/Samsung 1TB SSD/Shahzeb/ResidualMaskingNetwork-master/emotion_model_v1.pt'

# Build the network on the GPU and load the trained weights before the model
# is used for anything, so no work is done on randomly initialised parameters.
model = resmasking_dropout1(in_channels=3, num_classes=7)
model.cuda()
model.load_state_dict(torch.load(model_path))

# Put the model in inference mode explicitly so that any dropout / batch-norm
# layers behave deterministically while the graph is traced for export.
model.eval()

# Cast the weights to FP16; the traced input must use the same dtype.
model = model.half()
dummy_input = torch.randn(1, 3, 224, 224, device='cuda').half()

torch.onnx.export(model, dummy_input, "emotion-model-v1_half.onnx", verbose=True)
 

# After converting the model to ONNX, I use the following script to convert it to TensorRT.


import pycuda.driver as cuda

import pycuda.autoinit

import numpy as np
import tensorrt as trt
def set_net_batch(network, batch_size):
    """Override the batch dimension of input 0 and switch I/O 0 to FP16.

    The ONNX file may have been exported with a different batch size
    (say, 64), so dimension 0 of the first input's shape is replaced with
    ``batch_size``. The dtype of the first input and of the first output is
    set to ``trt.float16``; the output shape is left untouched. The values
    before and after each change are printed.

    Args:
        network: Object exposing ``get_input(0)`` / ``get_output(0)`` whose
            results have assignable ``shape`` and ``dtype`` attributes.
        batch_size: New value for dimension 0 of the first input.

    Returns:
        The same ``network`` object that was passed in.
    """
    net_input = network.get_input(0)
    dims = list(net_input.shape)
    print("ONNX input shape before :", dims)
    print("ONNX input dtype before :", net_input.dtype)

    # Change the dtype first, then the batch dimension.
    net_input.dtype = trt.float16
    dims[0] = batch_size
    net_input.shape = dims
    print("ONNX input shape after:", list(net_input.shape))
    print("ONNX input dtype after:", net_input.dtype)

    # Only the dtype of the output is changed.
    net_output = network.get_output(0)
    print("ONNX output dtype before :", net_output.dtype)
    net_output.dtype = trt.float16
    print("ONNX output dtype after:", net_output.dtype)

    return network
def build_engine(onnx_file_path, BATCH_SIZE, enable_fp16=False, enable_int8=False):
    """Parse an ONNX file and build a TensorRT engine from it.

    Args:
        onnx_file_path: Path of the ONNX model to parse.
        BATCH_SIZE: Value assigned to ``builder.max_batch_size``.
        enable_fp16: Request FP16 mode. It is only switched on when the
            builder also reports ``platform_has_fast_fp16``.
        enable_int8: Request INT8 mode. It is only switched on when the
            builder also reports ``platform_has_fast_int8``.

    Returns:
        The built engine, or ``None`` when the ONNX file cannot be parsed or
        the builder does not produce an engine.
    """
    TRT_LOGGER = trt.Logger()
    EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    # Initialize the TensorRT builder and parse the ONNX model into a network.
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    parser = trt.OnnxParser(network, TRT_LOGGER)
    with open(onnx_file_path, 'rb') as model:
        print('Beginning ONNX file parsing')
        parsed = parser.parse(model.read())
    if not parsed:
        # parse() reports failure through its return value; without this
        # check the build below would run on an incomplete network.
        print('Failed to parse ONNX file')
        for index in range(parser.num_errors):
            print(parser.get_error(index))
        return None
    print('Completed parsing of ONNX file')

    # NOTE: set_net_batch(network, BATCH_SIZE) can be applied at this point to
    # override the parsed input batch size and the input/output dtypes.

    # Allow TensorRT to use up to 256 MiB (1 << 28 bytes) of GPU memory for
    # tactic selection.
    builder.max_workspace_size = 1 << 28
    builder.max_batch_size = BATCH_SIZE
    print('Building engine with max batch size: %d' % builder.max_batch_size)

    # Use FP16 mode only if it was requested and the platform supports it.
    if builder.platform_has_fast_fp16 and enable_fp16:
        print("Using FP16 Mode ...")
        builder.fp16_mode = True
    # Use INT8 mode only if it was requested and the platform supports it.
    if builder.platform_has_fast_int8 and enable_int8:
        print("Using INT8 Mode ...")
        builder.int8_mode = True

    # Generate the TensorRT engine for the target platform.
    print('Building an engine...')
    engine = builder.build_cuda_engine(network)
    if engine is None:
        print('Failed to build engine')
        return None
    return engine
def main():
    """Build a TensorRT engine from the exported ONNX model and save it.

    The engine is built with batch size 1 and FP16 requested, then its
    serialized form is written to ``TRT_FILE_PATH``.

    Raises:
        SystemExit: If ``build_engine`` returns ``None``.
    """
    ONNX_FILE_PATH = './emotion/emotion-model-v1_half.onnx'
    TRT_FILE_PATH = './emotion/emotion-model-v1_half.trt'

    engine = build_engine(ONNX_FILE_PATH, BATCH_SIZE=1, enable_fp16=True, enable_int8=False)
    if engine is None:
        # Stop before the output path is opened: opening it in 'wb' mode
        # would create or truncate the file even though there is nothing to
        # serialize.
        raise SystemExit('Engine build failed; %s was not written' % TRT_FILE_PATH)

    with open(TRT_FILE_PATH, 'wb') as engine_file:
        engine_file.write(engine.serialize())
    print("Completed creating engine")
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
```



The converted model returns NaN tensors at inference.

Muaz65 avatar Oct 13 '21 11:10 Muaz65