torch2trt
torch2trt copied to clipboard
NaN is the only return from TRTModule loaded model
Hi, I am experiencing an issue where I load a Torch2trt model and am only getting NaN as a return value.
Here is how I am saving the model after training (training code included):
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
import numpy as np
import matplotlib.pyplot as plt
import cv2
import time
import os
from torch2trt.torch2trt import torch2trt
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device object
if torch.cuda.is_available():
print('using gpu')
transforms_train = transforms.Compose([
transforms.Resize((224, 224)), #must same as here
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(), # data augmentation
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # normalization
])
transforms_test = transforms.Compose([
transforms.Resize((224, 224)), #must same as here
transforms.CenterCrop((224, 224)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dir = "/conveyor/train/"
test_dir = "/conveyor/test/"
train_classa_dir = "/conveyor/train/0/"
train_classb_dir = "/conveyor/train/1/"
test_classa_dir = "/conveyor/test/0/"
test_classb_dir = "/conveyor/test/1/"
train_dataset = datasets.ImageFolder(train_dir, transforms_train)
test_dataset = datasets.ImageFolder(test_dir, transforms_test)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=2)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=2)
model = models.resnet50(pretrained=True) #load resnet18 model
num_features = model.fc.in_features #extract fc layers features
model.fc = nn.Linear(num_features, 2) #(num_of_class == 2)
model = model.to(device)
criterion = nn.CrossEntropyLoss() #(set loss function)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
num_epochs = 50 # (set no of epochs)
start_time = time.time() # (for showing time)
for epoch in range(num_epochs): # (loop for every epoch)
print("Epoch {} running".format(epoch)) # (printing message)
""" Training Phase """
model.train() # (training model)
running_loss = 0. # (set loss 0)
running_corrects = 0
# load a batch data of images
for i, (inputs, labels) in enumerate(train_dataloader):
inputs = inputs.to(device)
labels = labels.to(device)
# forward inputs and get output
optimizer.zero_grad()
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# get loss value and update the network weights
loss.backward()
optimizer.step()
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(train_dataset)
epoch_acc = running_corrects / len(train_dataset) * 100.
print('[Train #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc,
time.time() - start_time))
""" Testing Phase """
model.eval()
with torch.no_grad():
running_loss = 0.
running_corrects = 0
for inputs, labels in test_dataloader:
inputs = inputs.to(device)
labels = labels.to(device)
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / len(test_dataset)
epoch_acc = running_corrects / len(test_dataset) * 100.
print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc,
time.time() - start_time))
#data = torch.ones((1, 3, 224, 224)).cuda()
model_trt = torch2trt(model, [inputs])
save_path = 'trt_test_resnet50_model_50_epochs.pth'
torch.save(model_trt.state_dict(), save_path)
The path saves successfully, and I am loading it and passing a processed image into model_trt:
model_trt = TRTModule()
model_trt.load_state_dict(torch.load('trt_test_resnet50_model_50_epochs.pth'))
model_trt.cuda()
output = model_trt(process_img(img))
The output variable when printed only returns 'tensor([nan, nan], device='cuda:0')
Not quite sure what I am doing wrong. Any insight or help would be greatly appreciated.