TensorRT
Error Code 1: Cuda Driver (context is destroyed) CUDA_ERROR_CONTEXT_IS_DESTROYED
Description
The following errors are printed when running the script in Steps To Reproduce:
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [defaultAllocator.cpp::nvinfer1::internal::DefaultAllocator::deallocate::42] Error Code 1: Cuda Runtime (invalid argument)
[09/12/2024-14:32:23] [TRT] [E] 1: [cudaDriverHelpers.cpp::nvinfer1::CuDeleter<struct CUmod_st *,&enum cudaError_enum __cdecl nvinfer1::cuModuleUnloadWrapper(struct CUmod_st *)>::operator ()::30] Error Code 1: Cuda Driver (context is destroyed)
[09/12/2024-14:32:23] [TRT] [E] 1: [cudaDriverHelpers.cpp::nvinfer1::CuDeleter<struct CUmod_st *,&enum cudaError_enum __cdecl nvinfer1::cuModuleUnloadWrapper(struct CUmod_st *)>::operator ()::30] Error Code 1: Cuda Driver (context is destroyed)
CUDA_ERROR_CONTEXT_IS_DESTROYED
Environment
TensorRT Version: 8.5.3.1
NVIDIA GPU: RTX3080
NVIDIA Driver Version:
CUDA Version: 11.6
CUDNN Version: 8.9.5.30
Operating System: Win11
Python Version (if applicable): 3.8
Tensorflow Version (if applicable):
PyTorch Version (if applicable): 1.13.1
Baremetal or Container (if so, version):
Relevant Files
Model link: https://drive.google.com/drive/folders/16JJ274kvQfyUFzRmLV08hqkyib7OPE0U?usp=drive_link
Steps To Reproduce
Commands or scripts:

```python
import warnings
warnings.filterwarnings("ignore")

import ctypes
import os
import sys
import json
import math
import time
import random
import shutil
import argparse
import platform
from pathlib import Path

import numpy as np
import cv2
import torch
from torchvision.ops import roi_align
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
from ultralytics import YOLO


class TrtInference():
    _batch_size = 1

    def __init__(self, model_path=None):
        self._model_path = model_path
        if self._model_path is None:
            print("please set trt model path!")
            exit()
        self.cuda_ctx = cuda.Device(0).make_context()
        # self.stream = cuda.Stream()
        TRT_LOGGER = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(TRT_LOGGER, '')
        runtime = trt.Runtime(TRT_LOGGER)
        # deserialize engine
        with open(self._model_path, 'rb') as f:
            buf = f.read()
            self.engine = runtime.deserialize_cuda_engine(buf)
        self.context = self.engine.create_execution_context()
        # ---------------------------#
        for index, binding in enumerate(self.engine):
            if self.engine.binding_is_input(binding):
                batch_shape = list(self.engine.get_binding_shape(binding)).copy()
                batch_shape[0] = self._batch_size
                self.context.set_binding_shape(index, batch_shape)
        self.host_inputs, self.host_outputs, self.cuda_inputs, self.cuda_outputs, self.bindings = self._allocate_buffers()
    def _allocate_buffers(self):
        host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings = \
            [], [], [], [], []
        for index, binding in enumerate(self.engine):
            size = trt.volume(self.context.get_binding_shape(index)) * \
                self.engine.max_batch_size
            host_mem = cuda.pagelocked_empty(size, np.float32)
            cuda_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(cuda_mem))
            if self.engine.binding_is_input(binding):
                host_inputs.append(host_mem)
                cuda_inputs.append(cuda_mem)
            else:
                host_outputs.append(host_mem)
                cuda_outputs.append(cuda_mem)
        return host_inputs, host_outputs, cuda_inputs, cuda_outputs, bindings

    # def destroy(self):
    def __del__(self):
        """Free CUDA memories and context."""
        print('---del func---')
        # del self.host_inputs
        # del self.host_outputs
        # del self.cuda_inputs
        # del self.cuda_outputs
        # del self.bindings
        # del self.engine
        # # del self.stream
        # self.cuda_ctx.pop()
        # del self.cuda_ctx
        # print(self.cuda_ctx.detach())
        # pycuda.tools.clear_context_caches()
        # cuda.DeviceAllocation.free()

    def inference(self, inputs):
        self.cuda_ctx.push()
        stream = cuda.Stream()
        try:
            np.copyto(self.host_inputs[0], inputs[0].ravel())
            np.copyto(self.host_inputs[1], inputs[1].ravel())
            np.copyto(self.host_inputs[2], inputs[2].ravel())
        except:
            return None
        cuda.memcpy_htod_async(
            self.cuda_inputs[0], self.host_inputs[0], stream)
        cuda.memcpy_htod_async(
            self.cuda_inputs[1], self.host_inputs[1], stream)
        cuda.memcpy_htod_async(
            self.cuda_inputs[2], self.host_inputs[2], stream)
        self.context.execute_async(
            batch_size=1,
            bindings=self.bindings,
            stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(
            self.host_outputs[0], self.cuda_outputs[0], stream)
        stream.synchronize()
        self.cuda_ctx.pop()
        output = np.reshape(self.host_outputs[0], (32, 15))
        return output
class ActionPredict:
    def __init__(self, slowfast_model_path='', yolo_model_path='', slowfast_label_path=''):
        self.slowfast_model_path = slowfast_model_path
        self.yolo_model_path = yolo_model_path
        self.slowfast_label_path = slowfast_label_path
        self.action = ''
        self.name_list = []
        self.box_list = []
        self.action_names = self.get_class_names(self.slowfast_label_path)
        self.yolo_model = YOLO(self.yolo_model_path, task='detect')
        self.slowfast = TrtInference(self.slowfast_model_path)

    def runActionPred(self, frames):
        name_list, rect_list = self.run_object_detection(frames)
        action = self.run_action_detection(frames, self.action_names)
        self.action = action
        self.box_list = rect_list
        self.name_list = name_list

    def get_action_preds(self):
        return self.action

    def get_name_list(self):
        return self.name_list

    def get_box_list(self):
        return self.box_list

    def detect_objects_single(self, yolo_model, frame):
        tensor_bboxs = []
        result_list = []
        # ------ only run detection on the middle frame ------ #
        results = yolo_model(frame, conf=0.5, task='detect', verbose=False)
        # ----------- used for action recognition ----------- #
        boxes = results[0].boxes
        mask = boxes.cls == 2  # class index corresponding to 'person'
        pred_boxes = boxes.xyxy[mask]
        if (len(pred_boxes.tolist()) > 0):
            tensor_bboxs.append(pred_boxes)  # used for action recognition
        # ----------- used for action recognition ----------- #
        find_person = False
        for result in results:
            if find_person:
                break
            for box in result.boxes:
                text_name = f"{result.names[int(box.cls[0])]}"
                # print(text_name)
                if text_name != 'person':
                    continue
                find_person = True
                # rect = (float(box.xyxy[0][0]), float(box.xyxy[0][1]), float(box.xyxy[0][2]), float(box.xyxy[0][3]))
                result_list.append([float(box.xyxy[0][0]), float(box.xyxy[0][1]), float(box.xyxy[0][2]), float(box.xyxy[0][3])])
                break
        if len(result_list) == 0:
            for i in range(32 - len(result_list)):
                result_list.append([float(0), float(0), float(0), float(0)])
        else:
            for i in range(1, 32):
                result_list.append(result_list[0])
        return torch.from_numpy(np.array(result_list))
    def run_object_detection(self, frames):
        tensor_bboxs = []
        name_list = []
        box_list = []
        for frame in frames:
            results = self.yolo_model(frame, conf=0.5, task='detect', verbose=False)
            label_list = []
            rect_list = []
            # ----------- used for action recognition ----------- #
            boxes = results[0].boxes
            mask = boxes.cls == 2  # class index corresponding to 'person'
            pred_boxes = boxes.xyxy[mask]
            if (len(pred_boxes.tolist()) > 0):
                tensor_bboxs.append(pred_boxes)  # used for action recognition
            # ----------- used for action recognition ----------- #
            for result in results:
                for box in result.boxes:
                    text_name = f"{result.names[int(box.cls[0])]}"
                    rect = (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
                    label_list.append(text_name)
                    rect_list.append(rect)
            name_list.append(label_list)
            box_list.append(rect_list)
        return name_list, box_list

    def run_action_detection(self, frames, classes):
        action = ''
        bboxes = self.detect_objects_single(self.yolo_model, frames[15])
        if bboxes is not None:
            frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]
            frames = [self.scale(256, frame) for frame in frames]
            inputs = self.process_cv2_inputs(frames)
            if bboxes is not None:
                bboxes = self.scale_boxes(256, bboxes, 1080, 1920)
                index_pad = torch.full(
                    size=(bboxes.shape[0], 1),
                    fill_value=float(0),
                    device=bboxes.device,
                )
                # Pad frame index for each box.
                bboxes = torch.cat([index_pad, bboxes], axis=1)
            # print('------------1-----------')
            for i in range(len(inputs)):
                inputs[i] = inputs[i].numpy()
                # print(inputs[i])
            # print('------------2-----------')
            if bboxes is not None:
                inputs.append(bboxes.numpy().astype(np.float32))
            outputs = self.slowfast.inference(inputs)
            if outputs is None:
                return ''
            list_out = list(outputs[0])
            index_of_max = list_out.index(max(list_out))
            name = classes[index_of_max]
            score = list_out[index_of_max]
            # print(name + ': ' + str(score))
            str_score = '%0.2f' % score
            action = name + ': ' + str_score
            # print('-----------outputs[0]------------')
            # print(outputs)
        else:
            action = ''
        return action
    def scale(self, size, image):
        """
        Scale the short side of the image to size.
        Args:
            size (int): size to scale the image.
            image (array): image to perform short side scale. Dimension is
                `height` x `width` x `channel`.
        Returns:
            (ndarray): the scaled image with dimension of
                `height` x `width` x `channel`.
        """
        height = image.shape[0]
        width = image.shape[1]
        # print(height, width)
        if (width <= height and width == size) or (
            height <= width and height == size
        ):
            return image
        new_width = size
        new_height = size
        if width < height:
            new_height = int(math.floor((float(height) / width) * size))
        else:
            new_width = int(math.floor((float(width) / height) * size))
        img = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
        )
        # print(new_width, new_height)
        return img.astype(np.float32)

    def tensor_normalize(self, tensor, mean, std, func=None):
        """
        Normalize a given tensor by subtracting the mean and dividing the std.
        Args:
            tensor (tensor): tensor to normalize.
            mean (tensor or list): mean value to subtract.
            std (tensor or list): std to divide.
        """
        if tensor.dtype == torch.uint8:
            tensor = tensor.float()
            tensor = tensor / 255.0
        if type(mean) == list:
            mean = torch.tensor(mean)
        if type(std) == list:
            std = torch.tensor(std)
        if func is not None:
            tensor = func(tensor)
        tensor = tensor - mean
        tensor = tensor / std
        return tensor

    def scale_boxes(self, size, boxes, height, width):
        """
        Scale the short side of the box to size.
        Args:
            size (int): size to scale the image.
            boxes (ndarray): bounding boxes to perform scale. The dimension is
                `num boxes` x 4.
            height (int): the height of the image.
            width (int): the width of the image.
        Returns:
            boxes (ndarray): scaled bounding boxes.
        """
        if (width <= height and width == size) or (
            height <= width and height == size
        ):
            return boxes
        new_width = size
        new_height = size
        if width < height:
            new_height = int(math.floor((float(height) / width) * size))
            boxes *= float(new_height) / height
        else:
            new_width = int(math.floor((float(width) / height) * size))
            boxes *= float(new_width) / width
        return boxes

    def process_cv2_inputs(self, frames):
        """
        Normalize and prepare inputs as a list of tensors. Each tensor
        corresponds to a unique pathway.
        Args:
            frames (list of array): list of input images (correspond to one clip) in range [0, 255].
            cfg (CfgNode): configs. Details can be found in
                slowfast/config/defaults.py
        """
        inputs = torch.from_numpy(np.array(frames)).float() / 255
        inputs = self.tensor_normalize(inputs, [0.45, 0.45, 0.45], [0.225, 0.225, 0.225])
        # T H W C -> C T H W.
        inputs = inputs.permute(3, 0, 1, 2)
        # Sample frames for num_frames specified.
        index = torch.linspace(0, inputs.shape[1] - 1, 32).long()
        # print(index)
        inputs = torch.index_select(inputs, 1, index)
        fast_pathway = inputs
        slow_pathway = torch.index_select(
            inputs,
            1,
            torch.linspace(
                0, inputs.shape[1] - 1, inputs.shape[1] // 4
            ).long(),
        )
        frame_list = [slow_pathway, fast_pathway]
        # print(np.shape(frame_list[0]))
        inputs = [inp.unsqueeze(0) for inp in frame_list]
        return inputs
    def get_class_names(self, label_path):
        name_list = []
        with open(label_path, 'r') as file:
            line = file.readline()
            while line:
                # print(line.strip())  # strip the trailing newline
                name = line.strip()
                name_list.append(name)
                line = file.readline()
        return name_list
def create_video_writer(video_cap, output_filename):
    # grab the width, height, and fps of the frames in the video stream.
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_cap.get(cv2.CAP_PROP_FPS))
    # initialize the FourCC and a video writer object
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(output_filename, fourcc, fps, (frame_width, frame_height))
    return writer


if __name__ == "__main__":
    slowfast_model_path = 'action.engine'
    slowfast_label_path = 'action.txt'
    yolo_model_path = 'object.engine'
    cap = cv2.VideoCapture('./test.mp4')
    # cap = cv2.VideoCapture('rtmp://10.17.170.64/live/')
    print(cap.isOpened())
    frame_list = []
    # output_filename = "out.mp4"
    # writer = create_video_writer(cap, output_filename)
    predictor = ActionPredict(slowfast_model_path, yolo_model_path, slowfast_label_path)
    count = 0
    while True:
        _, frame = cap.read()
        if frame is None:
            break
        frame_list.append(frame)
        if len(frame_list) == 32:
            start = cv2.getTickCount()
            predictor.runActionPred(frame_list)
            action = predictor.get_action_preds()
            name_list = predictor.get_name_list()
            box_list = predictor.get_box_list()
            # record the end time
            end = cv2.getTickCount()
            # elapsed time
            use_time = (end - start) / cv2.getTickFrequency()
            print('use-time: %.4fs' % use_time)
            print(action)
            # LOGGER.info(name_list)
            # LOGGER.info(box_list)
            for i in range(len(frame_list)):
                img = frame_list[i]
                for j in range(0, len(name_list[i])):
                    name = name_list[i][j]
                    x1 = box_list[i][j][0]
                    y1 = box_list[i][j][1]
                    x2 = box_list[i][j][2]
                    y2 = box_list[i][j][3]
                    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 2, cv2.LINE_AA)
                    cv2.putText(img, name, (x1, y1 + 30), 0, 1.5, (255, 255, 0), 2)
                    if name == 'person':
                        cv2.putText(img, action, (x1, y1 - 30), 0, 2.2, (0, 255, 0), 3)
                # writer.write(img)
                showimg = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
                cv2.imshow('img', showimg)
                cv2.waitKey(1)
            frame_list.clear()
            count += 1
            if count == 10:
                break
        else:
            continue
    cap.release()
    # writer.release()
    # predictor.slowfast.destroy()
    print('------OK------')
```
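The errors above appear to be printed while the `TrtInference` object is garbage-collected at interpreter exit, i.e. after the CUDA context created with `cuda.Device(0).make_context()` (and the one from `pycuda.autoinit`) has already been torn down. Below is a minimal sketch of what the commented-out `destroy()` / `# predictor.slowfast.destroy()` lines could look like; the ordering (TensorRT objects and device buffers first, PyCUDA context last) is an assumption about the cause, not behaviour confirmed on this setup.

```python
# Sketch only: an explicit teardown method for the TrtInference class above, filled in
# from its commented-out destroy()/__del__ code. Assumption: the errors occur because the
# PyCUDA context is destroyed at exit before TensorRT releases its device memory, so
# releasing everything while the context is still alive should avoid them.
def destroy(self):
    """Release TensorRT objects and device buffers, then the CUDA context."""
    self.cuda_ctx.push()
    # TensorRT execution context and engine first, while their CUDA context is current.
    del self.context
    del self.engine
    # DeviceAllocation objects from cuda.mem_alloc() are freed when deleted.
    del self.cuda_inputs
    del self.cuda_outputs
    del self.host_inputs
    del self.host_outputs
    del self.bindings
    self.cuda_ctx.pop()
    self.cuda_ctx.detach()

# Called explicitly at the end of __main__, before the interpreter starts shutting down:
#   predictor.slowfast.destroy()
```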
Have you tried the latest release?: No
Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (`polygraphy run <model.onnx> --onnxrt`): Yes