TensorRT
TensorRT copied to clipboard
what does the errors mean?
Description
Environment
TensorRT Version: 8.4.1.5 NVIDIA GPU: 1060 NVIDIA Driver Version: 460 CUDA Version: 10.2 CUDNN Version: 8.4.0 Operating System: ubantu18 Python Version (if applicable): 3.6 Tensorflow Version (if applicable): PyTorch Version (if applicable): Baremetal or Container (if so, version):
i try to inference with python code of TRT,but when the code excute done, at last there is some errors as follows: that's amazing,
[08/01/2022-23:37:30] [TRT] [E] 1: [graphContext.h::~MyelinGraphContext::35] Error Code 1: Myelin (Error 709 destroying stream '0x19d55400'.) [08/01/2022-23:37:30] [TRT] [E] 1: [graphContext.h::~MyelinGraphContext::35] Error Code 1: Myelin (Error 709 destroying stream '0x19e2edb0'.)
Can you try using trtexec to see if it's reproducible? if yes then please share the onnx to us, we will debug it further.
If not please share your python script here.
Can you try using trtexec to see if it's reproducible? if yes then please share the onnx to us, we will debug it further.
the mode is too big about 60M,it can't upload
@zerollzeng it can execute success,but last interrupted by signal 11: SIGSEGV) ['moon'] Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
class Textrec():
def __init__(self, engine_path):
"""
:param engine_path: The path to the serialized engine to load from disk.
"""
# Load TRT engine
self.logger = trt.Logger(trt.Logger.ERROR)
trt.init_libnvinfer_plugins(self.logger, namespace="")
self.engine = self.load_engine(engine_path)
self.context = self.engine.create_execution_context()
assert self.engine
assert self.context
self.context.active_optimization_profile = 0
self.input_binding_idxs, self.output_binding_idxs = self.get_binding_idxs(self.engine,
self.context.active_optimization_profile)
self.padvalue = 255
assert len(self.input_binding_idxs) >= 1
assert len(self.output_binding_idxs) >= 1
def get_binding_idxs(self, engine: trt.ICudaEngine, profile_index: int):
# Calculate start/end binding indices for current context's profile
num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
start_binding = profile_index * num_bindings_per_profile
end_binding = start_binding + num_bindings_per_profile
print("Engine/Binding Metadata")
print("\tNumber of optimization profiles: {}".format(engine.num_optimization_profiles))
print("\tNumber of bindings per profile: {}".format(num_bindings_per_profile))
print("\tFirst binding for profile {}: {}".format(profile_index, start_binding))
print("\tLast binding for profile {}: {}".format(profile_index, end_binding - 1))
# Separate input and output binding indices for convenience
input_binding_idxs = []
output_binding_idxs = []
for binding_index in range(start_binding, end_binding):
if engine.binding_is_input(binding_index):
input_binding_idxs.append(binding_index)
else:
output_binding_idxs.append(binding_index)
return input_binding_idxs, output_binding_idxs
def load_engine(self, filename: str):
# Load serialized engine file into memory
with open(filename, "rb") as f, trt.Runtime(self.logger) as runtime:
return runtime.deserialize_cuda_engine(f.read())
def get_input_host(self, img):
host_inputs = []
for bind_indx in self.input_binding_idxs:
input_shape = self.context.get_binding_shape(bind_indx)
input_name = self.engine.get_binding_name(bind_indx)
input_dtype = self.engine.get_binding_dtype(bind_indx)
img = self.pre_processimg(img)
host_inputs.append(np.ascontiguousarray(img))
return host_inputs
def sub_mean_div(self, img):
if len(img.shape) == 3:
img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY)
img = img / 255
img = (img - 0.5) / 0.5
img = np.expand_dims(img, axis=0)
image = np.expand_dims(img, axis=0).astype(np.float32)
return image
def pre_processimg(self, img):
img = cv2.resize(img,(100,32))
# img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY)
img = self.sub_mean_div(img)
return img
def setup_binding_shapes(self,
engine: trt.ICudaEngine,
context: trt.IExecutionContext,
host_inputs: List[np.ndarray],
input_binding_idxs: List[int],
output_binding_idxs: List[int],
):
# Explicitly set the dynamic input shapes, so the dynamic output
# shapes can be computed internally
for host_input, binding_index in zip(host_inputs, input_binding_idxs):
context.set_binding_shape(binding_index, host_input.shape)
assert context.all_binding_shapes_specified
host_outputs = []
device_outputs = []
for binding_index in output_binding_idxs:
output_shape = context.get_binding_shape(binding_index)
# Allocate buffers to hold output results after copying back to host
buffer = np.empty(output_shape, dtype=np.float32)
host_outputs.append(buffer)
# Allocate output buffers on device
device_outputs.append(cuda.mem_alloc(buffer.nbytes))
return host_outputs, device_outputs
def __call__(self, img):
host_inputs = self.get_input_host(img)
# allocate devidce mem for inputs
device_inputs = [cuda.mem_alloc(h_input.nbytes) for h_input in host_inputs]
# copy host inputs to device
for h_input, d_input in zip(host_inputs, device_inputs):
cuda.memcpy_htod(d_input, h_input)
# if inputs shape change this needs to bu called,if inputs shape always thse same call this once ,reuse this allcocation
host_outputs, device_outputs = self.setup_binding_shapes(
self.engine, self.context, host_inputs, self.input_binding_idxs, self.output_binding_idxs,
)
output_names = [self.engine.get_binding_name(binding_idx) for binding_idx in self.output_binding_idxs]
# Bindings are a list of device pointers for inputs and outputs
bindingsallocation = device_inputs + device_outputs
self.context.execute_v2(bindingsallocation)
# Copy outputs back to host to view results
for h_output, d_output in zip(host_outputs, device_outputs):
cuda.memcpy_dtoh(h_output, d_output)
preds = host_outputs[0].reshape(host_outputs[0].shape)
the mode is too big about 60M,it can't upload
trtexec fails too? are your using like /usr/src/tensorrt/bin/trtexec --onnx=your_model.onnx
https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec
closing since no activity for more than 3 weeks, please reopen if you still have question, thanks!