TensorRT what does the errors mean?

Description

Environment

TensorRT Version: 8.4.1.5 NVIDIA GPU: 1060 NVIDIA Driver Version: 460 CUDA Version: 10.2 CUDNN Version: 8.4.0 Operating System: ubantu18 Python Version (if applicable): 3.6 Tensorflow Version (if applicable): PyTorch Version (if applicable): Baremetal or Container (if so, version):

i try to inference with python code of TRT,but when the code excute done, at last there is some errors as follows: that's amazing,

[08/01/2022-23:37:30] [TRT] [E] 1: [graphContext.h::~MyelinGraphContext::35] Error Code 1: Myelin (Error 709 destroying stream '0x19d55400'.) [08/01/2022-23:37:30] [TRT] [E] 1: [graphContext.h::~MyelinGraphContext::35] Error Code 1: Myelin (Error 709 destroying stream '0x19e2edb0'.)

Aug 02 '22 06:08 cqray1990

Can you try using trtexec to see if it's reproducible? if yes then please share the onnx to us, we will debug it further.

Aug 03 '22 01:08 zerollzeng

If not please share your python script here.

Aug 03 '22 01:08 zerollzeng

Can you try using trtexec to see if it's reproducible? if yes then please share the onnx to us, we will debug it further.

the mode is too big about 60M,it can't upload

Aug 03 '22 01:08 cqray1990

@zerollzeng it can execute success,but last interrupted by signal 11: SIGSEGV) ['moon'] Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)

      class Textrec():
          def __init__(self, engine_path):
              """
              :param engine_path: The path to the serialized engine to load from disk.
              """
              # Load TRT engine
              self.logger = trt.Logger(trt.Logger.ERROR)
              trt.init_libnvinfer_plugins(self.logger, namespace="")
              self.engine = self.load_engine(engine_path)
              self.context = self.engine.create_execution_context()
              assert self.engine
              assert self.context
              self.context.active_optimization_profile = 0
              self.input_binding_idxs, self.output_binding_idxs = self.get_binding_idxs(self.engine,
                                                                                        self.context.active_optimization_profile)
              self.padvalue = 255
              assert len(self.input_binding_idxs) >= 1
              assert len(self.output_binding_idxs) >= 1
          def get_binding_idxs(self, engine: trt.ICudaEngine, profile_index: int):
              # Calculate start/end binding indices for current context's profile
              num_bindings_per_profile = engine.num_bindings // engine.num_optimization_profiles
              start_binding = profile_index * num_bindings_per_profile
              end_binding = start_binding + num_bindings_per_profile
              print("Engine/Binding Metadata")
              print("\tNumber of optimization profiles: {}".format(engine.num_optimization_profiles))
              print("\tNumber of bindings per profile: {}".format(num_bindings_per_profile))
              print("\tFirst binding for profile {}: {}".format(profile_index, start_binding))
              print("\tLast binding for profile {}: {}".format(profile_index, end_binding - 1))
      
              # Separate input and output binding indices for convenience
              input_binding_idxs = []
              output_binding_idxs = []
              for binding_index in range(start_binding, end_binding):
                  if engine.binding_is_input(binding_index):
                      input_binding_idxs.append(binding_index)
                  else:
                      output_binding_idxs.append(binding_index)
      
              return input_binding_idxs, output_binding_idxs
      
          def load_engine(self, filename: str):
              # Load serialized engine file into memory
              with open(filename, "rb") as f, trt.Runtime(self.logger) as runtime:
                  return runtime.deserialize_cuda_engine(f.read())
      
          def get_input_host(self, img):
              host_inputs = []
              for bind_indx in self.input_binding_idxs:
                  input_shape = self.context.get_binding_shape(bind_indx)
                  input_name = self.engine.get_binding_name(bind_indx)
                  input_dtype = self.engine.get_binding_dtype(bind_indx)
      
                  img = self.pre_processimg(img)
                  host_inputs.append(np.ascontiguousarray(img))
      
                  return host_inputs
      
          def sub_mean_div(self, img):
              if len(img.shape) == 3:
                  img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY)
              img = img / 255
              img = (img - 0.5) / 0.5
              img = np.expand_dims(img, axis=0)
              image = np.expand_dims(img, axis=0).astype(np.float32)
              return image
      
          def pre_processimg(self, img):
              img = cv2.resize(img,(100,32))
              # img = cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY)
              img = self.sub_mean_div(img)
              return img
      
      
          def setup_binding_shapes(self,
                                   engine: trt.ICudaEngine,
                                   context: trt.IExecutionContext,
                                   host_inputs: List[np.ndarray],
                                   input_binding_idxs: List[int],
                                   output_binding_idxs: List[int],
                                   ):
              # Explicitly set the dynamic input shapes, so the dynamic output
              # shapes can be computed internally
              for host_input, binding_index in zip(host_inputs, input_binding_idxs):
                  context.set_binding_shape(binding_index, host_input.shape)
      
              assert context.all_binding_shapes_specified
      
              host_outputs = []
              device_outputs = []
              for binding_index in output_binding_idxs:
                  output_shape = context.get_binding_shape(binding_index)
                  # Allocate buffers to hold output results after copying back to host
                  buffer = np.empty(output_shape, dtype=np.float32)
                  host_outputs.append(buffer)
                  # Allocate output buffers on device
                  device_outputs.append(cuda.mem_alloc(buffer.nbytes))
              return host_outputs, device_outputs
      
          def __call__(self, img):
              host_inputs = self.get_input_host(img)
              # allocate devidce mem for inputs
              device_inputs = [cuda.mem_alloc(h_input.nbytes) for h_input in host_inputs]
              # copy host  inputs to device
              for h_input, d_input in zip(host_inputs, device_inputs):
                  cuda.memcpy_htod(d_input, h_input)
              # if inputs shape change this needs to bu called,if inputs shape always thse same call this once ,reuse this allcocation
              host_outputs, device_outputs = self.setup_binding_shapes(
                  self.engine, self.context, host_inputs, self.input_binding_idxs, self.output_binding_idxs,
              )
              output_names = [self.engine.get_binding_name(binding_idx) for binding_idx in self.output_binding_idxs]
              # Bindings are a list of device pointers for inputs and outputs
              bindingsallocation = device_inputs + device_outputs
      
              self.context.execute_v2(bindingsallocation)
      
              # Copy outputs back to host to view results
              for h_output, d_output in zip(host_outputs, device_outputs):
                  cuda.memcpy_dtoh(h_output, d_output)
              preds = host_outputs[0].reshape(host_outputs[0].shape)

Aug 03 '22 02:08 cqray1990

the mode is too big about 60M,it can't upload

trtexec fails too? are your using like /usr/src/tensorrt/bin/trtexec --onnx=your_model.onnx

Aug 03 '22 08:08 zerollzeng

https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec

Aug 03 '22 08:08 zerollzeng

closing since no activity for more than 3 weeks, please reopen if you still have question, thanks!

Dec 06 '22 01:12 ttyio

TensorRT TensorRT copied to clipboard

what does the errors mean?

Description

Environment

TensorRT
TensorRT copied to clipboard