
How to fix the output batch size in tf2onnx


I am trying to convert mars-small128.pb (model_link) to ONNX. I use `python3 -m tf2onnx.convert --graphdef mars-small128.pb --output saved_model.onnx --inputs "images:0" --outputs "features:0"`. Before converting, I investigated mars-small128.pb using tf.compat.v1, and the output layer has shape [None, 128].
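For reference, the inspection looks roughly like this (a minimal sketch; it assumes the tensor names `images:0` and `features:0` from the convert command above):

```python
import tensorflow as tf

# Load the frozen graph definition from disk.
graph_def = tf.compat.v1.GraphDef()
with tf.compat.v1.gfile.GFile("mars-small128.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Import it into a fresh graph and read the output tensor's static shape.
with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    features = graph.get_tensor_by_name("features:0")
    print(features.shape)  # expected: (None, 128), i.e. dynamic batch dim
```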

The ONNX file was created successfully, but when I try to build the TensorRT engine, the output shape automatically changes to [1024, 128].
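To confirm where the batch dimension gets fixed, the exported ONNX file itself can be checked (a minimal diagnostic sketch using the `onnx` Python package; it only prints the declared output dims):

```python
import onnx

model = onnx.load("saved_model.onnx")
# Print each graph output's declared shape; a dynamic batch dimension
# shows up as a symbolic dim_param rather than a concrete dim_value.
for output in model.graph.output:
    dims = [
        d.dim_param if d.dim_param else d.dim_value
        for d in output.type.tensor_type.shape.dim
    ]
    print(output.name, dims)
```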

I attempted to modify the model output before exporting, but then the engine could not be built.
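One workaround I have seen suggested (a hedged sketch, not necessarily the right fix here; the output file name is a placeholder) is to rewrite the batch dimension of the graph inputs and outputs to a named symbolic axis before parsing the model in TensorRT:

```python
import onnx

model = onnx.load("saved_model.onnx")
# Mark the first dimension of every graph input/output as a symbolic
# "batch" axis so it is treated as dynamic downstream.
for tensor in list(model.graph.input) + list(model.graph.output):
    tensor.type.tensor_type.shape.dim[0].dim_param = "batch"
onnx.save(model, "saved_model_dynamic.onnx")  # placeholder output path
```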

Here is my code to build an engine:

```python
# --------------------------------------------------------------------------- #
# Import
# --------------------------------------------------------------------------- #
import logging
import os
import sys
from pathlib import Path

import numpy as np
import tensorrt as trt
from cuda import cudart

root_dir = Path(__file__).resolve().parent
sys.path.insert(1, os.path.join(root_dir, os.pardir))

from utils import common
from utils.engine_calibrator import EngineCalibrator
from utils.image_batcher import ImageBatcher

logging.basicConfig(level=logging.INFO)
logging.getLogger("EngineBuilder").setLevel(logging.INFO)
log = logging.getLogger("EngineBuilder")


# --------------------------------------------------------------------------- #
# Define functions/classes
# --------------------------------------------------------------------------- #
class EngineBuilder:
    """Parses an ONNX graph and builds a TensorRT engine from it."""

    def __init__(self, verbose=False, workspace=8) -> None:
        """
        :param verbose: If enabled, a higher verbosity level will be set
            on the TensorRT logger.
        :param workspace: Max memory workspace to allow, in GiB.
        """
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        if verbose:
            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE

        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")

        self.builder = trt.Builder(self.trt_logger)
        self.config = self.builder.create_builder_config()
        self.config.max_workspace_size = workspace * (2 ** 30)

        self.batch_size = None
        self.network = None
        self.parser = None

    def create_network(self, onnx_path: os.PathLike, batch_size=None) -> None:
        """
        Parse the ONNX graph and create the corresponding
        TensorRT network definition.

        :param onnx_path: The path to the ONNX graph to load.
        :param batch_size: Static batch size to build the engine with.
        """
        network_flags = (
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        )

        self.network = self.builder.create_network(network_flags)
        self.parser = trt.OnnxParser(self.network, self.trt_logger)

        onnx_path = os.path.realpath(onnx_path)
        with open(onnx_path, "rb") as f:
            if not self.parser.parse(f.read()):
                log.error("Failed to load ONNX file: {}".format(onnx_path))
                for error in range(self.parser.num_errors):
                    log.error(self.parser.get_error(error))
                sys.exit(1)

        inputs = [
            self.network.get_input(i) for i in range(self.network.num_inputs)
        ]
        outputs = [
            self.network.get_output(i) for i in range(self.network.num_outputs)
        ]

        log.info("Network Description")

        # Set shape: min/opt/max of the optimization profile are all pinned
        # to the same value, so the engine is built with a static batch.
        self.batch_size = batch_size
        profile = self.builder.create_optimization_profile()
        input_layer_name = None
        for inp in inputs:
            log.info(
                "Input '{}' with shape {} and dtype {}".format(
                    inp.name, inp.shape, inp.dtype
                )
            )
            if not input_layer_name:
                input_layer_name = str(inp.name)

        for output in outputs:
            log.info(
                "Output '{}' with shape {} and dtype {}".format(
                    output.name, output.shape, output.dtype
                )
            )

        # NOTE: this assumes a single 4D input; `inp` is the last (here:
        # only) input from the loop above.
        profile.set_shape(
            input_layer_name,
            (self.batch_size, inp.shape[1], inp.shape[2], inp.shape[3]),
            (self.batch_size, inp.shape[1], inp.shape[2], inp.shape[3]),
            (self.batch_size, inp.shape[1], inp.shape[2], inp.shape[3]),
        )

        self.config.add_optimization_profile(profile)
        assert self.batch_size > 0
        self.builder.max_batch_size = self.batch_size

    def create_engine(self, engine_path: os.PathLike, precision: str) -> None:
        """
        Build the TensorRT engine and serialize it to disk.

        :param engine_path: The path where to serialize the engine to.
        :param precision: The datatype to use for the engine,
            either 'fp32' or 'fp16'.
        """
        engine_path = os.path.realpath(engine_path)
        engine_dir = os.path.dirname(engine_path)
        os.makedirs(engine_dir, exist_ok=True)
        log.info("Building {} Engine in {}".format(precision, engine_path))

        self.config.set_flag(trt.BuilderFlag.STRICT_TYPES)

        if precision == "fp16":
            if not self.builder.platform_has_fast_fp16:
                log.warning(
                    "FP16 is not supported natively on this platform/device"
                )
            else:
                self.config.set_flag(trt.BuilderFlag.FP16)

        log.info("Starting engine build process, this might take a while...")
        with self.builder.build_engine(
            self.network, self.config
        ) as engine, open(engine_path, "wb") as f:
            log.info("Serializing engine to file: {:}".format(engine_path))
            f.write(engine.serialize())


def main(
    onnx: os.PathLike,
    engine: os.PathLike,
    batch_size=1,
    precision="fp16",
    verbose=False,
    workspace=8,
) -> None:
    """Convert an ONNX model to a TensorRT engine.

    Args:
        onnx (str): The input ONNX model file to load.
        engine (str): The output path for the TRT engine.
        batch_size (int, optional): The static batch size to build
            the engine with. Defaults to 1.
        precision (str, optional): The precision mode to build in,
            either fp32/fp16. Defaults to "fp16".
        verbose (bool, optional): Enable more verbose log output.
            Defaults to False.
        workspace (int, optional): The max memory workspace size to allow,
            in GiB. Defaults to 8.
    """
    log.info(f"Build at the precision: {precision}")
    log.info(f"Allow the workspace size (in GiB): {workspace}")
    builder = EngineBuilder(verbose, workspace)
    builder.create_network(onnx, batch_size)
    builder.create_engine(engine, precision)


if __name__ == "__main__":
    pass
```
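(The `__main__` block above is left as a `pass` in my script; a hypothetical call of `main` would look like the following, with placeholder paths.)

```python
# Hypothetical usage; file paths are placeholders.
main(
    onnx="saved_model.onnx",
    engine="mars-small128.engine",
    batch_size=1,
    precision="fp16",
)
```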

tinnapopd · May 28 '24 10:05