# Source code for trtutils.trtexec._build

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations

from typing import TYPE_CHECKING

from trtutils._flags import FLAGS
from trtutils._log import LOG

from ._run import run_trtexec

if TYPE_CHECKING:
    from collections.abc import Sequence
    from pathlib import Path


[docs] def build_engine( weights: Path, output: Path, use_dla_core: int | None = None, shapes: Sequence[tuple[str, tuple[int, ...]]] | None = None, workspace: int | None = None, add_args: Sequence[str] | None = None, *, fp16: bool | None = None, int8: bool | None = None, fp8: bool | None = None, best: bool | None = None, allow_gpu_fallback: bool | None = None, ) -> bool: """ Build an engine from a weight file using trtexec. Parameters ---------- weights : Path The path to the weight file to build the engine from. Examples are: .onnx, .prototxt If a .onnx file is provided, the engine will be built from the ONNX model. If a .prototxt file is provided, the engine will be built with random weights based on the model architecture. output : Path The path to save the built engine to. use_dla_core : int, optional The DLA core to use for building the engine, by default None. The DLA core should be either 0 or 1 if specified. shapes : tuple[tuple[int, ...], ...], optional The input shapes to use for the engine, by default None. If provided, the engine will be built with these input shapes. The name of the input must also be defined. An example could be: (("images", (1, 3, 640, 640)), ...) workspace : int, optional The workspace size to use for the engine, by default None. Expressed in MiB. fp16 : bool, optional Whether to use FP16 precision for the engine, by default None. int8 : bool, optional Whether to use INT8 precision for the engine, by default None. fp8 : bool, optional Whether to use FP8 precision for the engine, by default None. best : bool, optional Whether to use the best precision available for the engine, by default None. allow_gpu_fallback : bool, optional Whether to allow GPU fallback when a layer is not supported on DLA. By default, this is None. add_args : Sequence[str], optional Additional arguments to pass to trtexec, by default None. Returns ------- bool Whether the engine was built successfully. 
Raises ------ FileNotFoundError If the weight file is not found. IsADirectoryError If the weight file is a directory. ValueError If the weight file does not have a valid extension. ValueError If the DLA core is not 0 or 1. RuntimeError If the command generation failed. TypeError If the input shapes are not integers. """ if not weights.exists(): err_msg = f"Weight file not found at {weights}" raise FileNotFoundError(err_msg) if weights.is_dir(): err_msg = "Weight file should not be a directory" raise IsADirectoryError(err_msg) # ensure a valid suffix is present valid_weights = [ ".onnx", ".prototxt", ] if weights.suffix not in valid_weights: err_msg = "Weights file has invalid extension." err_msg += f" Supported extensions are: {valid_weights}." err_msg += f" Found: {weights.suffix}" raise ValueError(err_msg) if output.exists(): LOG.warning(f"Overwriting existing file at {output}") if use_dla_core is not None and use_dla_core not in [0, 1]: err_msg = "DLA core must be either 0 or 1" raise ValueError(err_msg) if allow_gpu_fallback and use_dla_core is None: LOG.warning("GPU fallback enabled without specifying DLA core") if best and (fp16 or int8 or fp8): LOG.warning("Best precision enabled with other precisions also being enabled.") LOG.warning("Best precision level ENABLES ALL precisions") if fp16 and int8: LOG.warning( "FP16 and INT8 precision cannot be used together. Using lower precision level.", ) fp16 = False if fp16 and fp8: LOG.warning( "FP16 and FP8 precision cannot be used together. Using lower precision level.", ) fp16 = False if int8 and fp8: LOG.warning( "INT8 and FP8 precision cannot be used together. 
Using lower precision level.", ) int8 = False # resolve the model and output paths weights_path_str = str(weights.resolve()) output_path_str = str(output.resolve()) # parse any shapes input if it exists shapes_str = "" if shapes: for name, shape in shapes: dim_str = "" for dim in shape: if not isinstance(dim, int): err_msg = "Input shapes must be integers" raise TypeError(err_msg) dim_str += f"{dim}x" dim_str = dim_str[:-1] shape_str = f"{name}:{dim_str}" shapes_str += f"{shape_str}," if len(shapes_str) > 0: shapes_str = shapes_str[:-1] # generate initial command with weight input command = "" if weights.suffix == ".onnx": command += f" --onnx={weights_path_str}" elif weights.suffix == ".prototxt": command += f" --deploy={weights_path_str}" # check length, if zero something went wrong if len(command) == 0: err_msg = "After generating command, no weight input was found." err_msg += " This is an internal error, please report." raise RuntimeError(err_msg) command += f" --saveEngine={output_path_str}" if FLAGS.TRT_10: command += " --skipInference" if isinstance(use_dla_core, int): command += f" --useDLACore={use_dla_core}" command += " --memPoolSize=dlaSRAM:1" if fp16: command += " --fp16" if int8: command += " --int8" if fp8: command += " --fp8" if best: command += " --best" if allow_gpu_fallback: command += " --allowGPUFallback" if shapes_str: command += f" --shapes={shapes_str}" if workspace: command += f" --workspace={workspace}" # handle additional arguments if add_args: for arg in add_args: command += f" {arg}" # debug print LOG.debug(f"TRTEXEC Command: {command}") success, _, stderr = run_trtexec(command) if not success: LOG.error(f"Error building engine from ONNX: {stderr}") return success