Source code for trtutils.inspect._inspect

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
# mypy: disable-error-code="import-untyped"
from __future__ import annotations

import contextlib
from pathlib import Path

with contextlib.suppress(ImportError):
    import tensorrt as trt

from trtutils._flags import FLAGS
from trtutils._log import LOG
from trtutils.core._engine import create_engine
from trtutils.core._stream import destroy_stream



[docs]
def inspect_engine(
    engine: Path | str | trt.ICudaEngine,
    *,
    verbose: bool | None = None,
) -> tuple[
    int,
    int,
    list[tuple[str, tuple[int, ...], trt.DataType, trt.TensorFormat]],
    list[tuple[str, tuple[int, ...], trt.DataType, trt.TensorFormat]],
]:
    """
    Inspect a TensorRT engine.

    Parameters
    ----------
    engine : Path | str | trt.ICudaEngine
        Path to the TensorRT engine file or an already loaded engine
    verbose : bool | None, optional
        Whether to print verbose output, by default None

    Returns
    -------
    tuple[int, int, list[tuple[str, tuple[int, ...], trt.DataType, trt.TensorFormat]], list[tuple[str, tuple[int, ...], trt.DataType, trt.TensorFormat]]]
        The size in bytes of the engine, the max batch size, and two lists of input and output tensors

    """
    loaded = False
    if isinstance(engine, (Path, str)):
        engine, context, logger, stream = create_engine(engine)
        loaded = True

    engine_mem_size: int = 0
    if FLAGS.MEMSIZE_V2:
        engine_mem_size = engine.device_memory_size_v2
    else:
        engine_mem_size = engine.device_memory_size

    # Get all input and output tensors first
    input_tensors = []
    output_tensors = []
    num_tensors = (
        range(engine.num_io_tensors) if FLAGS.TRT_10 else range(engine.num_bindings)
    )

    for i in num_tensors:
        # check
        if FLAGS.TRT_10:
            tensor_name = engine.get_tensor_name(i)
            shape = engine.get_tensor_shape(tensor_name)
            dtype = engine.get_tensor_dtype(tensor_name)
            fmt = engine.get_tensor_format(tensor_name)
            is_input = engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT
        else:
            tensor_name = engine.get_binding_name(i)
            shape = engine.get_binding_shape(i)
            dtype = engine.get_binding_dtype(i)
            fmt = engine.get_binding_format(i)
            is_input = engine.binding_is_input(i)

        # store
        if is_input:
            input_tensors.append((tensor_name, shape, dtype, fmt))
        else:
            output_tensors.append((tensor_name, shape, dtype, fmt))

    batch_size: int = 0
    try:
        batch_size = engine.max_batch_size
    except AttributeError:
        if input_tensors:
            _, shape, _, _ = input_tensors[0]
            if shape and len(shape) > 0:
                batch_size = shape[0]

    if verbose:
        LOG.info("Engine Info:")
        LOG.info(f"\tMax Batch Size: {batch_size}")
        LOG.info(f"\tNum IO Tensors: {num_tensors}")
        LOG.info(f"\tDevice Memory Size: {engine_mem_size / (1024 * 1024):.2f} MB")
        LOG.info("\tInput Tensors:")
        for name, shape, dtype, fmt in input_tensors:
            LOG.info(f"\t\t{name}: shape={shape}, dtype={dtype}, format={fmt}")
        LOG.info("\tOutput Tensors:")
        for name, shape, dtype, fmt in output_tensors:
            LOG.info(f"\t\t{name}: shape={shape}, dtype={dtype}, format={fmt}")
        LOG.info("")

    if loaded:
        del engine
        del context
        del logger
        destroy_stream(stream)

    return engine_mem_size, batch_size, input_tensors, output_tensors