Source code for trtutils._benchmark

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations

import time
from dataclasses import dataclass
from pathlib import Path
from statistics import mean, median
from typing import TYPE_CHECKING

from ._engine import ParallelTRTEngines, TRTEngine
from ._log import LOG

if TYPE_CHECKING:
    from collections.abc import Sequence

    from typing_extensions import Self



[docs]
@dataclass
class Metric:
    """
    Summary statistics for one measured quantity of a benchmark.

    Only ``raw`` needs to be supplied. The ``mean``, ``median``, ``min`` and
    ``max`` fields are recomputed from ``raw`` right after initialization, so
    any values passed for them are overwritten.

    Raises
    ------
    ValueError
        If ``raw`` is empty.

    """

    raw: list[float | int]
    mean: float | int = -1.0
    median: float | int = -1.0
    min: float | int = -1.0
    max: float | int = -1.0

    def __post_init__(self: Self) -> None:
        """Derive the summary fields from ``raw``."""
        samples = self.raw
        if not samples:
            empty_msg = "Raw data cannot be empty"
            raise ValueError(empty_msg)

        # Inside a method the bare names below resolve to the builtins and the
        # statistics helpers, not to the dataclass fields of the same name.
        self.min = min(samples)
        self.median = median(samples)
        self.max = max(samples)
        self.mean = mean(samples)

    def __str__(self: Self) -> str:
        """Return a human-readable summary with three decimal places."""
        labelled = (
            ("mean", self.mean),
            ("median", self.median),
            ("min", self.min),
            ("max", self.max),
        )
        body = ", ".join(f"{label}={value:.3f}" for label, value in labelled)
        return f"Metric({body})"

    def __repr__(self: Self) -> str:
        """Return a compact summary with full precision (``raw`` is omitted)."""
        labelled = (
            ("mean", self.mean),
            ("median", self.median),
            ("min", self.min),
            ("max", self.max),
        )
        body = ",".join(f"{label}={value}" for label, value in labelled)
        return f"Metric({body})"




[docs]
@dataclass
class BenchmarkResult:
    """
    Container for the metrics produced by a single benchmark run.

    Attributes
    ----------
    latency : Metric
        Summary of the per-iteration latency measurements.

    """

    latency: Metric

    def __str__(self: Self) -> str:
        """Return a readable form built from ``str`` of the latency metric."""
        latency_text = format(self.latency)
        return f"BenchmarkResult(latency={latency_text})"

    def __repr__(self: Self) -> str:
        """Return a debug form built from ``repr`` of the latency metric."""
        latency_text = repr(self.latency)
        return f"BenchmarkResult(latency={latency_text})"




[docs]
def benchmark_engine(
    engine: TRTEngine | Path | str,
    iterations: int = 1000,
    warmup_iterations: int = 50,
    dla_core: int | None = None,
    *,
    warmup: bool | None = None,
    verbose: bool | None = None,
) -> BenchmarkResult:
    """
    Benchmark a TensorRT engine.

    Each iteration times a single ``mock_execute`` call on pre-generated
    random input; the collected latencies are in seconds.

    Parameters
    ----------
    engine : TRTEngine | Path | str
        The engine to benchmark. Either a TRTEngine object or path to the engine file.
        If a path is given, then a TRTEngine will be created automatically.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
        Must be at least 1.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    dla_core : int, optional
        The DLA core to assign DLA layers of the engine to. Default is None.
        Only used when ``engine`` is a path, in which case it is forwarded
        to the TRTEngine constructor.
    warmup : bool, optional
        Whether to do warmup iterations, by default None.
        When ``engine`` is a path, the value is forwarded to the TRTEngine
        constructor. When ``engine`` is already a TRTEngine, the warmup
        iterations are run here only if this is truthy (None skips them).
    verbose : bool, optional
        Whether or not to output additional information.
        Default None/False.

    Returns
    -------
    BenchmarkResult
        A dataclass containing the results of the benchmark.

    Raises
    ------
    ValueError
        If ``iterations`` is less than 1.

    """
    # Fail early with a clear message; otherwise an empty sample list would
    # only be rejected later, when the latency Metric is built.
    if iterations < 1:
        err_msg = f"iterations must be at least 1, got {iterations}"
        raise ValueError(err_msg)

    if verbose:
        LOG.debug("Running benchmark_engine")

    if isinstance(engine, (Path, str)):
        engine = TRTEngine(
            engine,
            warmup_iterations=warmup_iterations,
            dla_core=dla_core,
            warmup=warmup,
            verbose=verbose,
        )
    elif warmup:
        # an existing engine is warmed up manually
        for _ in range(warmup_iterations):
            engine.mock_execute(verbose=verbose)

    # pre-generate the false data so input creation is not part of the timing
    false_data = engine.get_random_input(verbose=verbose)

    latencies: list[float] = []
    for _ in range(iterations):
        # perf_counter is monotonic and high resolution, unlike time.time,
        # which follows the (adjustable) system clock
        t0 = time.perf_counter()
        engine.mock_execute(false_data, verbose=verbose)
        t1 = time.perf_counter()

        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return BenchmarkResult(
        latency=latency,
    )




[docs]
def benchmark_engines(
    engines: Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]],
    iterations: int = 1000,
    warmup_iterations: int = 50,
    *,
    warmup: bool | None = None,
    parallel: bool | None = None,
    verbose: bool | None = None,
) -> list[BenchmarkResult]:
    """
    Benchmark one or more TensorRT engines.

    Engines are benchmarked one after another by default, or together
    through a ParallelTRTEngines instance when ``parallel`` is truthy.
    Latencies measured in the parallel path are in seconds.

    Parameters
    ----------
    engines : Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]],
        The engines to benchmark. Each entry is a TRTEngine, a path to an
        engine file, or a tuple of one of those and the DLA core to use
        for that engine.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    warmup : bool, optional
        Whether to do warmup iterations, by default None.
        Forwarded unchanged to benchmark_engine or ParallelTRTEngines.
    parallel : bool, optional
        Whether or not to process the engines in parallel.
        Useful for assessing concurrent execution performance.
        Will execute the engines in lockstep.
        If None, will benchmark each engine individually.
    verbose : bool, optional
        Whether or not to output additional information.
        Default None/False. Only forwarded when the engines are
        benchmarked individually.

    Returns
    -------
    list[BenchmarkResult]
        A list of dataclasses containing the results of the benchmark.
        If parallel was True, will only contain one item.

    Raises
    ------
    ValueError
        If ``parallel`` is truthy and ``iterations`` is less than 1.

    """
    # split each entry into the engine (str paths become Path) and its DLA core
    temp_engines: list[Path | TRTEngine] = []
    dla_assignments: list[int | None] = []
    for engine_info in engines:
        engine: TRTEngine | Path | str
        dla_core: int | None = None
        if isinstance(engine_info, tuple):
            engine = engine_info[0]
            dla_core = engine_info[1]
        else:
            engine = engine_info
        if isinstance(engine, str):
            engine = Path(engine)
        temp_engines.append(engine)
        dla_assignments.append(dla_core)

    if not parallel:
        return [
            benchmark_engine(
                engine,
                iterations,
                warmup_iterations,
                dla_core=dla_core,
                warmup=warmup,
                verbose=verbose,
            )
            for engine, dla_core in zip(temp_engines, dla_assignments)
        ]

    # Fail before any engine is loaded; otherwise an empty sample list would
    # only be rejected later, when the latency Metric is built.
    if iterations < 1:
        err_msg = f"iterations must be at least 1, got {iterations}"
        raise ValueError(err_msg)

    # otherwise we need a parallel setup
    trt_engines = ParallelTRTEngines(
        [
            (ep, dc) if dc is not None else ep
            for ep, dc in zip(temp_engines, dla_assignments)
        ],
        warmup_iterations=warmup_iterations,
        warmup=warmup,
    )

    # pre-generate the false data so input creation is not part of the timing
    false_data = trt_engines.get_random_input()

    latencies: list[float] = []
    for _ in range(iterations):
        # one sample covers submitting to all engines and retrieving the
        # results; perf_counter is monotonic and high resolution, unlike
        # time.time, which follows the (adjustable) system clock
        t0 = time.perf_counter()
        trt_engines.submit(false_data)
        trt_engines.retrieve()
        t1 = time.perf_counter()

        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return [
        BenchmarkResult(
            latency=latency,
        ),
    ]