# Copyright (c) 2024-2026 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations
import math
import time
from dataclasses import dataclass
from pathlib import Path
from statistics import mean, median, stdev
from typing import TYPE_CHECKING
from ._engine import TRTEngine
from ._log import LOG
from .parallel._parallel_engines import ParallelTRTEngines
if TYPE_CHECKING:
from collections.abc import Sequence
from typing_extensions import Self
@dataclass
class Metric:
    """
    Summary statistics computed over a list of raw benchmark samples.

    Only ``raw`` carries caller-provided information: every other field is
    recomputed from ``raw`` in ``__post_init__``, so values passed for them
    to the constructor are overwritten.

    Attributes
    ----------
    raw : list[float | int]
        The raw samples. Must not be empty.
    mean : float | int
        Arithmetic mean of the samples.
    median : float | int
        Median of the samples.
    min : float | int
        Smallest sample.
    max : float | int
        Largest sample.
    std : float
        Sample standard deviation, or 0.0 when there is only one sample.
    ci95 : float
        ``1.96 * std / sqrt(n)``, i.e. the half-width of a normal-approximation
        95% confidence interval of the mean; 0.0 when there is only one sample.

    Raises
    ------
    ValueError
        If ``raw`` is empty.

    """

    raw: list[float | int]
    mean: float | int = -1.0
    median: float | int = -1.0
    min: float | int = -1.0
    max: float | int = -1.0
    std: float = 0.0
    ci95: float = 0.0

    def __post_init__(self: Self) -> None:
        samples = self.raw
        if not samples:
            err_msg = "Raw data cannot be empty"
            raise ValueError(err_msg)

        self.min = min(samples)
        self.median = median(samples)
        self.max = max(samples)
        self.mean = mean(samples)

        # the spread statistics are undefined for a single sample
        count = len(samples)
        if count > 1:
            self.std = stdev(samples)
            self.ci95 = 1.96 * self.std / math.sqrt(count)
        else:
            self.std = 0.0
            self.ci95 = 0.0

    def __str__(self: Self) -> str:
        # human readable form: every statistic rounded to three decimals
        fields = (
            f"mean={self.mean:.3f}",
            f"median={self.median:.3f}",
            f"min={self.min:.3f}",
            f"max={self.max:.3f}",
            f"std={self.std:.3f}",
            f"ci95={self.ci95:.3f}",
        )
        return "Metric(" + ", ".join(fields) + ")"

    def __repr__(self: Self) -> str:
        # full precision form; the raw samples are intentionally left out
        fields = (
            f"mean={self.mean}",
            f"median={self.median}",
            f"min={self.min}",
            f"max={self.max}",
            f"std={self.std}",
            f"ci95={self.ci95}",
        )
        return "Metric(" + ",".join(fields) + ")"
@dataclass
class BenchmarkResult:
    """
    Container for the metrics produced by a benchmark run.

    Attributes
    ----------
    latency : Metric
        Statistics over the latency samples collected during the benchmark.

    """

    latency: Metric

    def __str__(self: Self) -> str:
        # delegates to the str() form of the contained metric
        latency_text = f"{self.latency}"
        return f"BenchmarkResult(latency={latency_text})"

    def __repr__(self: Self) -> str:
        # delegates to the repr() form of the contained metric
        latency_repr = repr(self.latency)
        return f"BenchmarkResult(latency={latency_repr})"
def benchmark_engine(
    engine: TRTEngine | Path | str,
    iterations: int = 1000,
    warmup_iterations: int = 50,
    dla_core: int | None = None,
    device: int | None = None,
    *,
    warmup: bool | None = None,
    verbose: bool | None = None,
) -> BenchmarkResult:
    """
    Benchmark a TensorRT engine.

    The engine is executed ``iterations`` times on one pre-generated random
    input and the duration of every execution is recorded.

    Parameters
    ----------
    engine : TRTEngine | Path | str
        The engine to benchmark. Either a TRTEngine object or path to the engine file.
        If a path is given, then a TRTEngine will be created automatically.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
        Must be at least 1.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    dla_core : int, optional
        The DLA core to assign DLA layers of the engine to. Default is None.
        If None, any DLA layers will be assigned to DLA core 0.
        Only used when ``engine`` is given as a path.
    device : int, optional
        The CUDA device index to use for the engine. Default is None,
        which uses the current device.
        Only used when ``engine`` is given as a path.
    warmup : bool, optional
        Whether to do warmup iterations, by default None.
        When ``engine`` is a path the value is forwarded unchanged to the
        TRTEngine constructor. When ``engine`` is already a TRTEngine, the
        warmup iterations are run here only if the value is truthy.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.

    Returns
    -------
    BenchmarkResult
        A dataclass containing the results of the benchmark.
        Latency samples are in seconds.

    Raises
    ------
    ValueError
        If ``iterations`` is less than 1.

    """
    # fail before loading/warming the engine: no samples means no statistics
    if iterations < 1:
        err_msg = f"iterations must be at least 1, got {iterations}"
        raise ValueError(err_msg)

    if verbose:
        LOG.debug("Running benchmark_engine")

    if isinstance(engine, (Path, str)):
        engine = TRTEngine(
            engine,
            warmup_iterations=warmup_iterations,
            dla_core=dla_core,
            device=device,
            warmup=warmup,
            verbose=verbose,
        )
    elif warmup:
        for _ in range(warmup_iterations):
            engine.mock_execute(verbose=verbose)

    # pre-generate the false data so input creation is not part of the timing
    false_data = engine.get_random_input(verbose=verbose)

    # perf_counter is monotonic and high resolution, unlike time.time which
    # follows the (adjustable) system clock
    latencies: list[float] = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        engine.mock_execute(false_data, verbose=verbose)
        t1 = time.perf_counter()
        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return BenchmarkResult(
        latency=latency,
    )
def benchmark_engines(
    engines: Sequence[
        TRTEngine
        | Path
        | str
        | tuple[TRTEngine | Path | str, int]
        | tuple[TRTEngine | Path | str, int | None, int | None]
    ],
    iterations: int = 1000,
    warmup_iterations: int = 50,
    *,
    warmup: bool | None = None,
    parallel: bool | None = None,
    verbose: bool | None = None,
) -> list[BenchmarkResult]:
    """
    Benchmark multiple TensorRT engines.

    Parameters
    ----------
    engines : Sequence[...]
        The engines to benchmark. Each element can be a TRTEngine, Path, str,
        a 2-tuple of (engine, dla_core), or a 3-tuple of (engine, dla_core, device).
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    warmup : bool, optional
        Whether to do warmup iterations, by default None.
        The value is forwarded unchanged to benchmark_engine or
        ParallelTRTEngines.
    parallel : bool, optional
        Whether or not to process the engines in parallel.
        Useful for assessing concurrent execution performance.
        Will execute the engines in lockstep.
        If None, will benchmark each engine individually.
        Device assignments from 3-tuples are not forwarded in parallel mode.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.

    Returns
    -------
    list[BenchmarkResult]
        A list of dataclasses containing the results of the benchmark.
        If parallel was True, will only contain one item.
        Latency samples are in seconds.

    Raises
    ------
    ValueError
        If an element of ``engines`` is a tuple that does not have exactly
        2 or 3 elements, or if ``parallel`` is set and ``iterations`` is
        less than 1.

    """
    temp_engines: list[Path | TRTEngine] = []
    dla_assignments: list[int | None] = []
    device_assignments: list[int | None] = []
    for engine_info in engines:
        dla_core: int | None = None
        device: int | None = None
        if isinstance(engine_info, tuple):
            # the tuple length decides whether a device index was supplied
            if len(engine_info) == 3:
                engine, dla_core, device = engine_info  # type: ignore[assignment]
            elif len(engine_info) == 2:
                engine, dla_core = engine_info  # type: ignore[assignment]
            else:
                err_msg = (
                    "Engine tuples must be (engine, dla_core) or "
                    f"(engine, dla_core, device), got {len(engine_info)} element(s)"
                )
                raise ValueError(err_msg)
        else:
            engine = engine_info
        if isinstance(engine, str):
            engine = Path(engine)
        temp_engines.append(engine)  # type: ignore[arg-type]
        dla_assignments.append(dla_core)
        device_assignments.append(device)

    if not parallel:
        return [
            benchmark_engine(
                engine,
                iterations,
                warmup_iterations,
                dla_core=dla_core,
                device=device,
                warmup=warmup,
                verbose=verbose,
            )
            for engine, dla_core, device in zip(temp_engines, dla_assignments, device_assignments)
        ]

    # otherwise we need a parallel setup
    # fail before loading the engines: no samples means no statistics
    if iterations < 1:
        err_msg = f"iterations must be at least 1, got {iterations}"
        raise ValueError(err_msg)

    # NOTE(review): only (engine, dla_core) pairs are handed to
    # ParallelTRTEngines here; confirm whether it can accept a device index
    if any(dev is not None for dev in device_assignments):
        LOG.warning("Device assignments are not forwarded when parallel=True")

    trt_engines = ParallelTRTEngines(
        [(ep, dc) if dc is not None else ep for ep, dc in zip(temp_engines, dla_assignments)],
        warmup_iterations=warmup_iterations,
        warmup=warmup,
    )

    # pre-generate the false data so input creation is not part of the timing
    false_data = trt_engines.get_random_input()

    # perf_counter is monotonic and high resolution, unlike time.time which
    # follows the (adjustable) system clock
    latencies: list[float] = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        trt_engines.submit(false_data)
        trt_engines.retrieve()
        t1 = time.perf_counter()
        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return [
        BenchmarkResult(
            latency=latency,
        ),
    ]