Source code for trtutils._benchmark

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations

import time
from dataclasses import dataclass
from pathlib import Path
from statistics import mean, median
from typing import TYPE_CHECKING

from ._engine import ParallelTRTEngines, TRTEngine
from ._log import LOG

if TYPE_CHECKING:
    from collections.abc import Sequence

    from typing_extensions import Self


@dataclass
class Metric:
    """
    Summary statistics computed over a list of raw measurements.

    Only ``raw`` needs to be supplied; ``mean``, ``median``, ``min`` and
    ``max`` are overwritten from ``raw`` once the instance is initialized.

    Raises
    ------
    ValueError
        If ``raw`` is empty.

    """

    raw: list[float | int]
    # Placeholders only: each is replaced in __post_init__.
    mean: float | int = -1.0
    median: float | int = -1.0
    min: float | int = -1.0
    max: float | int = -1.0

    def __post_init__(self: Self) -> None:
        samples = self.raw
        if not samples:
            raise ValueError("Raw data cannot be empty")

        # min/max/mean/median here resolve to the builtins and the
        # statistics functions, not to the same-named fields above.
        self.min = min(samples)
        self.median = median(samples)
        self.max = max(samples)
        self.mean = mean(samples)

    def __str__(self: Self) -> str:
        # Human-readable form: every statistic rounded to three decimals.
        stats = (
            f"mean={self.mean:.3f}",
            f"median={self.median:.3f}",
            f"min={self.min:.3f}",
            f"max={self.max:.3f}",
        )
        return "Metric(" + ", ".join(stats) + ")"

    def __repr__(self: Self) -> str:
        # Full-precision form; fields are separated by a bare comma.
        stats = (
            f"mean={self.mean}",
            f"median={self.median}",
            f"min={self.min}",
            f"max={self.max}",
        )
        return "Metric(" + ",".join(stats) + ")"
@dataclass
class BenchmarkResult:
    """
    Container for the metrics produced by a benchmark run.

    Attributes
    ----------
    latency : Metric
        Statistics over the per-iteration latency measurements.

    """

    latency: Metric

    def __str__(self: Self) -> str:
        # Uses the default formatting of the latency metric.
        return "BenchmarkResult(latency={})".format(self.latency)

    def __repr__(self: Self) -> str:
        # Uses the repr of the latency metric.
        return "BenchmarkResult(latency={!r})".format(self.latency)
def benchmark_engine(
    engine: TRTEngine | Path | str,
    iterations: int = 1000,
    warmup_iterations: int = 50,
    dla_core: int | None = None,
    *,
    warmup: bool | None = None,
    verbose: bool | None = None,
) -> BenchmarkResult:
    """
    Benchmark a TensorRT engine.

    Each iteration times a single ``mock_execute`` call on pre-generated
    random input, so input generation is excluded from the measurement.

    Parameters
    ----------
    engine : TRTEngine | Path | str
        The engine to benchmark.
        Either a TRTEngine object or path to the engine file.
        If a path is given, then a TRTEngine will be created automatically.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
        Must be at least 1.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    dla_core : int, optional
        The DLA core to assign DLA layers of the engine to. Default is None.
        If None, any DLA layers will be assigned to DLA core 0.
        Only used when the engine is created from a path.
    warmup : bool, optional
        Whether to do warmup iterations, by default None
        If None, warmup will be set to True.
        For a path, the value is forwarded to the TRTEngine constructor;
        for an existing TRTEngine, the warmup iterations are run here.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.

    Returns
    -------
    BenchmarkResult
        A dataclass containing the results of the benchmark.
        Latency values are in seconds.

    Raises
    ------
    ValueError
        If iterations is less than 1.

    """
    # Fail before loading or warming up an engine; otherwise the error would
    # only surface later as an empty-data ValueError from Metric.
    if iterations < 1:
        err_msg = "iterations must be at least 1"
        raise ValueError(err_msg)

    if verbose:
        LOG.debug("Running benchmark_engine")

    if isinstance(engine, (Path, str)):
        engine = TRTEngine(
            engine,
            warmup_iterations=warmup_iterations,
            dla_core=dla_core,
            warmup=warmup,
            verbose=verbose,
        )
    elif warmup is None or warmup:
        # A pre-built engine is warmed up here; None means "warm up",
        # matching the documented default.
        for _ in range(warmup_iterations):
            engine.mock_execute(verbose=verbose)

    # pre-generate the false data
    false_data = engine.get_random_input(verbose=verbose)

    latencies: list[float] = []
    for _ in range(iterations):
        # perf_counter is monotonic and high resolution; time.time() can
        # jump with system clock adjustments and is too coarse on some platforms.
        t0 = time.perf_counter()
        engine.mock_execute(false_data, verbose=verbose)
        t1 = time.perf_counter()
        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return BenchmarkResult(
        latency=latency,
    )
def benchmark_engines(
    engines: Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]],
    iterations: int = 1000,
    warmup_iterations: int = 50,
    *,
    warmup: bool | None = None,
    parallel: bool | None = None,
    verbose: bool | None = None,
) -> list[BenchmarkResult]:
    """
    Benchmark multiple TensorRT engines, individually or in parallel.

    Parameters
    ----------
    engines : Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]],
        The engines to benchmark, as TRTEngine objects or paths to the
        engine files. An entry may also be a tuple of (engine, dla_core)
        to give the DLA core for that engine.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    warmup : bool, optional
        Whether to do warmup iterations, by default None
        If None, warmup will be set to True.
    parallel : bool, optional
        Whether or not to process the engines in parallel.
        Useful for assessing concurrent execution performance.
        Will execute the engines in lockstep.
        If None, will benchmark each engine individually.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.
        Only forwarded when benchmarking each engine individually;
        the parallel path does not use it.

    Returns
    -------
    list[BenchmarkResult]
        A list of dataclasses containing the results of the benchmark.
        If parallel was True, will only contain one item, whose latency
        covers one submit/retrieve round trip across all engines.
        Latency values are in seconds.

    Raises
    ------
    ValueError
        If parallel is True and iterations is less than 1.

    """
    # Normalize every entry to an (engine, dla_core) pair, with str paths
    # converted to Path.
    temp_engines: list[Path | TRTEngine] = []
    dla_assignments: list[int | None] = []
    for engine_info in engines:
        engine: TRTEngine | Path | str
        dla_core: int | None = None
        if isinstance(engine_info, tuple):
            engine, dla_core = engine_info[0], engine_info[1]
        else:
            engine = engine_info
        if isinstance(engine, str):
            engine = Path(engine)
        temp_engines.append(engine)
        dla_assignments.append(dla_core)

    if not parallel:
        return [
            benchmark_engine(
                engine,
                iterations,
                warmup_iterations,
                dla_core=dla_core,
                warmup=warmup,
                verbose=verbose,
            )
            for engine, dla_core in zip(temp_engines, dla_assignments)
        ]

    # Fail before building the engines; otherwise the error would only
    # surface later as an empty-data ValueError from Metric.
    if iterations < 1:
        err_msg = "iterations must be at least 1"
        raise ValueError(err_msg)

    # otherwise we need a parallel setup
    trt_engines = ParallelTRTEngines(
        [
            (ep, dc) if dc is not None else ep
            for ep, dc in zip(temp_engines, dla_assignments)
        ],
        warmup_iterations=warmup_iterations,
        warmup=warmup,
    )

    # pre-generate the false data
    false_data = trt_engines.get_random_input()

    latencies: list[float] = []
    for _ in range(iterations):
        # perf_counter is monotonic and high resolution; time.time() can
        # jump with system clock adjustments and is too coarse on some platforms.
        t0 = time.perf_counter()
        trt_engines.submit(false_data)
        trt_engines.retrieve()
        t1 = time.perf_counter()
        latencies.append(t1 - t0)

    # calculate the metrics
    latency = Metric(latencies)
    LOG.debug(f"latency: {latency}")

    return [
        BenchmarkResult(
            latency=latency,
        ),
    ]