Source code for trtutils.jetson._benchmark

# Copyright (c) 2024 Justin Davis (davisjustin302@gmail.com)
#
# MIT License
from __future__ import annotations

import time
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING

from jetsontools import TegraStats, filter_data, get_powerdraw, parse_tegrastats

from trtutils._benchmark import Metric
from trtutils._engine import ParallelTRTEngines, TRTEngine
from trtutils._log import LOG

if TYPE_CHECKING:
    from collections.abc import Sequence

    from jetsontools._parsing import Metric as JMetric  # typing fix
    from typing_extensions import Self


@dataclass
class JetsonBenchmarkResult:
    """
    Container for the metrics gathered by a Jetson benchmark run.

    Attributes
    ----------
    latency : Metric
        Metric built from the per-iteration execution times.
    power_draw : Metric
        Metric built from the power draw samples.
    energy : Metric
        Metric built from the per-inference energy values.

    """

    latency: Metric
    power_draw: Metric
    energy: Metric

    def __str__(self: Self) -> str:
        """Return a readable summary using ``str()`` of each metric."""
        return (
            f"JetsonBenchmarkResult(latency={self.latency}, "
            f"power_draw={self.power_draw}, "
            f"energy={self.energy})"
        )

    def __repr__(self: Self) -> str:
        """Return a debug summary using ``repr()`` of each metric."""
        return (
            f"JetsonBenchmarkResult(latency={self.latency!r}, "
            f"power_draw={self.power_draw!r}, "
            f"energy={self.energy!r})"
        )
def benchmark_engine(
    engine: TRTEngine | Path | str,
    iterations: int = 1000,
    warmup_iterations: int = 50,
    tegra_interval: int = 5,
    dla_core: int | None = None,
    *,
    warmup: bool | None = None,
    verbose: bool | None = None,
) -> JetsonBenchmarkResult:
    """
    Benchmark a TensorRT engine on a Jetson device.

    Parameters
    ----------
    engine : TRTEngine | Path | str
        The engine to benchmark.
        Either a TRTEngine object or path to the engine file.
        If a path is given, then a TRTEngine will be created automatically.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    tegra_interval : int, optional
        The number of milliseconds between each tegrastats sampling.
        The smaller the number, the more samples per second are generated.
        By default 5 milliseconds between samples.
    dla_core : int, optional
        The DLA core to assign DLA layers of the engine to. Default is None.
        If None, any DLA layers will be assigned to DLA core 0.
        Only used when the engine is created here from a path; an already
        constructed TRTEngine is used as-is.
    warmup : bool, optional
        Whether to do warmup iterations, by default None
        If None, warmup will be set to True.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.

    Returns
    -------
    JetsonBenchmarkResult
        A dataclass containing the results of the benchmark.

    """
    # Honor the documented default: an unspecified warmup means "do warmup".
    # Without this, None was falsy and a pre-built engine was never warmed up.
    if warmup is None:
        warmup = True

    if isinstance(engine, (Path, str)):
        engine = TRTEngine(
            engine,
            warmup_iterations=warmup_iterations,
            warmup=warmup,
            dla_core=dla_core,
            verbose=verbose,
        )
    elif warmup:
        # an existing engine object has to be warmed up manually
        for _ in range(warmup_iterations):
            engine.mock_execute()

    # list of metrics
    metric_names = ["latency", "power_draw", "energy"]
    raw: dict[str, list[float]] = {metric: [] for metric in metric_names}

    # pre-generate the false data
    false_data = engine.get_random_input(verbose=verbose)

    # create temp file location for data to go
    temp_file = Path.cwd() / "temptegra.txt"

    # store the start/stop times of the engine execution
    # NOTE(review): these timestamps are handed to filter_data together with the
    # parsed tegrastats samples, so they presumably need to share the same
    # wall-clock time base - confirm before replacing time.time().
    start_stop_times: list[tuple[float, float]] = []
    try:
        with TegraStats(temp_file, interval=tegra_interval):
            for _ in range(iterations):
                t0 = time.time()
                engine.mock_execute(false_data, verbose=verbose)
                t1 = time.time()

                raw["latency"].append(t1 - t0)
                start_stop_times.append((t0, t1))

        # parse the tegra data
        tegradata = parse_tegrastats(temp_file)
    finally:
        # delete the temp file, even if execution or parsing failed
        if temp_file.exists():
            temp_file.unlink()

    # filter the data by actual times during execution
    filtered_data, per_inference = filter_data(tegradata, start_stop_times)

    # get the power draw values
    powerdraw_data: dict[str, JMetric] = get_powerdraw(filtered_data)
    raw["power_draw"] = powerdraw_data["VDD_TOTAL"].raw

    # compute energy values
    # for energy values need to compute powerdraw per inference
    # then multiply by the duration of that inference;
    # inferences with no tegrastats samples are skipped
    energy_data = [
        get_powerdraw(inf_data)["VDD_TOTAL"].mean * (inf_stop - inf_start)
        for (inf_start, inf_stop), inf_data in per_inference
        if len(inf_data) > 0
    ]
    raw["energy"] = energy_data

    # calculate the metrics
    metrics: dict[str, Metric] = {}
    for metric_name in metric_names:
        data = raw[metric_name]
        metric = Metric(data)
        metrics[metric_name] = metric
        LOG.debug(f"{metric_name}: {metric}")

    return JetsonBenchmarkResult(
        latency=metrics["latency"],
        power_draw=metrics["power_draw"],
        energy=metrics["energy"],
    )
def benchmark_engines(
    engines: Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]],
    iterations: int = 1000,
    warmup_iterations: int = 50,
    tegra_interval: int = 5,
    *,
    warmup: bool | None = None,
    parallel: bool | None = None,
    verbose: bool | None = None,
) -> list[JetsonBenchmarkResult]:
    """
    Benchmark one or more TensorRT engines on a Jetson device.

    Parameters
    ----------
    engines : Sequence[TRTEngine | Path | str | tuple[TRTEngine | Path | str, int]]
        The engines to benchmark, given as TRTEngine objects or paths to
        the engine files. An entry may also be an ``(engine, dla_core)``
        tuple to assign a DLA core to that engine.
    iterations : int, optional
        The number of iterations to run the benchmark for, by default 1000.
    warmup_iterations : int, optional
        The number of warmup iterations to run before the benchmark, by default 50.
    tegra_interval : int, optional
        The number of milliseconds between each tegrastats sampling.
        The smaller the number, the more samples per second are generated.
        By default 5 milliseconds between samples.
    warmup : bool, optional
        Whether to do warmup iterations, by default None
        If None, warmup will be set to True.
    parallel : bool, optional
        Whether or not to process the engines in parallel.
        Useful for assessing concurrent execution performance.
        Will execute the engines in lockstep.
        If None, will benchmark each engine individually.
    verbose : bool, optional
        Whether or not to output additional information to stdout.
        Default None/False.
        Only forwarded when the engines are benchmarked individually.

    Returns
    -------
    list[JetsonBenchmarkResult]
        A list of dataclasses containing the results of the benchmark.
        If parallel was True, will only contain one item.

    """
    # Honor the documented default: an unspecified warmup means "do warmup".
    # Previously None was forwarded unchanged to the engines.
    if warmup is None:
        warmup = True

    # split every entry into the engine itself and its optional DLA core
    temp_engines: list[Path | TRTEngine] = []
    dla_assignments: list[int | None] = []
    for engine_info in engines:
        engine: TRTEngine | Path | str
        dla_core: int | None = None
        if isinstance(engine_info, tuple):
            engine = engine_info[0]
            dla_core = engine_info[1]
        else:
            engine = engine_info
        if isinstance(engine, str):
            engine = Path(engine)
        temp_engines.append(engine)
        dla_assignments.append(dla_core)

    if not parallel:
        return [
            benchmark_engine(
                engine,
                iterations,
                warmup_iterations,
                tegra_interval,
                dla_core=dla_core,
                warmup=warmup,
                verbose=verbose,
            )
            for engine, dla_core in zip(temp_engines, dla_assignments)
        ]

    # otherwise we need a parallel setup
    trt_engines = ParallelTRTEngines(
        [
            (ep, dc) if dc is not None else ep
            for ep, dc in zip(temp_engines, dla_assignments)
        ],
        warmup_iterations=warmup_iterations,
        warmup=warmup,
    )

    # list of metrics
    metric_names = ["latency", "power_draw", "energy"]
    raw: dict[str, list[float]] = {metric: [] for metric in metric_names}

    # pre-generate the false data
    false_data = trt_engines.get_random_input()

    # create temp file location for data to go
    temp_file = Path.cwd() / "temptegra.txt"

    # store the start/stop times of the engine execution
    # NOTE(review): these timestamps are handed to filter_data together with the
    # parsed tegrastats samples, so they presumably need to share the same
    # wall-clock time base - confirm before replacing time.time().
    start_stop_times: list[tuple[float, float]] = []
    try:
        with TegraStats(temp_file, interval=tegra_interval):
            for _ in range(iterations):
                # one iteration = submit to all engines, then wait for all results
                t0 = time.time()
                trt_engines.submit(false_data)
                trt_engines.retrieve()
                t1 = time.time()

                raw["latency"].append(t1 - t0)
                start_stop_times.append((t0, t1))

        # parse the tegra data
        tegradata = parse_tegrastats(temp_file)
    finally:
        # delete the temp file, even if execution or parsing failed
        if temp_file.exists():
            temp_file.unlink()

    # filter the data by actual times during execution
    filtered_data, per_inference = filter_data(tegradata, start_stop_times)

    # get the power draw values
    powerdraw_data: dict[str, JMetric] = get_powerdraw(filtered_data)
    raw["power_draw"] = powerdraw_data["VDD_TOTAL"].raw

    # compute energy values
    # for energy values need to compute powerdraw per inference
    # then multiply by the duration of that inference;
    # inferences with no tegrastats samples are skipped
    energy_data = [
        get_powerdraw(inf_data)["VDD_TOTAL"].mean * (inf_stop - inf_start)
        for (inf_start, inf_stop), inf_data in per_inference
        if len(inf_data) > 0
    ]
    raw["energy"] = energy_data

    # calculate the metrics
    metrics: dict[str, Metric] = {}
    for metric_name in metric_names:
        data = raw[metric_name]
        metric = Metric(data)
        metrics[metric_name] = metric
        LOG.debug(f"{metric_name}: {metric}")

    return [
        JetsonBenchmarkResult(
            latency=metrics["latency"],
            power_draw=metrics["power_draw"],
            energy=metrics["energy"],
        ),
    ]