| from parser.aggregate import aggregate_times, nan_to_zero, LAYER_TOTAL |
| from parser.naming import layers, names, phases, make_tag, subphases |
| from parser.naming import LAYER_APPLICATION, LAYER_CPU |
| from parser.naming import (PHASE_INITIALIZATION, PHASE_PREPARATION, PHASE_COMPILATION, |
| PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION, PHASE_RESULTS, |
| PHASE_TERMINATION, PHASE_OVERALL, PHASE_WARMUP, |
| PHASE_BENCHMARK) |
| import json |
| import math |
| import sys |
| |
def print_stats(tracker_map, print_detail=True, total_times=False, per_execution=False,
                json_output=False, starting_mark='', sep=''):
    """ Prints statistics for a single Overall phase as text or json.

        For text output:
        By default prints the self-time for each layer, prints total times instead if
        given total_times=True.

        By default prints stats for all phases, prints only the Execution and its
        subphases (as per-execution times) if per_execution=True.

        If per_execution=True and the trace contains separate Warmup and Benchmark
        phases, prints only the Benchmark phase.

        For json output:
        The json output is internal to NNAPI and is not guaranteed stable or
        extensively defined. It does however contain a version field so that
        backwards-compatible tools can be created on top of it.

        The json output includes both the statistics themselves produced by
        aggregate_times as well as the values used to create the text output
        so that those are easily available.

        Look at the end of the function for the fields included in the json.

        Args:
          tracker_map: mapping whose values are tracker objects providing
            is_complete(), print_stats() and print() (keys appear to be
            process ids -- confirm against the caller).
          print_detail: for text output, also call print_stats() and then
            print() on every tracker.
          total_times: use total times instead of self-times.
          per_execution: report only the Execution phase and its subphases,
            divided by the number of executions.
          json_output: emit json instead of the text table.
          starting_mark: printed on its own line before the text table, and
            included verbatim in the json output.
          sep: if non-empty, printed before anything else.

        Returns:
          None. If any tracker is incomplete, a message is written to stderr
          and nothing is printed to stdout.
    """
    PHASE_EXECUTION_LESS_IO_AND_RESULTS = "PEO"
    phases_to_pick = phases + [PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS]
    layers_and_total = layers + [LAYER_TOTAL]

    if not all(tracker.is_complete() for tracker in tracker_map.values()):
        sys.stderr.write("Incomplete trace, not able to print all statistics\n")
        return
    if sep:
        print(sep)

    # Select template and statistics to use
    times, self_times, has_warmup_and_benchmark, execution_counts = aggregate_times(tracker_map)
    template = TEMPLATE_EXECUTION_ONLY if per_execution else TEMPLATE_ALL_PHASES
    if total_times:
        template = template.replace("self-times", "total time")
        times_to_use = times
    else:
        times_to_use = self_times
    if has_warmup_and_benchmark and per_execution:
        template = template.replace("Execution", "Benchmark")
        # NOTE: this overwrites the Execution entries of the aggregated
        # statistics in place, so the json output reflects the Benchmark
        # numbers as well.
        for phase in [PHASE_EXECUTION] + subphases[PHASE_EXECUTION]:
            for layer in layers_and_total:
                times_to_use[phase][layer] = times_to_use[PHASE_BENCHMARK][phase][layer]

    # Rewrite template shorthand (":fl" first, since ":f" is a prefix of it)
    template = template.replace(":fl", ":>11.2f")
    template = template.replace(":f", ":>9.2f")

    # Gather template inputs from statistics
    values = dict()
    has_cpu = False
    for layer in layers:
        for phase in phases_to_pick:
            t = times_to_use[phase][layer]
            values[make_tag(layer, phase)] = t
            if layer == LAYER_CPU:
                has_cpu = (has_cpu or t > 0.0)

    # Calculate layer totals and PHASE_EXECUTION_LESS_IO_AND_RESULTS
    for phase in phases_to_pick:
        values[make_tag(LAYER_TOTAL, phase)] = times_to_use[phase][LAYER_TOTAL]
    for layer in layers_and_total:
        values[make_tag(layer, PHASE_EXECUTION_LESS_IO_AND_RESULTS)] = (
            values[make_tag(layer, PHASE_EXECUTION)] -
            values[make_tag(layer, PHASE_INPUTS_AND_OUTPUTS)] -
            values[make_tag(layer, PHASE_RESULTS)])
        values[make_tag(layer, PHASE_OVERALL)] = times_to_use[PHASE_OVERALL][layer]

    # Calculate layer execution percentages (computed before the per-execution
    # division below; being ratios they are unaffected by it)
    execution_total = values[make_tag(LAYER_TOTAL, PHASE_EXECUTION)]
    for layer in layers:
        if execution_total > 0.0:
            values[make_tag(layer, "PEp")] = (
                values[make_tag(layer, PHASE_EXECUTION)] * 100.0 / execution_total)
        else:
            values[make_tag(layer, "PEp")] = math.nan

    # Make output numbers per-execution if desired
    if per_execution:
        if has_warmup_and_benchmark:
            divide_by = execution_counts[PHASE_BENCHMARK]
        else:
            divide_by = execution_counts[PHASE_OVERALL]
        can_divide = divide_by != 0
        for layer in layers_and_total:
            for phase in [PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION_LESS_IO_AND_RESULTS,
                          PHASE_RESULTS, PHASE_EXECUTION]:
                tag = make_tag(layer, phase)
                # No executions recorded: report n/a rather than dividing by zero
                values[tag] = values[tag] / divide_by if can_divide else math.nan

    # Generate and print output
    if not json_output:
        # Apply template and prettify numbers
        output = template.format(**values)
        output = output.replace(" 0.00%", " -")
        output = output.replace(" 0.00", " -")
        output = output.replace(" nan", " n/a")

        # Print output
        print(starting_mark)
        for line in output.splitlines():
            # Hide the CPU row when no CPU time was recorded
            if line.startswith("CPU") and not has_cpu:
                continue
            print(line)
        if print_detail:
            for tracker in tracker_map.values():
                tracker.print_stats()
            for tracker in tracker_map.values():
                tracker.print()
    else:
        output = dict(times=times, self_times=self_times, execution_counts=execution_counts,
                      template_inputs=values, version=1, starting_mark=starting_mark)
        # JSON doesn't recognize NaN. Replace NaN values with None (-> null)
        # in the data itself rather than in the serialized text, so that
        # strings which merely contain "NaN" (e.g. starting_mark) are kept.
        print(json.dumps(_nan_to_none(output), indent=2, sort_keys=True))


def _nan_to_none(value):
    """ Returns a copy of value with every float NaN replaced by None.

        Recurses into dicts, lists and tuples (tuples come back as lists, which
        is how json serializes them anyway); anything else is returned as is.
    """
    if isinstance(value, float) and math.isnan(value):
        return None
    if isinstance(value, dict):
        return {key: _nan_to_none(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_nan_to_none(item) for item in value]
    return value
| |
def reset_trackers(tracker_map):
    """ Calls reset() on every tracker stored in tracker_map. """
    for tracker in tracker_map.values():
        tracker.reset()
| |
# Text-output template used by print_stats when per_execution is False.
# Placeholders are named <layer>_<phase>; print_stats expands the ":f" and
# ":fl" shorthands to ":>9.2f" and ":>11.2f" before calling str.format, and
# swaps "self-times" for "total time" when total_times is requested. The row
# starting with "CPU" is omitted from the output when no CPU time was recorded.
| TEMPLATE_ALL_PHASES = """ |
| =========================================================================================================================================== |
| NNAPI timing summary (self-times, ms wall-clock) Execution |
| ---------------------------------------------------- |
| Initialization Preparation Compilation I/O Compute Results Ex. total Termination Total |
| -------------- ----------- ----------- ----------- ------------ ----------- ----------- ----------- ---------- |
| Application {LA_PI:f} {LA_PP:f} {LA_PC:f} {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PT:f} {LA_PO:f}* |
| Runtime {LR_PI:f} {LR_PP:f} {LR_PC:f} {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PT:f} {LR_PO:f} |
| IPC {LI_PI:f} {LI_PP:f} {LI_PC:f} {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PT:f} {LI_PO:f} |
| Driver {LD_PI:f} {LD_PP:f} {LD_PC:f} {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PT:f} {LD_PO:f} |
| CPU {LC_PI:f} {LC_PP:f} {LC_PC:f} {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PT:f} {LC_PO:f} |
| |
| Total {LT_PI:f}* {LT_PP:f}* {LT_PC:f}* {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f}* {LT_PT:f}* {LT_PO:f}* |
| =========================================================================================================================================== |
| * This total ignores missing (n/a) values and thus is not necessarily consistent with the rest of the numbers |
| """ |
| |
# Text-output template used by print_stats when per_execution is True: only
# the Execution phase and its subphases, plus each layer's percentage of the
# execution total ("PEp" placeholders). print_stats replaces "Execution" with
# "Benchmark" when the trace has separate Warmup and Benchmark phases, and
# "self-times" with "total time" when total_times is requested. The ":f" and
# ":fl" shorthands are expanded to ":>9.2f" and ":>11.2f" before str.format.
| TEMPLATE_EXECUTION_ONLY = """ |
| ================================================================================ |
| NNAPI timing summary (self-times, ms wall-clock) Execution |
| ------------------------------------------------------------------ |
| I/O Compute Results Total Percentage |
| ----------- ------------ ----------- ----------- ----------- |
| Application {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PEp:fl}% |
| Runtime {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PEp:fl}% |
| IPC {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PEp:fl}% |
| Driver {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PEp:fl}% |
| CPU {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PEp:fl}% |
| |
| Total {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f} 100% |
| ================================================================================ |
| * This total ignores missing (n/a) values and thus is not necessarily consistent |
| with the rest of the numbers |
| """ |