tools/systrace_parser/parser/aggregate.py - platform/packages/modules/NeuralNetworks - Git at Google

 """ NNAPI systrace parser - aggegation of timing from multiple threads """

 # TODO:
 # - phase and layer totals
 import math

 from parser.naming import layers, phases, subphases
 from parser.naming import (PHASE_OVERALL, PHASE_TERMINATION, PHASE_WARMUP,
                            PHASE_BENCHMARK, PHASE_EXECUTION, PHASE_INITIALIZATION,
                            PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS)
 from parser.naming import LAYER_APPLICATION, LAYER_IPC, LAYER_DRIVER
 LAYER_TOTAL = "LT"  # Pseudo-layer key for the per-phase total across layers (stored as the max over the layers)

 def aggregate_times(tracker_map, special_case_lr_pe=True):
   """ Takes the trackers for each thread and produces timing statistics for
       all layers and phases.

       Args:
         tracker_map: mapping of pid -> tracker. Each tracker is queried via
           get_execution_count(app_phase), get_stat(tag, app_phase,
           special_case_lr_pe), get_ld_pe_begins(app_phase) and
           get_ld_pe_ends(app_phase).
         special_case_lr_pe: passed through unchanged to tracker.get_stat().

       Returns (times, self_times, has_warmup and has_benchmark, execution_counts),
       where:
         - times and self_times are nested dictionaries of the form
           phase -> layer -> time with the following notes:
           - phase is flattened over all phases, except PHASE_WARMUP and
             PHASE_BENCHMARK, where the structure is phase -> phase -> layer -> time
           - PHASE_WARMUP and PHASE_BENCHMARK only nest execution and its
             subphases
           - PHASE_WARMUP and PHASE_BENCHMARK are only present if the trace
             contains both of them
           - the first level phase contains total over PHASE_WARMUP and
             PHASE_BENCHMARK if present
           - time may be math.nan if the data is not present in the trace
           - in addition to the layers from parser.naming, LAYER_TOTAL holds
             the total for the phase, computed as the maximum over the layers
             (NaNs counted as zero)
         - execution_counts contains a dictionary of the form
           {PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK} -> no of executions

       Raises AssertionError if the number of driver execution begin and end
       timestamps collected from the trackers differ.
   """
   trackers = [tracker_map[pid] for pid in tracker_map]
   all_application_phases = [PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK]

   # Calculate execution counts: the largest count reported by any thread
   execution_counts = {}
   for app_phase in all_application_phases:
     execution_count = 0
     for tracker in trackers:
       execution_count = max(execution_count, tracker.get_execution_count(app_phase))
     execution_counts[app_phase] = execution_count
   has_warmup = bool(execution_counts[PHASE_WARMUP])
   has_benchmark = bool(execution_counts[PHASE_BENCHMARK])
   # The per-application-phase breakdown is only produced when the trace
   # contains both warmup and benchmark executions.
   has_app_phases = has_warmup and has_benchmark
   if not has_app_phases:
     all_application_phases = [PHASE_OVERALL]

   # Create dicts
   times = {}
   self_times = {}
   if has_app_phases:
     for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
       times[app_phase] = {}
       self_times[app_phase] = {}
       for phase in _phase_and_subphases(PHASE_EXECUTION):
         times[app_phase][phase] = {}
         self_times[app_phase][phase] = {}
   for phase in phases + [PHASE_OVERALL] + subphases[PHASE_EXECUTION]:
     times[phase] = {}
     self_times[phase] = {}

   # Gather total times from all threads, calculate layer and phase totals
   for layer in layers:
     for phase0 in [PHASE_OVERALL] + phases:
       for phase in _phase_and_subphases(phase0):
         t = 0.0
         tag = layer + "_" + phase
         for app_phase in all_application_phases:
           t0 = 0.0
           if layer == LAYER_DRIVER and phase == PHASE_EXECUTION:
             # Calculate driver execution times from begins and ends. The
             # timestamps of all threads are pooled and sorted, then paired
             # up by position.
             begins = []
             ends = []
             for tracker in trackers:
               begins.extend(tracker.get_ld_pe_begins(app_phase))
               ends.extend(tracker.get_ld_pe_ends(app_phase))
             assert len(begins) == len(ends)
             begins.sort()
             ends.sort()
             for begin, end in zip(begins, ends):
               t0 += (end - begin)
           else:
             for tracker in trackers:
               t0 += tracker.get_stat(tag, app_phase, special_case_lr_pe)
           if phase0 == PHASE_EXECUTION and (app_phase != PHASE_OVERALL):
             times[app_phase][phase][layer] = zero_to_nan_if_missing(t0, phase, layer)
           t += t0
         times[phase][layer] = zero_to_nan_if_missing(t, phase, layer)
     # A zero overall time is replaced by the sum over the phases. NaN is
     # truthy, so an overall time already marked as missing is left alone.
     if not times[PHASE_OVERALL][layer]:
       times[PHASE_OVERALL][layer] = sum(nan_to_zero(times[phase][layer]) for phase in phases)
   for phase0 in [PHASE_OVERALL] + phases:
     for phase in _phase_and_subphases(phase0):
       times[phase][LAYER_TOTAL] = max_ignoring_nans(times[phase].values())
       if phase0 == PHASE_EXECUTION and has_app_phases:
         for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
           times[app_phase][phase][LAYER_TOTAL] = max_ignoring_nans(times[app_phase][phase].values())

   # Calculate self-times for each layer
   for phase0 in [PHASE_OVERALL] + phases:
     for phase in _phase_and_subphases(phase0):
       self_times[phase][LAYER_TOTAL] = times[phase][LAYER_TOTAL]
       if phase0 == PHASE_EXECUTION and has_app_phases:
         for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
           self_times[app_phase][phase][LAYER_TOTAL] = times[app_phase][phase][LAYER_TOTAL]
       # Walk the layers in reverse order; t is the time of the most recent
       # layer with data, which is subtracted from the next layer's time.
       t = 0.0
       for layer in reversed(layers):
         if math.isnan(times[phase][layer]):
           self_times[phase][layer] = math.nan
         elif times[phase][layer] == 0.0:
           self_times[phase][layer] = 0.0
         elif (phase == PHASE_OVERALL and
               (layer == LAYER_DRIVER or layer == LAYER_IPC) and
               times[PHASE_EXECUTION][LAYER_DRIVER] == 0.0):
           # Driver was only used for initialization phase, did not support
           # execution of the model
           if layer == LAYER_DRIVER:
             self_times[phase][layer] = times[phase][layer]
           else:
             self_times[phase][layer] = times[phase][layer] - times[phase][LAYER_DRIVER]
         else:
           self_times[phase][layer] = times[phase][layer] - t
           t = times[phase][layer]
       # The nested warmup/benchmark dicts only exist when both are present,
       # so this must use the same condition as their creation; testing for
       # either one alone would raise KeyError on the missing dicts.
       if phase0 == PHASE_EXECUTION and has_app_phases:
         for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
           t = 0.0
           for layer in reversed(layers):
             if math.isnan(times[app_phase][phase][layer]):
               self_times[app_phase][phase][layer] = math.nan
             elif times[app_phase][phase][layer] == 0.0:
               self_times[app_phase][phase][layer] = 0.0
             else:
               self_times[app_phase][phase][layer] = times[app_phase][phase][layer] - t
               t = times[app_phase][phase][layer]

   return (times, self_times, has_app_phases, execution_counts)

 def zero_to_nan_if_missing(f, phase, layer):
   """ Map a zero time to NaN where a zero is taken to mean "no data".

       A zero is treated as missing data for the Application layer
       (applications may not have any tracing) and for the subphases of
       Execution in the Driver layer (other phases are discernible from the
       automatic HIDL tracepoints). Any other value is returned unchanged."""
   if f != 0.0:
     return f
   if layer == LAYER_APPLICATION or (
       layer == LAYER_DRIVER and phase in subphases[PHASE_EXECUTION]):
     return math.nan
   return f

 def nan_to_zero(f):
   """ Return 0.0 if f is NaN, otherwise return f unchanged. """
   return 0.0 if math.isnan(f) else f

 def _phase_and_subphases(phase):
   """ Return the list of phases to iterate over for the given phase.

       PHASE_OVERALL yields just itself, PHASE_WARMUP and PHASE_BENCHMARK
       yield nothing, and any other phase yields itself followed by its
       entries in subphases (if any). """
   if phase == PHASE_OVERALL:
     return [phase]
   if phase in (PHASE_WARMUP, PHASE_BENCHMARK):
     return []
   nested = subphases.get(phase, [])
   return [phase] + nested

 def max_ignoring_nans(xs):
   """ Return the largest value in xs, with every NaN counted as 0.0. """
   return max(nan_to_zero(x) for x in xs)
	""" NNAPI systrace parser - aggegation of timing from multiple threads """

	# TODO:
	# - phase and layer totals
	import math

	from parser.naming import layers, phases, subphases
	from parser.naming import (PHASE_OVERALL, PHASE_TERMINATION, PHASE_WARMUP,
	PHASE_BENCHMARK, PHASE_EXECUTION, PHASE_INITIALIZATION,
	PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS)
	from parser.naming import LAYER_APPLICATION, LAYER_IPC, LAYER_DRIVER
	LAYER_TOTAL = "LT" # Pseudo-layer key for the per-phase total across layers (stored as the max over the layers)

	def aggregate_times(tracker_map, special_case_lr_pe=True):
	  """ Takes the trackers for each thread and produces timing statistics for
	      all layers and phases.

	      Args:
	        tracker_map: mapping of pid -> tracker. Each tracker is queried via
	          get_execution_count(app_phase), get_stat(tag, app_phase,
	          special_case_lr_pe), get_ld_pe_begins(app_phase) and
	          get_ld_pe_ends(app_phase).
	        special_case_lr_pe: passed through unchanged to tracker.get_stat().

	      Returns (times, self_times, has_warmup and has_benchmark, execution_counts),
	      where:
	        - times and self_times are nested dictionaries of the form
	          phase -> layer -> time with the following notes:
	          - phase is flattened over all phases, except PHASE_WARMUP and
	            PHASE_BENCHMARK, where the structure is phase -> phase -> layer -> time
	          - PHASE_WARMUP and PHASE_BENCHMARK only nest execution and its
	            subphases
	          - PHASE_WARMUP and PHASE_BENCHMARK are only present if the trace
	            contains both of them
	          - the first level phase contains total over PHASE_WARMUP and
	            PHASE_BENCHMARK if present
	          - time may be math.nan if the data is not present in the trace
	          - in addition to the layers from parser.naming, LAYER_TOTAL holds
	            the total for the phase, computed as the maximum over the layers
	            (NaNs counted as zero)
	        - execution_counts contains a dictionary of the form
	          {PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK} -> no of executions

	      Raises AssertionError if the number of driver execution begin and end
	      timestamps collected from the trackers differ.
	  """
	  trackers = [tracker_map[pid] for pid in tracker_map]
	  all_application_phases = [PHASE_OVERALL, PHASE_WARMUP, PHASE_BENCHMARK]

	  # Calculate execution counts: the largest count reported by any thread
	  execution_counts = {}
	  for app_phase in all_application_phases:
	    execution_count = 0
	    for tracker in trackers:
	      execution_count = max(execution_count, tracker.get_execution_count(app_phase))
	    execution_counts[app_phase] = execution_count
	  has_warmup = bool(execution_counts[PHASE_WARMUP])
	  has_benchmark = bool(execution_counts[PHASE_BENCHMARK])
	  # The per-application-phase breakdown is only produced when the trace
	  # contains both warmup and benchmark executions.
	  has_app_phases = has_warmup and has_benchmark
	  if not has_app_phases:
	    all_application_phases = [PHASE_OVERALL]

	  # Create dicts
	  times = {}
	  self_times = {}
	  if has_app_phases:
	    for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
	      times[app_phase] = {}
	      self_times[app_phase] = {}
	      for phase in _phase_and_subphases(PHASE_EXECUTION):
	        times[app_phase][phase] = {}
	        self_times[app_phase][phase] = {}
	  for phase in phases + [PHASE_OVERALL] + subphases[PHASE_EXECUTION]:
	    times[phase] = {}
	    self_times[phase] = {}

	  # Gather total times from all threads, calculate layer and phase totals
	  for layer in layers:
	    for phase0 in [PHASE_OVERALL] + phases:
	      for phase in _phase_and_subphases(phase0):
	        t = 0.0
	        tag = layer + "_" + phase
	        for app_phase in all_application_phases:
	          t0 = 0.0
	          if layer == LAYER_DRIVER and phase == PHASE_EXECUTION:
	            # Calculate driver execution times from begins and ends. The
	            # timestamps of all threads are pooled and sorted, then paired
	            # up by position.
	            begins = []
	            ends = []
	            for tracker in trackers:
	              begins.extend(tracker.get_ld_pe_begins(app_phase))
	              ends.extend(tracker.get_ld_pe_ends(app_phase))
	            assert len(begins) == len(ends)
	            begins.sort()
	            ends.sort()
	            for begin, end in zip(begins, ends):
	              t0 += (end - begin)
	          else:
	            for tracker in trackers:
	              t0 += tracker.get_stat(tag, app_phase, special_case_lr_pe)
	          if phase0 == PHASE_EXECUTION and (app_phase != PHASE_OVERALL):
	            times[app_phase][phase][layer] = zero_to_nan_if_missing(t0, phase, layer)
	          t += t0
	        times[phase][layer] = zero_to_nan_if_missing(t, phase, layer)
	    # A zero overall time is replaced by the sum over the phases. NaN is
	    # truthy, so an overall time already marked as missing is left alone.
	    if not times[PHASE_OVERALL][layer]:
	      times[PHASE_OVERALL][layer] = sum(nan_to_zero(times[phase][layer]) for phase in phases)
	  for phase0 in [PHASE_OVERALL] + phases:
	    for phase in _phase_and_subphases(phase0):
	      times[phase][LAYER_TOTAL] = max_ignoring_nans(times[phase].values())
	      if phase0 == PHASE_EXECUTION and has_app_phases:
	        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
	          times[app_phase][phase][LAYER_TOTAL] = max_ignoring_nans(times[app_phase][phase].values())

	  # Calculate self-times for each layer
	  for phase0 in [PHASE_OVERALL] + phases:
	    for phase in _phase_and_subphases(phase0):
	      self_times[phase][LAYER_TOTAL] = times[phase][LAYER_TOTAL]
	      if phase0 == PHASE_EXECUTION and has_app_phases:
	        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
	          self_times[app_phase][phase][LAYER_TOTAL] = times[app_phase][phase][LAYER_TOTAL]
	      # Walk the layers in reverse order; t is the time of the most recent
	      # layer with data, which is subtracted from the next layer's time.
	      t = 0.0
	      for layer in reversed(layers):
	        if math.isnan(times[phase][layer]):
	          self_times[phase][layer] = math.nan
	        elif times[phase][layer] == 0.0:
	          self_times[phase][layer] = 0.0
	        elif (phase == PHASE_OVERALL and
	              (layer == LAYER_DRIVER or layer == LAYER_IPC) and
	              times[PHASE_EXECUTION][LAYER_DRIVER] == 0.0):
	          # Driver was only used for initialization phase, did not support
	          # execution of the model
	          if layer == LAYER_DRIVER:
	            self_times[phase][layer] = times[phase][layer]
	          else:
	            self_times[phase][layer] = times[phase][layer] - times[phase][LAYER_DRIVER]
	        else:
	          self_times[phase][layer] = times[phase][layer] - t
	          t = times[phase][layer]
	      # The nested warmup/benchmark dicts only exist when both are present,
	      # so this must use the same condition as their creation; testing for
	      # either one alone would raise KeyError on the missing dicts.
	      if phase0 == PHASE_EXECUTION and has_app_phases:
	        for app_phase in [PHASE_WARMUP, PHASE_BENCHMARK]:
	          t = 0.0
	          for layer in reversed(layers):
	            if math.isnan(times[app_phase][phase][layer]):
	              self_times[app_phase][phase][layer] = math.nan
	            elif times[app_phase][phase][layer] == 0.0:
	              self_times[app_phase][phase][layer] = 0.0
	            else:
	              self_times[app_phase][phase][layer] = times[app_phase][phase][layer] - t
	              t = times[app_phase][phase][layer]

	  return (times, self_times, has_app_phases, execution_counts)

	def zero_to_nan_if_missing(f, phase, layer):
	  """ Turn zero time to a NaN to indicate missing data, when we think that
	      the data is really missing. Data should only be missing from the
	      Application layer (applications may not have any tracing) and
	      the subphases of Execution in the Driver layer (other phases are
	      discernible from the automatic HIDL tracepoints).

	      Any other value of f is returned unchanged."""
	  if f == 0.0:
	    if layer == LAYER_APPLICATION:
	      return math.nan
	    if layer == LAYER_DRIVER and phase in subphases[PHASE_EXECUTION]:
	      return math.nan
	  return f

	def nan_to_zero(f):
	  """ Return 0.0 if f is NaN, otherwise return f unchanged. """
	  if math.isnan(f):
	    return 0.0
	  return f

	def _phase_and_subphases(phase):
	  """ Return the list of phases to iterate over for the given phase.

	      PHASE_OVERALL yields just itself, PHASE_WARMUP and PHASE_BENCHMARK
	      yield nothing, and any other phase yields itself followed by its
	      entries in subphases (if any). """
	  if phase == PHASE_OVERALL:
	    return [phase]
	  if phase == PHASE_WARMUP or phase == PHASE_BENCHMARK:
	    return []
	  return [phase] + subphases.get(phase, [])

	def max_ignoring_nans(xs):
	  """ Return the largest value in xs, with every NaN counted as 0.0. """
	  return max(map(nan_to_zero, xs))