Tools/scripts/summarize_stats.py - platform/external/python/cpython3 - Git at Google

 """Print a summary of specialization stats for all files in the
 default stats folders.
 """

 import collections
 import os.path
 import opcode
 from datetime import date
 import itertools
 import argparse

 if os.name == "nt":
     DEFAULT_DIR = "c:\\temp\\py_stats\\"
 else:
     DEFAULT_DIR = "/tmp/py_stats/"

 #Create list of all instruction names
 specialized = iter(opcode._specialized_instructions)
 opname = ["<0>"]
 for name in opcode.opname[1:]:
     if name.startswith("<"):
         try:
             name = next(specialized)
         except StopIteration:
             pass
     opname.append(name)

 # opcode_name --> opcode
 # Sort alphabetically.
 opmap = {name: i for i, name in enumerate(opname)}
 opmap = dict(sorted(opmap.items()))

 TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"

 def print_specialization_stats(name, family_stats, defines):
     """Print the specialization report for one instruction family.

     Nothing is printed unless ``family_stats`` contains the
     ``"specializable"`` key and the counters named in ``TOTAL`` sum to a
     non-zero value.  Otherwise a collapsible section is emitted holding
     three tables: the per-kind counts, the specialization attempts
     (success/failure) and the failure kinds in descending order of count.

     name -- instruction name; used as the section title and passed to
         kind_to_text() to label failure kinds.
     family_stats -- mapping of stat key to count for this instruction.
     defines -- mapping of failure kind number to define names, as
         consumed by kind_to_text().
     """
     if "specializable" not in family_stats:
         return
     total = sum(family_stats.get(kind, 0) for kind in TOTAL)
     if total == 0:
         return

     def ratio(part, whole):
         # A family may have been executed without recording any attempt
         # or failure; leave the ratio blank rather than divide by zero.
         return f"{100*part/whole:0.1f}%" if whole else ""

     with Section(name, 3, f"specialization stats for {name} family"):
         rows = []
         for key in sorted(family_stats):
             if key.startswith("specialization.failure_kinds"):
                 continue
             if key in ("specialization.hit", "specialization.miss"):
                 label = key[len("specialization."):]
             elif key == "execution_count":
                 label = "unquickened"
             elif key in ("specialization.success",  "specialization.failure", "specializable"):
                 continue
             elif key.startswith("pair"):
                 continue
             else:
                 label = key
             rows.append((f"{label:>12}", f"{family_stats[key]:>12}", ratio(family_stats[key], total)))
         emit_table(("Kind", "Count", "Ratio"), rows)

         print_title("Specialization attempts", 4)
         attempt_keys = ("specialization.success",  "specialization.failure")
         total_attempts = sum(family_stats.get(key, 0) for key in attempt_keys)
         rows = []
         for key in attempt_keys:
             label = key[len("specialization."):]
             label = label[0].upper() + label[1:]
             val = family_stats.get(key, 0)
             rows.append((label, val, ratio(val, total_attempts)))
         emit_table(("", "Count:", "Ratio:"), rows)

         total_failures = family_stats.get("specialization.failure", 0)
         # Keyed by kind number so that any index found in the stats is
         # accepted, instead of overflowing a fixed-size list.
         failure_kinds = {}
         for key in family_stats:
             if not key.startswith("specialization.failure_kind"):
                 continue
             # Keys look like "specialization.failure_kinds[N]".
             _, index = key[:-1].split("[")
             failure_kinds[int(index)] = family_stats[key]
         failures = sorted(
             ((value, index) for index, value in failure_kinds.items() if value),
             reverse=True,
         )
         rows = [
             (kind_to_text(index, defines, name), value, ratio(value, total_failures))
             for value, index in failures
         ]
         emit_table(("Failure kind", "Count:", "Ratio:"), rows)

 def gather_stats():
     """Sum the counters found in every file of DEFAULT_DIR.

     Each line of each file is expected to have the form ``key: integer``.
     Returns a collections.Counter mapping the stripped key to the sum of
     its values over all files.
     """
     totals = collections.Counter()
     for entry in os.listdir(DEFAULT_DIR):
         path = os.path.join(DEFAULT_DIR, entry)
         with open(path) as stream:
             for record in stream:
                 label, amount = record.split(":")
                 label = label.strip()
                 totals[label] += int(amount)
     return totals

 def extract_opcode_stats(stats):
     """Split the flat stats mapping into one dict per opcode number.

     Keys of the form ``opcode[N].rest`` are stored as ``rest`` in the
     N-th dict of the returned 256-element list; keys that do not start
     with "opcode" are ignored.
     """
     per_opcode = [dict() for _ in range(256)]
     for full_key, count in stats.items():
         if full_key.startswith("opcode"):
             # Skip the leading "opcode[" and split at the closing bracket.
             number, _, suffix = full_key[7:].partition("]")
             per_opcode[int(number)][suffix.strip(".")] = count
     return per_opcode

 def parse_kinds(spec_src):
     """Collect the ``#define SPEC_FAIL_<NAME> <number>`` lines of spec_src.

     spec_src is an iterable of text lines.  Returns a defaultdict(list)
     mapping each number to the list of names (without the SPEC_FAIL_
     prefix) defined with that number, in order of appearance.
     """
     marker = "#define SPEC_FAIL_"
     kinds = collections.defaultdict(list)
     for text in (raw.strip() for raw in spec_src):
         if text.startswith(marker):
             label, number = text[len(marker):].split()
             kinds[int(number.strip())].append(label.strip())
     return kinds

 def pretty(defname):
     """Return defname in lower case with underscores turned into spaces."""
     lowered = defname.lower()
     return lowered.replace("_", " ")

 def kind_to_text(kind, defines, opname):
     """Return a human-readable label for a specialization failure kind.

     kind -- failure kind number.
     defines -- mapping of kind number to a list of define names (with
         the SPEC_FAIL_ prefix already removed).
     opname -- name of the instruction the failure was recorded for.

     Kinds below 7 are labelled with the first define registered for the
     number, whatever the instruction.  Other kinds are labelled with the
     first define whose name starts with the instruction family prefix
     (the prefix itself is removed).  When no define matches, the result
     is "kind N".
     """
     fallback = "kind " + str(kind)
     # .get() keeps a missing kind from raising (plain dict) or from
     # yielding an empty list that is then indexed (defaultdict).
     names = defines.get(kind) or ()
     if kind < 7:
         return pretty(names[0]) if names else fallback
     # Reduce the instruction name to the prefix used by the defines.
     if opname.endswith("ATTR"):
         opname = "ATTR"
     if opname.endswith("SUBSCR"):
         opname = "SUBSCR"
     if opname.startswith("PRECALL"):
         opname = "CALL"
     for name in names:
         if name.startswith(opname):
             # Drop the family prefix together with the following underscore.
             return pretty(name[len(opname)+1:])
     return fallback

 def categorized_counts(opcode_stats):
     """Classify executed instructions as basic, not specialized or specialized.

     Returns the tuple (basic, not_specialized, specialized) of execution
     counts.  Instructions carrying the "specializable" key and ADAPTIVE
     instructions count as not specialized; for the other specialized
     instructions (superinstructions, whose names contain "__", excluded)
     the misses count as not specialized and the rest as specialized;
     everything else is basic.
     """
     adaptive_names = set()
     specialized_names = set()
     for op in opcode._specialized_instructions:
         if "ADAPTIVE" in op:
             adaptive_names.add(op)
         elif "__" not in op:
             specialized_names.add(op)

     basic = not_specialized = specialized = 0
     for number, entry in enumerate(opcode_stats):
         if "execution_count" not in entry:
             continue
         executed = entry['execution_count']
         name = opname[number]
         if "specializable" in entry or name in adaptive_names:
             not_specialized += executed
         elif name in specialized_names:
             misses = entry.get("specialization.miss", 0)
             not_specialized += misses
             specialized += executed - misses
         else:
             basic += executed
     return basic, not_specialized, specialized

 def print_title(name, level=2):
     """Print name as a markdown heading of the given level, then a blank line."""
     hashes = "#" * level
     print(hashes, name, end="\n\n")

 class Section:
     """Context manager printing a titled, collapsible <details> section.

     Entering prints the heading and the opening markup with the summary
     (the lower-cased title unless one is given); leaving prints the
     closing markup.
     """

     def __init__(self, title, level=2, summary=None):
         self.title = title
         self.level = level
         self.summary = title.lower() if summary is None else summary

     def __enter__(self):
         print_title(self.title, self.level)
         print("<details>")
         print("<summary>", self.summary, "</summary>", end="\n\n")
         return self

     def __exit__(*args):
         # Blank line, closing tag, blank line.
         print("\n</details>\n")

 def emit_table(header, rows):
     """Print header and rows as a markdown table followed by a blank line.

     header -- sequence of column titles; a title ending in ":" has the
         colon removed and its column marked with "---:" in the
         delimiter row.
     rows -- iterable of sequences, each with exactly len(header) items;
         every item is converted with str().

     Raises ValueError, naming the offending row, as soon as a row of the
     wrong length is reached; the lines before it have already been
     printed.
     """
     width = len(header)
     header_line = "|"
     under_line = "|"
     for item in header:
         under = "---"
         if item.endswith(":"):
             item = item[:-1]
             under += ":"
         header_line += item + " | "
         under_line += under + "|"
     print(header_line)
     print(under_line)
     for row in rows:
         if len(row) != width:
             # Report the faulty row itself, not the whole collection.
             raise ValueError("Wrong number of elements in row '" + str(row) + "'")
         print("|", " | ".join(str(i) for i in row), "|")
     print()

 def emit_execution_counts(opcode_stats, total):
     """Print the table of execution counts, most executed instruction first.

     For each instruction the table shows its count, its share of total,
     the cumulative share and, for instructions without the
     "specializable" key that recorded misses, the miss ratio.
     """
     with Section("Execution counts", summary="execution counts for all instructions"):
         tallies = []
         for number, entry in enumerate(opcode_stats):
             if "execution_count" not in entry:
                 continue
             if "specializable" in entry:
                 misses = 0
             else:
                 misses = entry.get("specialization.miss")
             tallies.append((entry['execution_count'], opname[number], misses))
         tallies.sort(reverse=True)

         running = 0
         table = []
         for executed, name, misses in tallies:
             running += executed
             miss_ratio = f"{100*misses/executed:0.1f}%" if misses else ""
             table.append((
                 name,
                 executed,
                 f"{100*executed/total:0.1f}%",
                 f"{100*running/total:0.1f}%",
                 miss_ratio,
             ))
         emit_table(("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), table)


 def emit_specialization_stats(opcode_stats):
     """Print the specialization report of every instruction family.

     The failure kind names are read from Python/specialize.c, located
     relative to the directory of this script.
     """
     script_dir = os.path.dirname(__file__)
     spec_path = os.path.join(script_dir, "../../Python/specialize.c")
     with open(spec_path) as spec_src:
         defines = parse_kinds(spec_src)
     with Section("Specialization stats", summary="specialization stats by family"):
         for number, family_stats in enumerate(opcode_stats):
             print_specialization_stats(opname[number], family_stats, defines)

 def emit_specialization_overview(opcode_stats, total):
     """Print the table of basic / not specialized / specialized counts.

     opcode_stats -- per-opcode stats, as accepted by categorized_counts().
     total -- total number of executed instructions, the denominator of
         the ratios.  When it is zero the ratio column is left blank.
     """
     basic, not_specialized, specialized = categorized_counts(opcode_stats)

     def ratio(count):
         # total is 0 when no stats were gathered at all (for instance an
         # empty stats directory); avoid dividing by zero in that case.
         return f"{count*100/total:0.1f}%" if total else ""

     with Section("Specialization effectiveness"):
         emit_table(("Instructions", "Count:", "Ratio:"), (
             ("Basic", basic, ratio(basic)),
             ("Not specialized", not_specialized, ratio(not_specialized)),
             ("Specialized", specialized, ratio(specialized)),
         ))

 def emit_call_stats(stats):
     """Print the table of call and frame statistics.

     Rows are the entries of stats whose key contains "Calls to",
     followed by those whose key starts with "Frame".  Every ratio is
     relative to the sum of the "Calls to" counts and is left blank when
     that sum is zero.
     """
     with Section("Call stats", summary="Inlined calls and frame stats"):
         total = sum(value for key, value in stats.items() if "Calls to" in key)

         def ratio(value):
             # Frame counters may be present although no call was counted.
             return f"{100*value/total:0.1f}%" if total else ""

         rows = [(key, value, ratio(value))
                 for key, value in stats.items() if "Calls to" in key]
         rows += [(key, value, ratio(value))
                  for key, value in stats.items() if key.startswith("Frame")]
         emit_table(("", "Count:", "Ratio:"), rows)

 def emit_object_stats(stats):
     """Print the table of object statistics.

     Rows are the entries of stats whose key starts with "Object", shown
     without that prefix and with the first letter in upper case.  Keys
     containing "materialize" also get a ratio relative to the
     "Object new values" count; the ratio is left blank when that count
     is missing or zero.
     """
     with Section("Object stats", summary="allocations, frees and dict materializatons"):
         # May be None (key absent) or 0; both make the ratio meaningless.
         total = stats.get("Object new values")
         rows = []
         for key, value in stats.items():
             if not key.startswith("Object"):
                 continue
             if "materialize" in key and total:
                 materialize = f"{100*value/total:0.1f}%"
             else:
                 materialize = ""
             label = key[6:].strip()
             # Slicing instead of indexing tolerates an empty label.
             label = label[:1].upper() + label[1:]
             rows.append((label, value, materialize))
         emit_table(("",  "Count:", "Ratio:"), rows)

 def get_total(opcode_stats):
     """Return the sum of "execution_count" over the entries that have one."""
     return sum(
         entry['execution_count']
         for entry in opcode_stats
         if "execution_count" in entry
     )

 def emit_pair_counts(opcode_stats, total):
     """Print the instruction pair statistics.

     Two sections are emitted: the 100 most frequent pairs with their
     share of total, and, for each instruction taking part in a pair, its
     three most frequent predecessors and successors.  Opcode 0 is
     ignored as first element of a pair, and so are pairs whose count is
     zero.
     """
     pair_counts = []
     for first, entry in enumerate(opcode_stats):
         if first == 0:
             continue
         for key, occurrences in entry.items():
             if not key.startswith("pair_count"):
                 continue
             # Keys look like "pair_count[N]"; keep what is between brackets.
             second_text = key[11:].partition("]")[0]
             if occurrences:
                 pair_counts.append((occurrences, (first, int(second_text))))

     with Section("Pair counts", summary="Pair counts for top 100 pairs"):
         pair_counts.sort(reverse=True)
         running = 0
         table = []
         for occurrences, (first, second) in pair_counts[:100]:
             running += occurrences
             table.append((
                 opname[first] + " " + opname[second],
                 occurrences,
                 f"{100*occurrences/total:0.1f}%",
                 f"{100*running/total:0.1f}%",
             ))
         emit_table(("Pair", "Count:", "Self:", "Cumulative:"), table)

     with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
         predecessors = collections.defaultdict(collections.Counter)
         successors = collections.defaultdict(collections.Counter)
         total_predecessors = collections.Counter()
         total_successors = collections.Counter()
         # Every recorded pair has a non-zero count at this point.
         for occurrences, (first, second) in pair_counts:
             predecessors[second][first] = occurrences
             successors[first][second] = occurrences
             total_predecessors[second] += occurrences
             total_successors[first] += occurrences

         for name, number in opmap.items():
             pred_total = total_predecessors[number]
             succ_total = total_successors[number]
             if pred_total == 0 and succ_total == 0:
                 continue
             pred_rows = ()
             succ_rows = ()
             if pred_total:
                 pred_rows = [
                     (opname[pred], count, f"{count/pred_total:.1%}")
                     for pred, count in predecessors[number].most_common(3)
                 ]
             if succ_total:
                 succ_rows = [
                     (opname[succ], count, f"{count/succ_total:.1%}")
                     for succ, count in successors[number].most_common(3)
                 ]
             with Section(name, 3, f"Successors and predecessors for {name}"):
                 emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
                 emit_table(("Successors", "Count:", "Percentage:"), succ_rows)

 def main():
     """Gather the stats and print every report section, then the date."""
     all_stats = gather_stats()
     per_opcode = extract_opcode_stats(all_stats)
     executed = get_total(per_opcode)

     # Reports based on the per-opcode stats.
     emit_execution_counts(per_opcode, executed)
     emit_pair_counts(per_opcode, executed)
     emit_specialization_stats(per_opcode)
     emit_specialization_overview(per_opcode, executed)

     # Reports based on the raw stats.
     for report in (emit_call_stats, emit_object_stats):
         report(all_stats)

     print("---")
     print("Stats gathered on:", date.today())

 # Produce the report only when run as a script, not when imported.
 if __name__ == "__main__":
     main()
	"""Print a summary of specialization stats for all files in the
	default stats folders.
	"""

	import collections
	import os.path
	import opcode
	from datetime import date
	import itertools
	import argparse

	if os.name == "nt":
	DEFAULT_DIR = "c:\\temp\\py_stats\\"
	else:
	DEFAULT_DIR = "/tmp/py_stats/"

	#Create list of all instruction names
	specialized = iter(opcode._specialized_instructions)
	opname = ["<0>"]
	for name in opcode.opname[1:]:
	if name.startswith("<"):
	try:
	name = next(specialized)
	except StopIteration:
	pass
	opname.append(name)

	# opcode_name --> opcode
	# Sort alphabetically.
	opmap = {name: i for i, name in enumerate(opname)}
	opmap = dict(sorted(opmap.items()))

	TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"

	def print_specialization_stats(name, family_stats, defines):
	if "specializable" not in family_stats:
	return
	total = sum(family_stats.get(kind, 0) for kind in TOTAL)
	if total == 0:
	return
	with Section(name, 3, f"specialization stats for {name} family"):
	rows = []
	for key in sorted(family_stats):
	if key.startswith("specialization.failure_kinds"):
	continue
	if key in ("specialization.hit", "specialization.miss"):
	label = key[len("specialization."):]
	elif key == "execution_count":
	label = "unquickened"
	elif key in ("specialization.success", "specialization.failure", "specializable"):
	continue
	elif key.startswith("pair"):
	continue
	else:
	label = key
	rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
	emit_table(("Kind", "Count", "Ratio"), rows)
	print_title("Specialization attempts", 4)
	total_attempts = 0
	for key in ("specialization.success", "specialization.failure"):
	total_attempts += family_stats.get(key, 0)
	rows = []
	for key in ("specialization.success", "specialization.failure"):
	label = key[len("specialization."):]
	label = label[0].upper() + label[1:]
	val = family_stats.get(key, 0)
	rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
	emit_table(("", "Count:", "Ratio:"), rows)
	total_failures = family_stats.get("specialization.failure", 0)
	failure_kinds = [ 0 ] * 30
	for key in family_stats:
	if not key.startswith("specialization.failure_kind"):
	continue
	_, index = key[:-1].split("[")
	index = int(index)
	failure_kinds[index] = family_stats[key]
	failures = [(value, index) for (index, value) in enumerate(failure_kinds)]
	failures.sort(reverse=True)
	rows = []
	for value, index in failures:
	if not value:
	continue
	rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
	emit_table(("Failure kind", "Count:", "Ratio:"), rows)

	def gather_stats():
	stats = collections.Counter()
	for filename in os.listdir(DEFAULT_DIR):
	with open(os.path.join(DEFAULT_DIR, filename)) as fd:
	for line in fd:
	key, value = line.split(":")
	key = key.strip()
	value = int(value)
	stats[key] += value
	return stats

	def extract_opcode_stats(stats):
	opcode_stats = [ {} for _ in range(256) ]
	for key, value in stats.items():
	if not key.startswith("opcode"):
	continue
	n, _, rest = key[7:].partition("]")
	opcode_stats[int(n)][rest.strip(".")] = value
	return opcode_stats

	def parse_kinds(spec_src):
	defines = collections.defaultdict(list)
	for line in spec_src:
	line = line.strip()
	if not line.startswith("#define SPEC_FAIL_"):
	continue
	line = line[len("#define SPEC_FAIL_"):]
	name, val = line.split()
	defines[int(val.strip())].append(name.strip())
	return defines

	def pretty(defname):
	return defname.replace("_", " ").lower()

	def kind_to_text(kind, defines, opname):
	if kind < 7:
	return pretty(defines[kind][0])
	if opname.endswith("ATTR"):
	opname = "ATTR"
	if opname.endswith("SUBSCR"):
	opname = "SUBSCR"
	if opname.startswith("PRECALL"):
	opname = "CALL"
	for name in defines[kind]:
	if name.startswith(opname):
	return pretty(name[len(opname)+1:])
	return "kind " + str(kind)

	def categorized_counts(opcode_stats):
	basic = 0
	specialized = 0
	not_specialized = 0
	specialized_instructions = {
	op for op in opcode._specialized_instructions
	if "__" not in op and "ADAPTIVE" not in op}
	adaptive_instructions = {
	op for op in opcode._specialized_instructions
	if "ADAPTIVE" in op}
	for i, opcode_stat in enumerate(opcode_stats):
	if "execution_count" not in opcode_stat:
	continue
	count = opcode_stat['execution_count']
	name = opname[i]
	if "specializable" in opcode_stat:
	not_specialized += count
	elif name in adaptive_instructions:
	not_specialized += count
	elif name in specialized_instructions:
	miss = opcode_stat.get("specialization.miss", 0)
	not_specialized += miss
	specialized += count - miss
	else:
	basic += count
	return basic, not_specialized, specialized

	def print_title(name, level=2):
	print("#"*level, name)
	print()

	class Section:

	def __init__(self, title, level=2, summary=None):
	self.title = title
	self.level = level
	if summary is None:
	self.summary = title.lower()
	else:
	self.summary = summary

	def __enter__(self):
	print_title(self.title, self.level)
	print("<details>")
	print("<summary>", self.summary, "</summary>")
	print()
	return self

	def __exit__(*args):
	print()
	print("</details>")
	print()

	def emit_table(header, rows):
	width = len(header)
	header_line = "\|"
	under_line = "\|"
	for item in header:
	under = "---"
	if item.endswith(":"):
	item = item[:-1]
	under += ":"
	header_line += item + " \| "
	under_line += under + "\|"
	print(header_line)
	print(under_line)
	for row in rows:
	if width is not None and len(row) != width:
	raise ValueError("Wrong number of elements in row '" + str(rows) + "'")
	print("\|", " \| ".join(str(i) for i in row), "\|")
	print()

	def emit_execution_counts(opcode_stats, total):
	with Section("Execution counts", summary="execution counts for all instructions"):
	counts = []
	for i, opcode_stat in enumerate(opcode_stats):
	if "execution_count" in opcode_stat:
	count = opcode_stat['execution_count']
	miss = 0
	if "specializable" not in opcode_stat:
	miss = opcode_stat.get("specialization.miss")
	counts.append((count, opname[i], miss))
	counts.sort(reverse=True)
	cumulative = 0
	rows = []
	for (count, name, miss) in counts:
	cumulative += count
	if miss:
	miss = f"{100*miss/count:0.1f}%"
	else:
	miss = ""
	rows.append((name, count, f"{100*count/total:0.1f}%",
	f"{100*cumulative/total:0.1f}%", miss))
	emit_table(
	("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
	rows
	)


	def emit_specialization_stats(opcode_stats):
	spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
	with open(spec_path) as spec_src:
	defines = parse_kinds(spec_src)
	with Section("Specialization stats", summary="specialization stats by family"):
	for i, opcode_stat in enumerate(opcode_stats):
	name = opname[i]
	print_specialization_stats(name, opcode_stat, defines)

	def emit_specialization_overview(opcode_stats, total):
	basic, not_specialized, specialized = categorized_counts(opcode_stats)
	with Section("Specialization effectiveness"):
	emit_table(("Instructions", "Count:", "Ratio:"), (
	("Basic", basic, f"{basic*100/total:0.1f}%"),
	("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
	("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
	))

	def emit_call_stats(stats):
	with Section("Call stats", summary="Inlined calls and frame stats"):
	total = 0
	for key, value in stats.items():
	if "Calls to" in key:
	total += value
	rows = []
	for key, value in stats.items():
	if "Calls to" in key:
	rows.append((key, value, f"{100*value/total:0.1f}%"))
	for key, value in stats.items():
	if key.startswith("Frame"):
	rows.append((key, value, f"{100*value/total:0.1f}%"))
	emit_table(("", "Count:", "Ratio:"), rows)

	def emit_object_stats(stats):
	with Section("Object stats", summary="allocations, frees and dict materializatons"):
	total = stats.get("Object new values")
	rows = []
	for key, value in stats.items():
	if key.startswith("Object"):
	if "materialize" in key:
	materialize = f"{100*value/total:0.1f}%"
	else:
	materialize = ""
	label = key[6:].strip()
	label = label[0].upper() + label[1:]
	rows.append((label, value, materialize))
	emit_table(("", "Count:", "Ratio:"), rows)

	def get_total(opcode_stats):
	total = 0
	for opcode_stat in opcode_stats:
	if "execution_count" in opcode_stat:
	total += opcode_stat['execution_count']
	return total

	def emit_pair_counts(opcode_stats, total):
	pair_counts = []
	for i, opcode_stat in enumerate(opcode_stats):
	if i == 0:
	continue
	for key, value in opcode_stat.items():
	if key.startswith("pair_count"):
	x, _, _ = key[11:].partition("]")
	if value:
	pair_counts.append((value, (i, int(x))))
	with Section("Pair counts", summary="Pair counts for top 100 pairs"):
	pair_counts.sort(reverse=True)
	cumulative = 0
	rows = []
	for (count, pair) in itertools.islice(pair_counts, 100):
	i, j = pair
	cumulative += count
	rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%",
	f"{100*cumulative/total:0.1f}%"))
	emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
	rows
	)
	with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
	predecessors = collections.defaultdict(collections.Counter)
	successors = collections.defaultdict(collections.Counter)
	total_predecessors = collections.Counter()
	total_successors = collections.Counter()
	for count, (first, second) in pair_counts:
	if count:
	predecessors[second][first] = count
	successors[first][second] = count
	total_predecessors[second] += count
	total_successors[first] += count
	for name, i in opmap.items():
	total1 = total_predecessors[i]
	total2 = total_successors[i]
	if total1 == 0 and total2 == 0:
	continue
	pred_rows = succ_rows = ()
	if total1:
	pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
	for (pred, count) in predecessors[i].most_common(3)]
	if total2:
	succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
	for (succ, count) in successors[i].most_common(3)]
	with Section(name, 3, f"Successors and predecessors for {name}"):
	emit_table(("Predecessors", "Count:", "Percentage:"),
	pred_rows
	)
	emit_table(("Successors", "Count:", "Percentage:"),
	succ_rows
	)

	def main():
	stats = gather_stats()
	opcode_stats = extract_opcode_stats(stats)
	total = get_total(opcode_stats)
	emit_execution_counts(opcode_stats, total)
	emit_pair_counts(opcode_stats, total)
	emit_specialization_stats(opcode_stats)
	emit_specialization_overview(opcode_stats, total)
	emit_call_stats(stats)
	emit_object_stats(stats)
	print("---")
	print("Stats gathered on:", date.today())

	if __name__ == "__main__":
	main()