blob: 3f47e5b018b21bf50c6d9f3e3b2a7cee3c8034de [file] [log] [blame]
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import annotations
import json
import logging
import math
from math import floor, log10
from typing import (TYPE_CHECKING, Any, Callable, Dict, Hashable, Iterable,
List, Optional, Sequence, Set, Tuple, Union)
from crossbench.probes import helper
if TYPE_CHECKING:
from crossbench.path import LocalPath
from crossbench.types import Json, JsonDict
def format_metric(value: Union[float, int],
                  stddev: Optional[float] = None) -> str:
  """Format value and stdev to only expose significant + 1 digits.

  The relative stddev is appended as a percentage whose precision matches
  the value's.

  Example outputs:
    100 ± 10%
    100.1 ± 1.2%
    100.12 ± 0.12%
    100.123 ± 0.012%
    100.1235 ± 0.0012%

  Args:
    value: The measured value.
    stddev: Optional standard deviation; falsy values (None, 0) disable the
      ± suffix.

  Returns:
    The formatted string.
  """
  if not stddev:
    return str(value)
  stddev = float(stddev)
  if value == 0:
    # A relative error is undefined for a zero value; fall back to the
    # absolute stddev instead of raising ZeroDivisionError below.
    return f"{value} ± {stddev}"
  # Expose one digit more than the stddev's first significant digit.
  stddev_significant_digit = int(floor(log10(abs(stddev))))
  value_width = max(0, 1 - stddev_significant_digit)
  percent = stddev / value * 100
  percent_significant_digit = int(floor(log10(abs(percent))))
  percent_width = max(0, 1 - percent_significant_digit)
  return f"{value:.{value_width}f} ± {percent:.{percent_width}f}%"
def is_number(value: Any) -> bool:
  """Returns True for ints and floats (note: bools are ints in Python)."""
  return isinstance(value, (float, int))
class Metric:
"""
Metric provides simple statistical getters if the collected values are
ints or floats only.
"""
@classmethod
def from_json(cls, json_data: JsonDict) -> Metric:
values = json_data["values"]
assert isinstance(values, list)
return cls(values)
def __init__(self, values: Optional[List] = None) -> None:
self.values = values or []
self._is_numeric: bool = all(map(is_number, self.values))
def __len__(self) -> int:
return len(self.values)
@property
def is_numeric(self) -> bool:
return self._is_numeric
@property
def min(self) -> float:
assert self._is_numeric
return min(self.values)
@property
def max(self) -> float:
assert self._is_numeric
return max(self.values)
@property
def sum(self) -> float:
assert self._is_numeric
return sum(self.values)
@property
def average(self) -> float:
assert self._is_numeric
return sum(self.values) / len(self.values)
@property
def geomean(self) -> float:
assert self._is_numeric
return geomean(self.values)
@property
def stddev(self) -> float:
assert self._is_numeric
# We're ignoring here any actual distribution of the data and use this as a
# rough estimate of the quality of the data
average = self.average
variance = 0.0
for value in self.values:
variance += (average - value)**2
variance /= len(self.values)
return math.sqrt(variance)
def append(self, value: Any) -> None:
self.values.append(value)
self._is_numeric = self._is_numeric and is_number(value)
def to_json(self) -> JsonDict:
json_data: JsonDict = {"values": self.values}
if not self.values:
return json_data
if self.is_numeric:
json_data["min"] = self.min
average = json_data["average"] = self.average
json_data["geomean"] = self.geomean
json_data["max"] = self.max
json_data["sum"] = self.sum
stddev = json_data["stddev"] = self.stddev
if average == 0:
json_data["stddevPercent"] = 0
else:
json_data["stddevPercent"] = (stddev / average) * 100
return json_data
# Try to simplify repeated non-numeric values
if not isinstance(self.values[0], Hashable):
return json_data
if len(set(self.values)) == 1:
return self.values[0]
return json_data
def geomean(values: Iterable[Union[int, float]]) -> float:
  """Returns the geometric mean of the given numbers.

  Raises ZeroDivisionError for an empty input.
  """
  items = tuple(values)
  return math.prod(items)**(1 / len(items))
def metric_geomean(metric: Metric) -> float:
  """Key function mapping a Metric to its geometric mean."""
  return metric.geomean
class MetricsMerger:
  """
  Merges hierarchical data into 1-level aggregated data;
  Input:
    data_1 = {
      "a": {
        "aa": 1.1,
        "ab": 2
      },
      "b": 2.1
    }
    data_2 = {
      "a": {
        "aa": 1.2
      },
      "b": 2.2,
      "c": 2
    }
  The merged data maps str => Metric():
    MetricsMerger(data_1, data_2).data == {
      "a/aa": Metric(1.1, 1.2)
      "a/ab": Metric(2)
      "b":    Metric(2.1, 2.2)
      "c":    Metric(2)
    }
  """

  @classmethod
  def merge_json_list(cls,
                      files: Iterable[LocalPath],
                      key_fn: Optional[helper.KeyFnType] = None,
                      merge_duplicate_paths: bool = False) -> MetricsMerger:
    """Creates a merger from previously json-serialized MetricsMerger files."""
    merger = cls(key_fn=key_fn)
    for file in files:
      with file.open(encoding="utf-8") as f:
        merger.merge_values(
            json.load(f), merge_duplicate_paths=merge_duplicate_paths)
    return merger

  def __init__(self,
               *args: Union[Dict, List[Dict]],
               key_fn: Optional[helper.KeyFnType] = None):
    """Create a new MetricsMerger

    Args:
      *args (optional): Optional hierarchical data to be merged.
      key_fn (optional): Maps property paths (Tuple[str,...]) to strings used
        as keys to group/merge values, or None to skip property paths.
    """
    self._data: Dict[str, Metric] = {}
    self._key_fn: helper.KeyFnType = key_fn or helper._default_flatten_key_fn
    # Keys dropped due to duplicate paths; they stay ignored for all
    # subsequently merged inputs.
    self._ignored_keys: Set[str] = set()
    for data in args:
      self.add(data)

  @property
  def data(self) -> Dict[str, Metric]:
    return self._data

  def merge_values(self,
                   data: Dict[str, Dict],
                   prefix_path: Tuple[str, ...] = (),
                   merge_duplicate_paths: bool = False) -> None:
    """Merge a previously json-serialized MetricsMerger object.

    Args:
      data: Maps property names to serialized Metric dicts.
      prefix_path: Property path prepended to every property name.
      merge_duplicate_paths: If True, values for an already-known key are
        appended; if False, the duplicated key is dropped and ignored from
        then on.
    """
    for property_name, item in data.items():
      path = prefix_path + (property_name,)
      key = self._key_fn(path)
      if key is None or key in self._ignored_keys:
        continue
      if key in self._data:
        if merge_duplicate_paths:
          values = self._data[key]
          for value in item["values"]:
            values.append(value)
        else:
          logging.debug(
              "Removing Metric with the same key-path='%s', key='%s' "
              "from multiple files.", path, key)
          del self._data[key]
          self._ignored_keys.add(key)
      else:
        self._data[key] = Metric.from_json(item)

  def add(self, data: Union[Dict, List[Dict]]) -> None:
    """ Merge "arbitrary" hierarchical data that ends up having primitive leafs.
    Anything that is not a dict is considered a leaf node.
    """
    if isinstance(data, list):
      # Assume that top-level lists are repetitions of the same data
      for item in data:
        self._merge(item)
    else:
      self._merge(data)

  def _merge(
      self, data: Union[Dict,
                        List[Dict]], parent_path: Tuple[str, ...] = ()) -> None:
    """Recursively flattens nested dicts, appending leaf values to Metrics."""
    assert isinstance(data, dict)
    for property_name, value in data.items():
      path = parent_path + (property_name,)
      key: Optional[str] = self._key_fn(path)
      if key is None:
        # key_fn may skip property paths by returning None.
        continue
      if isinstance(value, dict):
        self._merge(value, path)
      else:
        if key in self._data:
          values = self._data[key]
        else:
          values = self._data[key] = Metric()
        if isinstance(value, list):
          # Leaf lists are treated as repeated samples of the same metric.
          for v in value:
            values.append(v)
        else:
          values.append(value)

  def to_json(self,
              value_fn: Optional[Callable[[Any], Json]] = None,
              sort: bool = True) -> JsonDict:
    """Serializes the merged data into a flat key => json-value dict.

    Args:
      value_fn: Optional converter applied to each Metric; defaults to
        Metric.to_json.
      sort: If True the output keys are sorted.
    """
    items = []
    for key, value in self._data.items():
      assert isinstance(value, Metric)
      if value_fn is None:
        json_value: Json = value.to_json()
      else:
        json_value = value_fn(value)
      items.append((key, json_value))
    if sort:
      # Make sure the data is always in the same order, independent of the
      # input order
      items.sort()
    return dict(items)
class CSVFormatter:
  """Converts merged metrics into a 2D table suitable for CSV output.

  Headers: [
    ["label_1", "value_1"],
    ["label_2", "value_2"],
  ]
  Input: {
    "A_1/B_1/Async": 1,
    "A_1/B_2/Sync": 2,
    "A_1/Total": 3,
    "Total": 3,
  }
  Output: [
    ["label_1", "", "", "", "value_1"],
    ["label_2", "", "", "", "value_2"],
    ["A_1/B_1/Async", "A_1", "B_1", "Async", 1],
    ["A_1/B_2/Sync", "A_1", "B_2", "Sync", 2],
    ["A_1/Total", "A_1", "Total", "", 3],
    ["Total", "Total", "", "", 3],
  ]
  """

  def __init__(self,
               metrics: MetricsMerger,
               value_fn: Optional[Callable[[Any], Any]] = None,
               headers: Sequence[Tuple[Any, ...]] = (),
               include_parts: bool = True,
               sort: bool = True):
    self._table: List[Sequence[Any]] = []
    rows = self.format_items(metrics.to_json(value_fn, sort), sort=sort)
    depth: int = self.extract_max_depth(rows, include_parts)
    self.append_headers(headers, depth)
    self.append_body(rows, include_parts, depth)

  def extract_max_depth(self, items: Sequence[Tuple[str, Json]],
                        include_parts: bool) -> int:
    """Returns the number of path columns, or 0 if parts are not included."""
    if not include_parts:
      return 0
    deepest = max((path.count("/") for path, _ in items), default=0)
    return deepest + 1

  def append_headers(self, headers, max_path_depth: int) -> None:
    """Appends header rows, padding between the label and value columns."""
    padding = ("",) * max_path_depth
    for header in headers:
      assert isinstance(header, tuple), (
          f"Additional CSV headers must be tuples, got {type(header)}: "
          f"{header}")
      self._table.append(header[:1] + padding + header[1:])

  def append_body(self, items: Sequence[Tuple[str, Json]], include_parts: bool,
                  max_path_depth: int) -> None:
    """Appends one row per metric, optionally splitting paths into columns."""
    if not include_parts:
      for path, value in items:
        self._table.append((path, value))
      return
    for path, value in items:
      parts = tuple(path.split("/"))
      padding = ("",) * (max_path_depth - len(parts))
      self._table.append((path,) + parts + padding + (value,))

  def format_items(self, data: Dict[str, Json],
                   sort: bool) -> Sequence[Tuple[str, Json]]:
    """Returns (key, value) pairs of data, optionally sorted by key."""
    items = tuple(data.items())
    return sorted(items) if sort else items

  @property
  def table(self) -> List[Sequence[Any]]:
    return self._table