Add plots for statistical significance of measurements.
* This is useful when comparing benchmark runs.
* Inspired by [Statistics for Hackers](https://youtu.be/Iq9DzN6mvYA?t=800)
Change-Id: I4f2bfdf174263e1fa1542e115c0caabcafdb53b3
Test: `npm run-script dev`
diff --git a/development/plot-benchmarks/src/lib/App.svelte b/development/plot-benchmarks/src/lib/App.svelte
index ffa3e57..f1ace88 100644
--- a/development/plot-benchmarks/src/lib/App.svelte
+++ b/development/plot-benchmarks/src/lib/App.svelte
@@ -3,9 +3,17 @@
import { writable } from "svelte/store";
import type { FileMetadata } from "../types/files.js";
import Session from "./Session.svelte";
+ import { wrap } from "comlink";
+ import { StatService } from "../workers/service.js";
// Stores
let entries: Writable<FileMetadata[]> = writable([]);
+ const url = new URL("../workers/worker.ts", import.meta.url);
+ const service = wrap<StatService>(
+ new Worker(url, {
+ type: "module",
+ })
+ );
function onFilesChanged(event) {
const detail: FileMetadata[] = event.detail;
@@ -22,5 +30,5 @@
</details>
<div class="container">
- <Session fileEntries={$entries} on:entries={onFilesChanged} />
+ <Session fileEntries={$entries} {service} on:entries={onFilesChanged} />
</div>
diff --git a/development/plot-benchmarks/src/lib/Chart.svelte b/development/plot-benchmarks/src/lib/Chart.svelte
index 7a063a8..9a224e2 100644
--- a/development/plot-benchmarks/src/lib/Chart.svelte
+++ b/development/plot-benchmarks/src/lib/Chart.svelte
@@ -10,6 +10,7 @@
export let data: Data;
export let chartType: ChartType = "line";
+ export let isExperimental: boolean = false;
$: {
if ($chart) {
@@ -27,7 +28,9 @@
onMount(() => {
const onUpdate = (chart: Chart) => {
$chart = chart;
- $items = chart.options.plugins.legend.labels.generateLabels(chart);
+ // Bad typings.
+ const legend = chart.options.plugins.legend as any;
+ $items = legend.labels.generateLabels(chart);
};
const plugins = {
legend: {
@@ -65,7 +68,14 @@
⎘
</button>
</div>
- <canvas id="chart" class="chart" bind:this={element} />
+ <canvas class="chart" bind:this={element} />
+ {#if isExperimental}
+ <footer class="slim">
+ <section class="experimental">
+ <kbd>Experimental</kbd>
+ </section>
+ </footer>
+ {/if}
</article>
{#if $items}
@@ -89,4 +99,17 @@
border: none;
padding: 5px;
}
+
+ .slim {
+ margin-bottom: 0px;
+ padding: 0;
+ }
+
+ .experimental {
+ display: flex;
+ flex-direction: row;
+ flex-wrap: nowrap;
+ justify-content: center;
+ margin-bottom: 0px;
+ }
</style>
diff --git a/development/plot-benchmarks/src/lib/Dataset.svelte b/development/plot-benchmarks/src/lib/Dataset.svelte
index a19eff0..6e81cc0 100644
--- a/development/plot-benchmarks/src/lib/Dataset.svelte
+++ b/development/plot-benchmarks/src/lib/Dataset.svelte
@@ -1,14 +1,23 @@
<script lang="ts">
import { createEventDispatcher } from "svelte";
import { Session, type IndexedWrapper } from "../wrappers/session.js";
- import type { SelectionEvent, Selection } from "../types/events.js";
+ import type {
+ SelectionEvent,
+ Selection,
+ StatEvent,
+ StatInfo,
+ StatType,
+ } from "../types/events.js";
export let name: string;
export let datasetGroup: IndexedWrapper[];
+ // Dispatchers
+ let selectionDispatcher = createEventDispatcher<SelectionEvent>();
+ let statDispatcher = createEventDispatcher<StatEvent>();
// State
- let dispatcher = createEventDispatcher<SelectionEvent>();
let selected: boolean = true;
+ let compute: boolean = false;
let sources: Set<string>;
let sampledMetrics: Set<string>;
let metrics: Set<string>;
@@ -22,7 +31,21 @@
name: name,
enabled: selected,
};
- dispatcher("selections", [selection]);
+ selectionDispatcher("selections", [selection]);
+ };
+
+ // Builds the change handler for the toggle of one statistic `type`.
+ // The handler records the checkbox state in `compute` and announces it
+ // to the parent through the `info` event.
+ let stat = (type: StatType) => (event: Event) => {
+ event.stopPropagation();
+ const checkbox = event.target as HTMLInputElement;
+ compute = checkbox.checked;
+ const info: StatInfo = { name, type, enabled: compute };
+ statDispatcher("info", [info]);
};
$: {
@@ -45,16 +68,32 @@
<hgroup>
<div class="section">
<span class="item">{name}</span>
- <fieldset class="item">
- <label for="switch">
- <input
- type="checkbox"
- role="switch"
- checked={selected}
- on:change={selection}
- />
- </label>
- </fieldset>
+ <div class="item actions">
+ <fieldset>
+ <label for="switch">
+ Show
+ <input
+ type="checkbox"
+ role="switch"
+ checked={selected}
+ on:change={selection}
+ />
+ </label>
+ </fieldset>
+ {#if sources.size > 1}
+ <fieldset>
+ <label for="switch">
+ P
+ <input
+ type="checkbox"
+ role="switch"
+ checked={compute}
+ on:change={stat("p")}
+ />
+ </label>
+ </fieldset>
+ {/if}
+ </div>
</div>
<div class="details">
<div class="sources">
@@ -103,4 +142,14 @@
.section .item {
margin: 0px 10px;
}
+
+ .actions {
+ display: flex;
+ flex-direction: row;
+ justify-content: flex-end;
+ }
+
+ .actions fieldset {
+ margin-left: 5px;
+ }
</style>
diff --git a/development/plot-benchmarks/src/lib/Group.svelte b/development/plot-benchmarks/src/lib/Group.svelte
index e538e68..6dcee76 100644
--- a/development/plot-benchmarks/src/lib/Group.svelte
+++ b/development/plot-benchmarks/src/lib/Group.svelte
@@ -1,18 +1,27 @@
<script lang="ts">
import { createEventDispatcher } from "svelte";
- import type { Selection, SelectionEvent } from "../types/events.js";
+ import type {
+ Selection,
+ SelectionEvent,
+ StatEvent,
+ StatInfo,
+ } from "../types/events.js";
import { Session, type IndexedWrapper } from "../wrappers/session.js";
import Dataset from "./Dataset.svelte";
export let className: string;
export let datasetGroup: IndexedWrapper[];
- let dispatcher = createEventDispatcher<SelectionEvent>();
+ let selectionDispatcher = createEventDispatcher<SelectionEvent>();
+ let statDispatcher = createEventDispatcher<StatEvent>();
let datasetNames: Set<string>;
+ // Forward events.
+ // Re-dispatches the `selections` events raised by child Dataset components
+ // so that the parent of this Group can listen for them.
let selection = function (event: CustomEvent<Selection[]>) {
- // Forward events.
- dispatcher("selections", event.detail);
+ selectionDispatcher("selections", event.detail);
+ };
+ // Re-dispatches the `info` (statistic toggle) events raised by child
+ // Dataset components, with the payload left untouched.
+ let stat = function (event: CustomEvent<StatInfo[]>) {
+ statDispatcher("info", event.detail);
};
$: {
@@ -24,7 +33,7 @@
<summary>{className}</summary>
<div class="details">
{#each datasetNames as name (name)}
- <Dataset {datasetGroup} {name} on:selections={selection} />
+ <Dataset {datasetGroup} {name} on:selections={selection} on:info={stat} />
{/each}
</div>
</details>
diff --git a/development/plot-benchmarks/src/lib/Session.svelte b/development/plot-benchmarks/src/lib/Session.svelte
index b751f67..3a0a062 100644
--- a/development/plot-benchmarks/src/lib/Session.svelte
+++ b/development/plot-benchmarks/src/lib/Session.svelte
@@ -1,19 +1,31 @@
<script lang="ts">
import { createEventDispatcher } from "svelte";
- import { writable, type Writable } from "svelte/store";
+ import {
+ writable,
+ type Readable,
+ type Writable,
+ derived,
+ } from "svelte/store";
import { readBenchmarks } from "../files.js";
import { ChartDataTransforms } from "../transforms/data-transforms.js";
import { Transforms } from "../transforms/metric-transforms.js";
import { STANDARD_MAPPER } from "../transforms/standard-mappers.js";
import type { Data, Series } from "../types/chart.js";
import type { Metrics } from "../types/data.js";
- import type { FileMetadataEvent, Selection } from "../types/events.js";
+ import type {
+ FileMetadataEvent,
+ Selection,
+ StatInfo,
+ } from "../types/events.js";
import type { FileMetadata } from "../types/files.js";
import { Session, type IndexedWrapper } from "../wrappers/session.js";
import Chart from "./Chart.svelte";
import Group from "./Group.svelte";
+ import type { StatService } from "../workers/service.js";
+ import type { Remote } from "comlink";
export let fileEntries: FileMetadata[];
+ export let service: Remote<StatService>;
// State
let eventDispatcher = createEventDispatcher<FileMetadataEvent>();
@@ -23,13 +35,23 @@
let chartData: Data;
let classGroups: Record<string, IndexedWrapper[]>;
let size: number;
+ let activeSeries: Promise<Series[]>;
// Stores
let activeDragDrop: Writable<boolean> = writable(false);
let suppressed: Writable<Set<string>> = writable(new Set());
+ let activeStats: Writable<StatInfo[]> = writable([]);
+  // Names of the datasets referenced by the entries of `activeStats`.
+  let active: Readable<Set<string>> = derived(
+    activeStats,
+    ($activeStats) => new Set($activeStats.map((entry) => entry.name))
+  );
// Events
- let handler = function (event: CustomEvent<Selection[]>) {
+ let selectionHandler = function (event: CustomEvent<Selection[]>) {
const selections: Selection[] = event.detail;
for (let i = 0; i < selections.length; i += 1) {
const selection = selections[i];
@@ -42,9 +64,28 @@
$suppressed = $suppressed;
};
+  /**
+   * Applies the statistic toggles carried by an `info` event to `activeStats`.
+   *
+   * An entry is identified by its `(name, type)` pair:
+   *  - a disabled toggle removes the matching entry, if there is one;
+   *  - an enabled toggle adds the entry only when it is not already active,
+   *    so that one later "disable" is always enough to turn it off again.
+   *
+   * The store is published once, after every toggle has been applied.
+   */
+  let statHandler = function (event: CustomEvent<StatInfo[]>) {
+    const statistics = event.detail;
+    if (statistics.length <= 0) {
+      return;
+    }
+    const next = [...$activeStats];
+    for (const statInfo of statistics) {
+      const index = next.findIndex(
+        (entry) => entry.name === statInfo.name && entry.type === statInfo.type
+      );
+      if (!statInfo.enabled) {
+        if (index >= 0) {
+          next.splice(index, 1);
+        }
+      } else if (index < 0) {
+        next.push(statInfo);
+      }
+    }
+    $activeStats = next;
+  };
+
$: {
session = new Session(fileEntries);
metrics = Transforms.buildMetrics(session, $suppressed);
+ activeSeries = service.pSeries(metrics, $active);
series = ChartDataTransforms.mapToSeries(metrics, STANDARD_MAPPER);
chartData = ChartDataTransforms.mapToDataset(series);
classGroups = session.classGroups;
@@ -115,13 +156,26 @@
>
<h5>Benchmarks</h5>
{#each Object.entries(classGroups) as [className, wrappers]}
- <Group {className} datasetGroup={wrappers} on:selections={handler} />
+ <Group
+ {className}
+ datasetGroup={wrappers}
+ on:selections={selectionHandler}
+ on:info={statHandler}
+ />
{/each}
</article>
{#if series.length > 0}
<Chart data={chartData} />
{/if}
+
+ {#await activeSeries}
+ <article aria-busy="true" />
+ {:then chartData}
+ {#if chartData.length > 0}
+ <Chart data={ChartDataTransforms.mapToDataset(chartData)} isExperimental={true} />
+ {/if}
+ {/await}
{/if}
<style>
diff --git a/development/plot-benchmarks/src/transforms/metric-transforms.ts b/development/plot-benchmarks/src/transforms/metric-transforms.ts
index e35da44..571b4ae 100644
--- a/development/plot-benchmarks/src/transforms/metric-transforms.ts
+++ b/development/plot-benchmarks/src/transforms/metric-transforms.ts
@@ -19,7 +19,6 @@
const wrapper = wrappers[j];
const datasetName = wrappers[j].value.datasetName();
if (suppressed.has(datasetName)) {
- console.log(`Skipping suppressed dataset name ${datasetName}`, session);
continue;
}
const source = wrapper.source;
diff --git a/development/plot-benchmarks/src/transforms/standard-mappers.ts b/development/plot-benchmarks/src/transforms/standard-mappers.ts
index 8b928cd..6b3646a 100644
--- a/development/plot-benchmarks/src/transforms/standard-mappers.ts
+++ b/development/plot-benchmarks/src/transforms/standard-mappers.ts
@@ -10,7 +10,7 @@
for (let i = 0; i < entries.length; i += 1) {
const [source, chartData] = entries[i];
const label = labelFor(metric, source);
- const points = histogramPoints(chartData.values);
+ const [points, _, __] = histogramPoints(chartData.values);
series.push({
label: label,
type: "line",
@@ -43,20 +43,51 @@
return series;
}
-function histogramPoints(runs: number[][], buckets: number = 10): Point[] {
+/**
+ * Buckets every measurement in `runs` into a histogram with `buckets` slots.
+ *
+ * @param runs The measurements; all runs are flattened before bucketing.
+ * @param buckets The number of histogram slots.
+ * @param target Optional value to locate on the histogram. `0` is a valid
+ * target; only `null` means "no target".
+ * @returns A tuple of
+ *  - the histogram points,
+ *  - points marking the bucket that contains `target` (as tall as the
+ *    fullest bucket), or `null` when there is no target,
+ *  - the fraction of measurements that are `>= target`, or `null` when
+ *    there is no target.
+ */
+export function histogramPoints(
+ runs: number[][],
+ buckets: number = 10,
+ target: number | null = null
+): [Point[], Point[] | null, number | null] {
const flattened = runs.flat();
// Default comparator coerces types to string !
flattened.sort((a, b) => a - b); // in-place
const min = flattened[0];
const max = flattened[flattened.length - 1];
+ let targetPoints: Point[] | null = null;
+ let pN: number = 0;
+ let maxFreq: number = 0;
const histogram = new Array(buckets).fill(0);
const slots = buckets - 1; // The actual number of slots in the histogram
for (let i = 0; i < flattened.length; i += 1) {
- let n = normalize(flattened[i], min, max);
- let index = Math.ceil(n * slots);
+ const value = flattened[i];
+ // Comparing against a `null` target would coerce it to 0, so only count
+ // when a target was actually supplied.
+ if (target !== null && value >= target) {
+ pN += 1;
+ }
+ const n = normalize(value, min, max);
+ const index = Math.ceil(n * slots);
histogram[index] = histogram[index] + 1;
+ if (maxFreq < histogram[index]) {
+ maxFreq = histogram[index];
+ }
}
- return singlePoints(histogram);
+ if (target === null) {
+ return [singlePoints(histogram), null, null];
+ }
+ // Explicit null check above: a truthiness test would drop a target of 0.
+ const n = normalize(target, min, max);
+ const index = Math.ceil(n * slots);
+ targetPoints = selectPoints(buckets, index, maxFreq);
+ return [singlePoints(histogram), targetPoints, (pN / flattened.length)];
+}
+
+/**
+ * Builds one point per bucket where only the bucket at `index` is non-zero.
+ *
+ * @param buckets The number of points to generate.
+ * @param index The 0 based bucket that receives the `target` height.
+ * @param target The `y` value of the selected bucket; all others are 0.
+ */
+function selectPoints(buckets: number, index: number, target: number) {
+  return Array.from({ length: buckets }, (_, i): Point => ({
+    x: i + 1, // 1 based index
+    y: i === index ? target : 0,
+  }));
+}
function singlePoints(runs: number[]): Point[] {
@@ -71,6 +102,15 @@
}
function normalize(n: number, min: number, max: number): number {
+ // Values outside of [min, max] are reported and then clamped to the
+ // nearest bound before being scaled.
+ if (n < min || n > max) {
+ console.warn(`Warning n(${n}) is not in the range of (${min}, ${max})`);
+ n = n < min ? min : n;
+ n = n > max ? max : n;
+ }
return (n - min) / (max - min + 1e-5);
}
@@ -78,7 +118,11 @@
* Generates a series label.
*/
function labelFor<T>(metric: Metric<T>, source: string): string {
- return `${source}[${metric.class} ${metric.benchmark} ${metric.label}]`;
+ return `${source} {${metric.class}${metric.benchmark}} - ${metric.label}`;
+}
+
+/**
+ * Builds the name of the dataset a metric belongs to, formatted as
+ * `<class>_<benchmark>`.
+ */
+export function datasetName(metric: Metric<any>): string {
+ return `${metric.class}_${metric.benchmark}`;
}
/**
diff --git a/development/plot-benchmarks/src/types/events.ts b/development/plot-benchmarks/src/types/events.ts
index 4dbe7d3b..ae8c3c7 100644
--- a/development/plot-benchmarks/src/types/events.ts
+++ b/development/plot-benchmarks/src/types/events.ts
@@ -1,7 +1,7 @@
import type { FileMetadata } from "./files.js";
export interface FileMetadataEvent {
- entries: Array<FileMetadata>;
+ entries: FileMetadata[];
}
export interface Selection {
@@ -9,5 +9,16 @@
enabled: boolean;
}
export interface SelectionEvent {
- selections: Array<Selection>;
+ selections: Selection[];
+}
+
+/** The kinds of statistics that can be toggled. Only `'p'` exists so far. */
+export type StatType = 'p';
+/** Describes the toggle state of one statistic. */
+export interface StatInfo {
+ // The name the toggle applies to.
+ name: string;
+ // The statistic that was toggled.
+ type: StatType;
+ // Whether the statistic is switched on.
+ enabled: boolean;
+}
+
+/** Event map for dispatchers that emit statistic toggles as `info` events. */
+export interface StatEvent {
+ info: StatInfo[];
}
diff --git a/development/plot-benchmarks/src/workers/service.ts b/development/plot-benchmarks/src/workers/service.ts
new file mode 100644
index 0000000..9ee37e0
--- /dev/null
+++ b/development/plot-benchmarks/src/workers/service.ts
@@ -0,0 +1,132 @@
+import { datasetName, histogramPoints } from "../transforms/standard-mappers.js";
+import type { Series } from "../types/chart.js";
+import type { ChartData, Metrics } from "../types/data.js";
+
+/**
+ * Computes the series used to plot the statistical significance of the
+ * difference between benchmark sources, using a permutation (shuffle) test.
+ */
+export class StatService {
+  /**
+   * Builds the significance series for every sampled metric whose dataset
+   * name is in `activeDatasets`.
+   *
+   * The first source of a metric is the reference; each remaining source is
+   * compared against it. A comparison contributes a "Likelihood" line (the
+   * histogram of simulated median deltas) and, when available, a bar that
+   * marks the observed delta and is labelled with P.
+   *
+   * @param metrics The metrics to inspect; only `metrics.sampled` is used.
+   * @param activeDatasets The names of the datasets to compute series for.
+   * @returns The series to plot; empty when nothing is active.
+   */
+  pSeries(metrics: Metrics<number>, activeDatasets: Set<string>): Series[] {
+    if (activeDatasets.size <= 0) {
+      return [];
+    }
+
+    const series: Series[] = [];
+    const sampled = metrics.sampled;
+    if (!sampled) {
+      return series;
+    }
+
+    for (let i = 0; i < sampled.length; i += 1) {
+      const metric = sampled[i];
+      const name = datasetName(metric);
+      if (!activeDatasets.has(name)) {
+        continue;
+      }
+      const data: Record<string, ChartData<number[]>> = metric.data;
+      const comparables: ChartData<number[]>[] = Object.values(data);
+      // With fewer than two sources the loop below does not run.
+      const reference = comparables[0];
+      for (let j = 1; j < comparables.length; j += 1) {
+        const target = comparables[j];
+        const [delta, distribution] = this.buildDistribution(reference, target);
+        const [points, pPlots, p] = histogramPoints([distribution], 20, delta);
+        series.push({
+          label: `${name} { ${metric.label} } - Likelihood`,
+          type: "line",
+          data: points,
+          options: {
+            tension: 0.3
+          }
+        });
+        if (pPlots && pPlots.length > 0) {
+          series.push({
+            label: `${name} { ${metric.label} } - { P = ${p} }`,
+            type: "bar",
+            data: pPlots,
+            options: {
+              tension: 0.01
+            }
+          });
+        }
+      }
+    }
+    return series;
+  }
+
+  /**
+   * Runs the permutation test between two sources.
+   *
+   * @param reference The source that is used as the baseline.
+   * @param target The source that is compared against the baseline.
+   * @param N The number of simulated shuffles.
+   * @returns The observed absolute difference of the medians, and the
+   * absolute differences obtained from `N` random re-splits of the runs.
+   */
+  private buildDistribution(
+    reference: ChartData<number[]>,
+    target: ChartData<number[]>,
+    N: number = 1_000
+  ): [number, number[]] {
+    // Compute delta median
+    const referenceData = reference.values;
+    const targetData = target.values;
+    const referenceMedian = this.arrayMedian(referenceData);
+    const targetMedian = this.arrayMedian(targetData);
+    const deltaMedian = Math.abs(referenceMedian - targetMedian);
+    // Simulate: pool the runs of both sources, then repeatedly re-split them
+    // at random into two groups of the original sizes.
+    const sizes = [referenceData.length, targetData.length];
+    const combined: number[][] = [...referenceData, ...targetData];
+    const medians: number[] = [];
+    for (let i = 0; i < N; i += 1) {
+      const [r, t] = this.shuffleSplit(combined, sizes);
+      medians.push(Math.abs(this.arrayMedian(r) - this.arrayMedian(t)));
+    }
+    return [deltaMedian, medians];
+  }
+
+  /**
+   * Shuffles `data` and cuts the result into consecutive groups.
+   *
+   * @param data The items to distribute; not modified.
+   * @param sizes The size of each group, in order. A group is shorter than
+   * requested when the shuffled items run out.
+   */
+  private shuffleSplit<T>(data: T[], sizes: number[]): T[][] {
+    const shuffled = this.shuffle(data);
+    const splits: T[][] = [];
+    let index = 0;
+    for (const size of sizes) {
+      splits.push(shuffled.slice(index, index + size));
+      index += size;
+    }
+    return splits;
+  }
+
+  private arrayMedian(data: number[][]): number {
+    // We don't want to compute median of medians here.
+    // This is because while individual runs are correlated
+    // we can still look at the actual metrics in aggregate.
+    return this.median(data.flat());
+  }
+
+  /**
+   * Returns the middle element of the sorted values. For an even number of
+   * values this is the upper of the two middle elements.
+   */
+  private median(data: number[]): number {
+    // Default comparator coerces types to string !
+    const sorted = [...data].sort((a, b) => a - b);
+    return sorted[Math.trunc(sorted.length / 2)];
+  }
+
+  /**
+   * Returns a shuffled copy of `data` (Fisher–Yates).
+   *
+   * Every index, including 0, takes part in the shuffle so that all
+   * permutations are equally likely.
+   *
+   * @param data The items to shuffle; not modified.
+   * @param multiplier The number of shuffle passes. One pass is already
+   * uniform; the parameter is kept for compatibility.
+   */
+  private shuffle<T>(data: T[], multiplier: number = 1): T[] {
+    const copy = [...data];
+    for (let pass = 0; pass < multiplier; pass += 1) {
+      for (let i = copy.length - 1; i > 0; i -= 1) {
+        // Math.random() is in [0, 1), so j is uniform over 0..i.
+        const j = Math.floor(Math.random() * (i + 1));
+        [copy[i], copy[j]] = [copy[j], copy[i]];
+      }
+    }
+    return copy;
+  }
+
+}
diff --git a/development/plot-benchmarks/src/workers/worker.ts b/development/plot-benchmarks/src/workers/worker.ts
new file mode 100644
index 0000000..367e74c
--- /dev/null
+++ b/development/plot-benchmarks/src/workers/worker.ts
@@ -0,0 +1,10 @@
+/* Stub worker. */
+
+import { expose } from "comlink";
+import { StatService } from "./service.js";
+
+// This is always running in the context of a Web Worker.
+declare var self: Worker;
+
+// Expose a single StatService instance through comlink, using the worker
+// itself as the endpoint.
+const service = new StatService();
+expose(service, self);