library/test/src/bench.rs - toolchain/rustc - Git at Google

 //! Benchmarking module.

 use std::panic::{AssertUnwindSafe, catch_unwind};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 use std::{cmp, io};

 use super::Sender;
 use super::event::CompletedTest;
 use super::options::BenchMode;
 use super::test_result::TestResult;
 use super::types::{TestDesc, TestId};
 use crate::stats;

 /// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
 /// `black_box` could do.
 ///
 /// See [`std::hint::black_box`] for details.
 #[inline(always)]
 pub fn black_box<T>(dummy: T) -> T {
     std::hint::black_box(dummy)
 }

 /// Manager of the benchmarking runs.
 ///
 /// This is fed into functions marked with `#[bench]` to allow for
 /// set-up & tear-down before running a piece of code repeatedly via a
 /// call to `iter`.
 #[derive(Clone)]
 pub struct Bencher {
     mode: BenchMode,
     summary: Option<stats::Summary>,
     pub bytes: u64,
 }

 impl Bencher {
     /// Callback for benchmark functions to run in their body.
     pub fn iter<T, F>(&mut self, mut inner: F)
     where
         F: FnMut() -> T,
     {
         if self.mode == BenchMode::Single {
             ns_iter_inner(&mut inner, 1);
             return;
         }

         self.summary = Some(iter(&mut inner));
     }

     pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String>
     where
         F: FnMut(&mut Bencher) -> Result<(), String>,
     {
         let result = f(self);
         result.map(|_| self.summary)
     }
 }

 #[derive(Debug, Clone, PartialEq)]
 pub struct BenchSamples {
     pub ns_iter_summ: stats::Summary,
     pub mb_s: usize,
 }

 pub fn fmt_bench_samples(bs: &BenchSamples) -> String {
     use std::fmt::Write;
     let mut output = String::new();

     let median = bs.ns_iter_summ.median;
     let deviation = bs.ns_iter_summ.max - bs.ns_iter_summ.min;

     write!(
         output,
         "{:>14} ns/iter (+/- {})",
         fmt_thousands_sep(median, ','),
         fmt_thousands_sep(deviation, ',')
     )
     .unwrap();
     if bs.mb_s != 0 {
         write!(output, " = {} MB/s", bs.mb_s).unwrap();
     }
     output
 }

 // Format a number with thousands separators
 fn fmt_thousands_sep(mut n: f64, sep: char) -> String {
     use std::fmt::Write;
     let mut output = String::new();
     let mut trailing = false;
     for &pow in &[9, 6, 3, 0] {
         let base = 10_usize.pow(pow);
         if pow == 0 || trailing || n / base as f64 >= 1.0 {
             match (pow, trailing) {
                 // modern CPUs can execute multiple instructions per nanosecond
                 // e.g. benching an ADD takes about 0.25ns.
                 (0, true) => write!(output, "{:06.2}", n / base as f64).unwrap(),
                 (0, false) => write!(output, "{:.2}", n / base as f64).unwrap(),
                 (_, true) => write!(output, "{:03}", n as usize / base).unwrap(),
                 _ => write!(output, "{}", n as usize / base).unwrap(),
             }
             if pow != 0 {
                 output.push(sep);
             }
             trailing = true;
         }
         n %= base as f64;
     }

     output
 }

 fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
 where
     F: FnMut() -> T,
 {
     let start = Instant::now();
     for _ in 0..k {
         black_box(inner());
     }
     start.elapsed().as_nanos() as u64
 }

 pub fn iter<T, F>(inner: &mut F) -> stats::Summary
 where
     F: FnMut() -> T,
 {
     // Initial bench run to get ballpark figure.
     let ns_single = ns_iter_inner(inner, 1);

     // Try to estimate iter count for 1ms falling back to 1m
     // iterations if first run took < 1ns.
     let ns_target_total = 1_000_000; // 1ms
     let mut n = ns_target_total / cmp::max(1, ns_single);

     // if the first run took more than 1ms we don't want to just
     // be left doing 0 iterations on every loop. The unfortunate
     // side effect of not being able to do as many runs is
     // automatically handled by the statistical analysis below
     // (i.e., larger error bars).
     n = cmp::max(1, n);

     let mut total_run = Duration::new(0, 0);
     let samples: &mut [f64] = &mut [0.0_f64; 50];
     loop {
         let loop_start = Instant::now();

         for p in &mut *samples {
             *p = ns_iter_inner(inner, n) as f64 / n as f64;
         }

         stats::winsorize(samples, 5.0);
         let summ = stats::Summary::new(samples);

         for p in &mut *samples {
             let ns = ns_iter_inner(inner, 5 * n);
             *p = ns as f64 / (5 * n) as f64;
         }

         stats::winsorize(samples, 5.0);
         let summ5 = stats::Summary::new(samples);

         let loop_run = loop_start.elapsed();

         // If we've run for 100ms and seem to have converged to a
         // stable median.
         if loop_run > Duration::from_millis(100)
             && summ.median_abs_dev_pct < 1.0
             && summ.median - summ5.median < summ5.median_abs_dev
         {
             return summ5;
         }

         total_run += loop_run;
         // Longest we ever run for is 3s.
         if total_run > Duration::from_secs(3) {
             return summ5;
         }

         // If we overflow here just return the results so far. We check a
         // multiplier of 10 because we're about to multiply by 2 and the
         // next iteration of the loop will also multiply by 5 (to calculate
         // the summ5 result)
         n = match n.checked_mul(10) {
             Some(_) => n * 2,
             None => {
                 return summ5;
             }
         };
     }
 }

 pub fn benchmark<F>(
     id: TestId,
     desc: TestDesc,
     monitor_ch: Sender<CompletedTest>,
     nocapture: bool,
     f: F,
 ) where
     F: FnMut(&mut Bencher) -> Result<(), String>,
 {
     let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };

     let data = Arc::new(Mutex::new(Vec::new()));

     if !nocapture {
         io::set_output_capture(Some(data.clone()));
     }

     let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f)));

     io::set_output_capture(None);

     let test_result = match result {
         //bs.bench(f) {
         Ok(Ok(Some(ns_iter_summ))) => {
             let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
             let mb_s = bs.bytes * 1000 / ns_iter;

             let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
             TestResult::TrBench(bs)
         }
         Ok(Ok(None)) => {
             // iter not called, so no data.
             // FIXME: error in this case?
             let samples: &mut [f64] = &mut [0.0_f64; 1];
             let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
             TestResult::TrBench(bs)
         }
         Err(_) => TestResult::TrFailed,
         Ok(Err(_)) => TestResult::TrFailed,
     };

     let stdout = data.lock().unwrap().to_vec();
     let message = CompletedTest::new(id, desc, test_result, None, stdout);
     monitor_ch.send(message).unwrap();
 }

 pub fn run_once<F>(f: F) -> Result<(), String>
 where
     F: FnMut(&mut Bencher) -> Result<(), String>,
 {
     let mut bs = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 };
     bs.bench(f).map(|_| ())
 }
	//! Benchmarking module.

	use std::panic::{AssertUnwindSafe, catch_unwind};
	use std::sync::{Arc, Mutex};
	use std::time::{Duration, Instant};
	use std::{cmp, io};

	use super::Sender;
	use super::event::CompletedTest;
	use super::options::BenchMode;
	use super::test_result::TestResult;
	use super::types::{TestDesc, TestId};
	use crate::stats;

	/// An identity function that __hints__ to the compiler to be maximally pessimistic about what
	/// `black_box` could do.
	///
	/// See [`std::hint::black_box`] for details.
	#[inline(always)]
	pub fn black_box<T>(dummy: T) -> T {
	std::hint::black_box(dummy)
	}

	/// Manager of the benchmarking runs.
	///
	/// This is fed into functions marked with `#[bench]` to allow for
	/// set-up & tear-down before running a piece of code repeatedly via a
	/// call to `iter`.
	#[derive(Clone)]
	pub struct Bencher {
	mode: BenchMode,
	summary: Option<stats::Summary>,
	pub bytes: u64,
	}

	impl Bencher {
	/// Callback for benchmark functions to run in their body.
	pub fn iter<T, F>(&mut self, mut inner: F)
	where
	F: FnMut() -> T,
	{
	if self.mode == BenchMode::Single {
	ns_iter_inner(&mut inner, 1);
	return;
	}

	self.summary = Some(iter(&mut inner));
	}

	pub fn bench<F>(&mut self, mut f: F) -> Result<Option<stats::Summary>, String>
	where
	F: FnMut(&mut Bencher) -> Result<(), String>,
	{
	let result = f(self);
	result.map(\|_\| self.summary)
	}
	}

	#[derive(Debug, Clone, PartialEq)]
	pub struct BenchSamples {
	pub ns_iter_summ: stats::Summary,
	pub mb_s: usize,
	}

	pub fn fmt_bench_samples(bs: &BenchSamples) -> String {
	use std::fmt::Write;
	let mut output = String::new();

	let median = bs.ns_iter_summ.median;
	let deviation = bs.ns_iter_summ.max - bs.ns_iter_summ.min;

	write!(
	output,
	"{:>14} ns/iter (+/- {})",
	fmt_thousands_sep(median, ','),
	fmt_thousands_sep(deviation, ',')
	)
	.unwrap();
	if bs.mb_s != 0 {
	write!(output, " = {} MB/s", bs.mb_s).unwrap();
	}
	output
	}

	// Format a number with thousands separators
	fn fmt_thousands_sep(mut n: f64, sep: char) -> String {
	use std::fmt::Write;
	let mut output = String::new();
	let mut trailing = false;
	for &pow in &[9, 6, 3, 0] {
	let base = 10_usize.pow(pow);
	if pow == 0 \|\| trailing \|\| n / base as f64 >= 1.0 {
	match (pow, trailing) {
	// modern CPUs can execute multiple instructions per nanosecond
	// e.g. benching an ADD takes about 0.25ns.
	(0, true) => write!(output, "{:06.2}", n / base as f64).unwrap(),
	(0, false) => write!(output, "{:.2}", n / base as f64).unwrap(),
	(_, true) => write!(output, "{:03}", n as usize / base).unwrap(),
	_ => write!(output, "{}", n as usize / base).unwrap(),
	}
	if pow != 0 {
	output.push(sep);
	}
	trailing = true;
	}
	n %= base as f64;
	}

	output
	}

	fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
	where
	F: FnMut() -> T,
	{
	let start = Instant::now();
	for _ in 0..k {
	black_box(inner());
	}
	start.elapsed().as_nanos() as u64
	}

	pub fn iter<T, F>(inner: &mut F) -> stats::Summary
	where
	F: FnMut() -> T,
	{
	// Initial bench run to get ballpark figure.
	let ns_single = ns_iter_inner(inner, 1);

	// Try to estimate iter count for 1ms falling back to 1m
	// iterations if first run took < 1ns.
	let ns_target_total = 1_000_000; // 1ms
	let mut n = ns_target_total / cmp::max(1, ns_single);

	// if the first run took more than 1ms we don't want to just
	// be left doing 0 iterations on every loop. The unfortunate
	// side effect of not being able to do as many runs is
	// automatically handled by the statistical analysis below
	// (i.e., larger error bars).
	n = cmp::max(1, n);

	let mut total_run = Duration::new(0, 0);
	let samples: &mut [f64] = &mut [0.0_f64; 50];
	loop {
	let loop_start = Instant::now();

	for p in &mut *samples {
	*p = ns_iter_inner(inner, n) as f64 / n as f64;
	}

	stats::winsorize(samples, 5.0);
	let summ = stats::Summary::new(samples);

	for p in &mut *samples {
	let ns = ns_iter_inner(inner, 5 * n);
	p = ns as f64 / (5 n) as f64;
	}

	stats::winsorize(samples, 5.0);
	let summ5 = stats::Summary::new(samples);

	let loop_run = loop_start.elapsed();

	// If we've run for 100ms and seem to have converged to a
	// stable median.
	if loop_run > Duration::from_millis(100)
	&& summ.median_abs_dev_pct < 1.0
	&& summ.median - summ5.median < summ5.median_abs_dev
	{
	return summ5;
	}

	total_run += loop_run;
	// Longest we ever run for is 3s.
	if total_run > Duration::from_secs(3) {
	return summ5;
	}

	// If we overflow here just return the results so far. We check a
	// multiplier of 10 because we're about to multiply by 2 and the
	// next iteration of the loop will also multiply by 5 (to calculate
	// the summ5 result)
	n = match n.checked_mul(10) {
	Some(_) => n * 2,
	None => {
	return summ5;
	}
	};
	}
	}

	pub fn benchmark<F>(
	id: TestId,
	desc: TestDesc,
	monitor_ch: Sender<CompletedTest>,
	nocapture: bool,
	f: F,
	) where
	F: FnMut(&mut Bencher) -> Result<(), String>,
	{
	let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 };

	let data = Arc::new(Mutex::new(Vec::new()));

	if !nocapture {
	io::set_output_capture(Some(data.clone()));
	}

	let result = catch_unwind(AssertUnwindSafe(\|\| bs.bench(f)));

	io::set_output_capture(None);

	let test_result = match result {
	//bs.bench(f) {
	Ok(Ok(Some(ns_iter_summ))) => {
	let ns_iter = cmp::max(ns_iter_summ.median as u64, 1);
	let mb_s = bs.bytes * 1000 / ns_iter;

	let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize };
	TestResult::TrBench(bs)
	}
	Ok(Ok(None)) => {
	// iter not called, so no data.
	// FIXME: error in this case?
	let samples: &mut [f64] = &mut [0.0_f64; 1];
	let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 };
	TestResult::TrBench(bs)
	}
	Err(_) => TestResult::TrFailed,
	Ok(Err(_)) => TestResult::TrFailed,
	};

	let stdout = data.lock().unwrap().to_vec();
	let message = CompletedTest::new(id, desc, test_result, None, stdout);
	monitor_ch.send(message).unwrap();
	}

	pub fn run_once<F>(f: F) -> Result<(), String>
	where
	F: FnMut(&mut Bencher) -> Result<(), String>,
	{
	let mut bs = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 };
	bs.bench(f).map(\|_\| ())
	}