| #!/usr/bin/python2 |
| """ |
| Postprocessing module for IOzone. It is capable to pick results from an |
| IOzone run, calculate the geometric mean for all throughput results for |
| a given file size or record size, and then generate a series of 2D and 3D |
| graphs. The graph generation functionality depends on gnuplot, and if it |
| is not present, functionality degrates gracefully. |
| |
| @copyright: Red Hat 2010 |
| """ |
| import os, sys, optparse, logging, math, time |
| import common |
| from autotest_lib.client.common_lib import logging_config, logging_manager |
| from autotest_lib.client.common_lib import error |
| from autotest_lib.client.bin import utils, os_dep |
| |
| |
| _LABELS = ['file_size', 'record_size', 'write', 'rewrite', 'read', 'reread', |
| 'randread', 'randwrite', 'bkwdread', 'recordrewrite', 'strideread', |
| 'fwrite', 'frewrite', 'fread', 'freread'] |
| |
| |
| def unique(list): |
| """ |
| Return a list of the elements in list, but without duplicates. |
| |
| @param list: List with values. |
| @return: List with non duplicate elements. |
| """ |
| n = len(list) |
| if n == 0: |
| return [] |
| u = {} |
| try: |
| for x in list: |
| u[x] = 1 |
| except TypeError: |
| return None |
| else: |
| return u.keys() |
| |
| |
| def geometric_mean(values): |
| """ |
| Evaluates the geometric mean for a list of numeric values. |
| |
| @param values: List with values. |
| @return: Single value representing the geometric mean for the list values. |
| @see: http://en.wikipedia.org/wiki/Geometric_mean |
| """ |
| try: |
| values = [int(value) for value in values] |
| except ValueError: |
| return None |
| product = 1 |
| n = len(values) |
| if n == 0: |
| return None |
| return math.exp(sum([math.log(x) for x in values])/n) |
| |
| |
| def compare_matrices(matrix1, matrix2, treshold=0.05): |
| """ |
| Compare 2 matrices nxm and return a matrix nxm with comparison data |
| |
| @param matrix1: Reference Matrix with numeric data |
| @param matrix2: Matrix that will be compared |
| @param treshold: Any difference bigger than this percent treshold will be |
| reported. |
| """ |
| improvements = 0 |
| regressions = 0 |
| same = 0 |
| comparison_matrix = [] |
| |
| new_matrix = [] |
| for line1, line2 in zip(matrix1, matrix2): |
| new_line = [] |
| for element1, element2 in zip(line1, line2): |
| ratio = float(element2) / float(element1) |
| if ratio < (1 - treshold): |
| regressions += 1 |
| new_line.append((100 * ratio - 1) - 100) |
| elif ratio > (1 + treshold): |
| improvements += 1 |
| new_line.append("+" + str((100 * ratio - 1) - 100)) |
| else: |
| same + 1 |
| if line1.index(element1) == 0: |
| new_line.append(element1) |
| else: |
| new_line.append(".") |
| new_matrix.append(new_line) |
| |
| total = improvements + regressions + same |
| |
| return (new_matrix, improvements, regressions, total) |
| |
| |
| class IOzoneAnalyzer(object): |
| """ |
| Analyze an unprocessed IOzone file, and generate the following types of |
| report: |
| |
| * Summary of throughput for all file and record sizes combined |
| * Summary of throughput for all file sizes |
| * Summary of throughput for all record sizes |
| |
| If more than one file is provided to the analyzer object, a comparison |
| between the two runs is made, searching for regressions in performance. |
| """ |
| def __init__(self, list_files, output_dir): |
| self.list_files = list_files |
| if not os.path.isdir(output_dir): |
| os.makedirs(output_dir) |
| self.output_dir = output_dir |
| logging.info("Results will be stored in %s", output_dir) |
| |
| |
| def average_performance(self, results, size=None): |
| """ |
| Flattens a list containing performance results. |
| |
| @param results: List of n lists containing data from performance runs. |
| @param size: Numerical value of a size (say, file_size) that was used |
| to filter the original results list. |
| @return: List with 1 list containing average data from the performance |
| run. |
| """ |
| average_line = [] |
| if size is not None: |
| average_line.append(size) |
| for i in range(2, 15): |
| average = geometric_mean([line[i] for line in results]) / 1024.0 |
| average = int(average) |
| average_line.append(average) |
| return average_line |
| |
| |
| def process_results(self, results, label=None): |
| """ |
| Process a list of IOzone results according to label. |
| |
| @label: IOzone column label that we'll use to filter and compute |
| geometric mean results, in practical term either 'file_size' |
| or 'record_size'. |
| @result: A list of n x m columns with original iozone results. |
| @return: A list of n-? x (m-1) columns with geometric averages for |
| values of each label (ex, average for all file_sizes). |
| """ |
| performance = [] |
| if label is not None: |
| index = _LABELS.index(label) |
| sizes = unique([line[index] for line in results]) |
| sizes.sort() |
| for size in sizes: |
| r_results = [line for line in results if line[index] == size] |
| performance.append(self.average_performance(r_results, size)) |
| else: |
| performance.append(self.average_performance(results)) |
| |
| return performance |
| |
| |
| def parse_file(self, file): |
| """ |
| Parse an IOzone results file. |
| |
| @param file: File object that will be parsed. |
| @return: Matrix containing IOzone results extracted from the file. |
| """ |
| lines = [] |
| for line in file.readlines(): |
| fields = line.split() |
| if len(fields) != 15: |
| continue |
| try: |
| lines.append([int(i) for i in fields]) |
| except ValueError: |
| continue |
| return lines |
| |
| |
| def report(self, overall_results, record_size_results, file_size_results): |
| """ |
| Generates analysis data for IOZone run. |
| |
| Generates a report to both logs (where it goes with nice headers) and |
| output files for further processing (graph generation). |
| |
| @param overall_results: 1x15 Matrix containing IOzone results for all |
| file sizes |
| @param record_size_results: nx15 Matrix containing IOzone results for |
| each record size tested. |
| @param file_size_results: nx15 Matrix containing file size results |
| for each file size tested. |
| """ |
| # Here we'll use the logging system to put the output of our analysis |
| # to files |
| logger = logging.getLogger() |
| formatter = logging.Formatter("") |
| |
| logging.info("") |
| logging.info("TABLE: SUMMARY of ALL FILE and RECORD SIZES Results in MB/sec") |
| logging.info("") |
| logging.info("FILE & RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE") |
| logging.info("SIZES (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") |
| logging.info("-------------------------------------------------------------------------------------------------------------------") |
| for result_line in overall_results: |
| logging.info("ALL %-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) |
| logging.info("") |
| |
| logging.info("DRILLED DATA:") |
| |
| logging.info("") |
| logging.info("TABLE: RECORD Size against all FILE Sizes Results in MB/sec") |
| logging.info("") |
| logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") |
| logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") |
| logging.info("--------------------------------------------------------------------------------------------------------------") |
| |
| foutput_path = os.path.join(self.output_dir, '2d-datasource-file') |
| if os.path.isfile(foutput_path): |
| os.unlink(foutput_path) |
| foutput = logging.FileHandler(foutput_path) |
| foutput.setFormatter(formatter) |
| logger.addHandler(foutput) |
| for result_line in record_size_results: |
| logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) |
| logger.removeHandler(foutput) |
| |
| logging.info("") |
| |
| logging.info("") |
| logging.info("TABLE: FILE Size against all RECORD Sizes Results in MB/sec") |
| logging.info("") |
| logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") |
| logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") |
| logging.info("--------------------------------------------------------------------------------------------------------------") |
| |
| routput_path = os.path.join(self.output_dir, '2d-datasource-record') |
| if os.path.isfile(routput_path): |
| os.unlink(routput_path) |
| routput = logging.FileHandler(routput_path) |
| routput.setFormatter(formatter) |
| logger.addHandler(routput) |
| for result_line in file_size_results: |
| logging.info("%-10s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s" % tuple(result_line)) |
| logger.removeHandler(routput) |
| |
| logging.info("") |
| |
| |
| def report_comparison(self, record, file): |
| """ |
| Generates comparison data for 2 IOZone runs. |
| |
| It compares 2 sets of nxm results and outputs a table with differences. |
| If a difference higher or smaller than 5% is found, a warning is |
| triggered. |
| |
| @param record: Tuple with 4 elements containing results for record size. |
| @param file: Tuple with 4 elements containing results for file size. |
| """ |
| (record_size, record_improvements, record_regressions, |
| record_total) = record |
| (file_size, file_improvements, file_regressions, |
| file_total) = file |
| logging.info("ANALYSIS of DRILLED DATA:") |
| |
| logging.info("") |
| logging.info("TABLE: RECsize Difference between runs Results are % DIFF") |
| logging.info("") |
| logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") |
| logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") |
| logging.info("--------------------------------------------------------------------------------------------------------------") |
| for result_line in record_size: |
| logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line)) |
| logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)", |
| record_regressions, |
| (100 * record_regressions/float(record_total)), |
| record_improvements, |
| (100 * record_improvements/float(record_total))) |
| logging.info("") |
| |
| logging.info("") |
| logging.info("TABLE: FILEsize Difference between runs Results are % DIFF") |
| logging.info("") |
| logging.info("RECORD INIT RE RE RANDOM RANDOM BACKWD RECRE STRIDE F FRE F FRE ") |
| logging.info("SIZE (KB) WRITE WRITE READ READ READ WRITE READ WRITE READ WRITE WRITE READ READ") |
| logging.info("--------------------------------------------------------------------------------------------------------------") |
| for result_line in file_size: |
| logging.info("%-10s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s%-8.6s" % tuple(result_line)) |
| logging.info("REGRESSIONS: %d (%.2f%%) Improvements: %d (%.2f%%)", |
| file_regressions, |
| (100 * file_regressions/float(file_total)), |
| file_improvements, |
| (100 * file_improvements/float(file_total))) |
| logging.info("") |
| |
| |
| def analyze(self): |
| """ |
| Analyzes and eventually compares sets of IOzone data. |
| """ |
| overall = [] |
| record_size = [] |
| file_size = [] |
| for path in self.list_files: |
| file = open(path, 'r') |
| logging.info('FILE: %s', path) |
| |
| results = self.parse_file(file) |
| |
| overall_results = self.process_results(results) |
| record_size_results = self.process_results(results, 'record_size') |
| file_size_results = self.process_results(results, 'file_size') |
| self.report(overall_results, record_size_results, file_size_results) |
| |
| if len(self.list_files) == 2: |
| overall.append(overall_results) |
| record_size.append(record_size_results) |
| file_size.append(file_size_results) |
| |
| if len(self.list_files) == 2: |
| record_comparison = compare_matrices(*record_size) |
| file_comparison = compare_matrices(*file_size) |
| self.report_comparison(record_comparison, file_comparison) |
| |
| |
| class IOzonePlotter(object): |
| """ |
| Plots graphs based on the results of an IOzone run. |
| |
| Plots graphs based on the results of an IOzone run. Uses gnuplot to |
| generate the graphs. |
| """ |
| def __init__(self, results_file, output_dir): |
| self.active = True |
| try: |
| self.gnuplot = os_dep.command("gnuplot") |
| except: |
| logging.error("Command gnuplot not found, disabling graph " |
| "generation") |
| self.active = False |
| |
| if not os.path.isdir(output_dir): |
| os.makedirs(output_dir) |
| self.output_dir = output_dir |
| |
| if not os.path.isfile(results_file): |
| logging.error("Invalid file %s provided, disabling graph " |
| "generation", results_file) |
| self.active = False |
| self.results_file = None |
| else: |
| self.results_file = results_file |
| self.generate_data_source() |
| |
| |
| def generate_data_source(self): |
| """ |
| Creates data file without headers for gnuplot consumption. |
| """ |
| results_file = open(self.results_file, 'r') |
| self.datasource = os.path.join(self.output_dir, '3d-datasource') |
| datasource = open(self.datasource, 'w') |
| for line in results_file.readlines(): |
| fields = line.split() |
| if len(fields) != 15: |
| continue |
| try: |
| values = [int(i) for i in fields] |
| datasource.write(line) |
| except ValueError: |
| continue |
| datasource.close() |
| |
| |
| def plot_2d_graphs(self): |
| """ |
| For each one of the throughput parameters, generate a set of gnuplot |
| commands that will create a parametric surface with file size vs. |
| record size vs. throughput. |
| """ |
| datasource_2d = os.path.join(self.output_dir, '2d-datasource-file') |
| for index, label in zip(range(2, 15), _LABELS[2:]): |
| commands_path = os.path.join(self.output_dir, '2d-%s.do' % label) |
| commands = "" |
| commands += "set title 'Iozone performance: %s'\n" % label |
| commands += "set logscale x\n" |
| commands += "set xlabel 'File size (KB)'\n" |
| commands += "set ylabel 'Througput (MB/s)'\n" |
| commands += "set terminal png small size 450 350\n" |
| commands += "set output '%s'\n" % os.path.join(self.output_dir, |
| '2d-%s.png' % label) |
| commands += ("plot '%s' using 1:%s title '%s' with lines \n" % |
| (datasource_2d, index, label)) |
| commands_file = open(commands_path, 'w') |
| commands_file.write(commands) |
| commands_file.close() |
| try: |
| utils.system("%s %s" % (self.gnuplot, commands_path)) |
| except error.CmdError: |
| logging.error("Problem plotting from commands file %s", |
| commands_path) |
| |
| |
| def plot_3d_graphs(self): |
| """ |
| For each one of the throughput parameters, generate a set of gnuplot |
| commands that will create a parametric surface with file size vs. |
| record size vs. throughput. |
| """ |
| for index, label in zip(range(1, 14), _LABELS[2:]): |
| commands_path = os.path.join(self.output_dir, '%s.do' % label) |
| commands = "" |
| commands += "set title 'Iozone performance: %s'\n" % label |
| commands += "set grid lt 2 lw 1\n" |
| commands += "set surface\n" |
| commands += "set parametric\n" |
| commands += "set xtics\n" |
| commands += "set ytics\n" |
| commands += "set logscale x 2\n" |
| commands += "set logscale y 2\n" |
| commands += "set logscale z\n" |
| commands += "set xrange [2.**5:2.**24]\n" |
| commands += "set xlabel 'File size (KB)'\n" |
| commands += "set ylabel 'Record size (KB)'\n" |
| commands += "set zlabel 'Througput (KB/s)'\n" |
| commands += "set data style lines\n" |
| commands += "set dgrid3d 80,80, 3\n" |
| commands += "set terminal png small size 900 700\n" |
| commands += "set output '%s'\n" % os.path.join(self.output_dir, |
| '%s.png' % label) |
| commands += ("splot '%s' using 1:2:%s title '%s'\n" % |
| (self.datasource, index, label)) |
| commands_file = open(commands_path, 'w') |
| commands_file.write(commands) |
| commands_file.close() |
| try: |
| utils.system("%s %s" % (self.gnuplot, commands_path)) |
| except error.CmdError: |
| logging.error("Problem plotting from commands file %s", |
| commands_path) |
| |
| |
| def plot_all(self): |
| """ |
| Plot all graphs that are to be plotted, provided that we have gnuplot. |
| """ |
| if self.active: |
| self.plot_2d_graphs() |
| self.plot_3d_graphs() |
| |
| |
| class AnalyzerLoggingConfig(logging_config.LoggingConfig): |
| def configure_logging(self, results_dir=None, verbose=False): |
| super(AnalyzerLoggingConfig, self).configure_logging(use_console=True, |
| verbose=verbose) |
| |
| |
| if __name__ == "__main__": |
| parser = optparse.OptionParser("usage: %prog [options] [filenames]") |
| options, args = parser.parse_args() |
| |
| logging_manager.configure_logging(AnalyzerLoggingConfig()) |
| |
| if args: |
| filenames = args |
| else: |
| parser.print_help() |
| sys.exit(1) |
| |
| if len(args) > 2: |
| parser.print_help() |
| sys.exit(1) |
| |
| o = os.path.join(os.getcwd(), |
| "iozone-graphs-%s" % time.strftime('%Y-%m-%d-%H.%M.%S')) |
| if not os.path.isdir(o): |
| os.makedirs(o) |
| |
| a = IOzoneAnalyzer(list_files=filenames, output_dir=o) |
| a.analyze() |
| p = IOzonePlotter(results_file=filenames[0], output_dir=o) |
| p.plot_all() |