| #!/usr/bin/python |
| """Combines results from multiple days of a single metric. |
| |
| Feed it the paths of the STATUS.txt files on stdin, one per line. For each |
| path it finds the results.csv in the same directory and takes the top N rows. |
| |
| Example: |
| |
| Date, "google.com,", yahoo.com |
| 2015-03-01, 0.0, 0.9 |
| 2015-03-02, 0.1, 0.8 |
| |
| Dygraphs can load this CSV file directly. |
| |
| TODO: Use different dygraph API? |
| |
| Also we need error bars. |
| |
| new Dygraph(document.getElementById("graphdiv2"), |
| [ |
| [1,10,100], |
| [2,20,80], |
| [3,50,60], |
| [4,70,80] |
| ], |
| { |
| labels: [ "Date", "failure", "timeout", "google.com" ] |
| }); |
| """ |
| |
| import collections |
| import csv |
| import json |
| import os |
| import sys |
| |
| import util |
| |
| |
def CombineDistResults(stdin, c_out, num_top):
    """Combine the top rows of several days of results into one CSV.

    Args:
      stdin: iterable of lines, each the path of a STATUS.txt file.  The path
        is assumed to look like .../2015-03-01/STATUS.txt; the name of the
        directory holding the file is used as the date.  Blank lines are
        ignored.
      c_out: csv.writer-like object; only writerow() is called on it.
      num_top: maximum number of data rows to take from each results.csv.

    Writes a header row ['date', <names sorted alphabetically>] and then one
    row per date, in sorted date order.  Each cell is a 'low;value;high'
    string, or None when that name has no row for that date (csv.writer emits
    None as an empty field).  Dates whose status is not 'OK' still get a row,
    with every cell empty.
    """
    dates = []
    var_cols = collections.defaultdict(dict)  # {name: {date: value}}
    seen_dates = set()

    for line in stdin:
        status_path = line.strip()
        if not status_path:
            continue  # tolerate blank lines, e.g. a trailing newline

        # Assume it looks like .../2015-03-01/STATUS.txt
        task_dir = os.path.dirname(status_path)
        date = os.path.basename(task_dir)

        # Get rid of duplicate dates.  These could be caused by retries.
        if date in seen_dates:
            continue
        seen_dates.add(date)

        with open(status_path) as f:
            fields = f.readline().split()
        # First word is OK, FAIL, TIMEOUT or SKIPPED.  An empty status file is
        # treated like any other non-OK status instead of raising IndexError.
        status = fields[0] if fields else ''

        dates.append(date)

        if status != 'OK':
            continue  # won't have results.csv

        results_path = os.path.join(task_dir, 'results.csv')
        with open(results_path) as f:
            c = csv.reader(f)
            # Skip the header row.  The default keeps a completely empty file
            # from leaking StopIteration; the loop below then stops early.
            next(c, None)

            # Rows are sorted by decreasing "estimate", which is what we want.
            for _ in range(num_top):
                try:
                    row = next(c)
                except StopIteration:
                    # It's OK if it doesn't have enough
                    util.log('Stopping early. Fewer than %d results to render.',
                             num_top)
                    break

                string, _, _, proportion, _, prop_low, prop_high = row

                # dygraphs custom error bars use semicolons inside a cell:
                #   low;value;high,low;value;high
                # http://dygraphs.com/data.html#csv
                #
                # Arbitrarily use 4 digits after decimal point (for dygraphs,
                # not directly displayed)
                dygraph_triple = '%.4f;%.4f;%.4f' % (
                    float(prop_low), float(proportion), float(prop_high))

                var_cols[string][date] = dygraph_triple

    # Now write the combined CSV.
    cols = sorted(var_cols.keys())  # sort columns alphabetically
    c_out.writerow(['date'] + cols)

    dates.sort()

    for date in dates:
        row = [date]
        for col in cols:
            # None means there is no row for this name on this date.
            row.append(var_cols[col].get(date))
        c_out.writerow(row)
| |
| |
def CombineAssocResults(stdin, c_out, num_top):
    """Placeholder combiner for association results.

    Writes a single one-column row containing 'dummy' to c_out.  The stdin
    and num_top arguments are accepted but not used.
    """
    c_out.writerow(('dummy',))
| |
| |
def main(argv):
    """Run the combiner selected on the command line.

    Expected arguments: argv[1] is the action ('dist' or 'assoc') and argv[2]
    is the number of values to keep.  Input is read from sys.stdin and the
    combined CSV is written to sys.stdout.

    Args:
      argv: full argument vector, with the program name at index 0.

    Raises:
      RuntimeError: if the action is missing or unknown, or if the number of
        values to keep is missing or not an integer.  RuntimeError is used
        for all of these so the script entry point can report them as a
        one-line FATAL message instead of a traceback.
    """
    if len(argv) < 2:
        raise RuntimeError('Expected an action: dist or assoc')

    action = argv[1]
    if action not in ('dist', 'assoc'):
        raise RuntimeError('Invalid action %r' % action)

    if len(argv) < 3:
        raise RuntimeError(
            'Action %r requires the number of values to keep' % action)
    try:
        num_top = int(argv[2])  # number of values to keep
    except ValueError:
        raise RuntimeError(
            'Number of values to keep must be an integer, got %r' % argv[2])

    c_out = csv.writer(sys.stdout)
    if action == 'dist':
        CombineDistResults(sys.stdin, c_out, num_top)
    else:
        CombineAssocResults(sys.stdin, c_out, num_top)
| |
| |
if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        # Report expected failures as a single line on stderr and exit
        # non-zero.  'except ... as' and sys.stderr.write() are valid on both
        # Python 2.6+ and Python 3.
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)