blob: 6cb01505b9c01040d95daa34f25b0ab56b2b8d92 [file] [log] [blame] [edit]
#!/usr/bin/python
"""Combines results from multiple days of a single metric.
Feed it the STATUS.txt files on stdin. It then finds the corresponding
results.csv, and takes the top N items.
Example:
Date, "google.com,", yahoo.com
2015-03-01, 0.0, 0.9
2015-03-02, 0.1, 0.8
Dygraphs can load this CSV file directly.
TODO: Use different dygraph API?
Also we need error bars.
new Dygraph(document.getElementById("graphdiv2"),
[
[1,10,100],
[2,20,80],
[3,50,60],
[4,70,80]
],
{
labels: [ "Date", "failure", "timeout", "google.com" ]
});
"""
import collections
import csv
import json
import os
import sys
import util
def CombineDistResults(stdin, c_out, num_top):
  """Merge per-day 'dist' metric results into a single CSV time series.

  Args:
    stdin: iterable of lines, each the path to one day's STATUS.txt
        (assumed to look like .../2015-03-01/STATUS.txt).
    c_out: csv.writer that receives the combined rows.
    num_top: number of top-estimate values to keep per day.

  Writes a header row ('date' plus the union of value names seen across all
  days, sorted), then one row per date in ascending order.  Each cell is a
  'low;value;high' triple for dygraphs; missing cells are None, which the
  csv module renders as an empty field.
  """
  dates = []
  var_cols = collections.defaultdict(dict)  # {name: {date: value}}
  seen_dates = set()

  for line in stdin:
    status_path = line.strip()

    # Assume it looks like .../2015-03-01/STATUS.txt
    task_dir = os.path.dirname(status_path)
    date = os.path.basename(task_dir)

    # Get rid of duplicate dates.  These could be caused by retries.
    if date in seen_dates:
      continue
    seen_dates.add(date)

    with open(status_path) as f:
      status = f.readline().split()[0]  # OK, FAIL, TIMEOUT, SKIPPED

    # Record the date even for failed days, so gaps show up in the graph.
    dates.append(date)

    if status != 'OK':
      continue  # won't have results.csv

    results_path = os.path.join(task_dir, 'results.csv')
    with open(results_path) as f:
      c = csv.reader(f)
      # next() builtin instead of c.next(): works on Python 2.6+ and 3.
      unused_header = next(c)  # header row

      # they are sorted by decreasing "estimate", which is what we want
      # range instead of xrange: Python 2/3 compatible, num_top is small.
      for i in range(num_top):
        try:
          row = next(c)
        except StopIteration:
          # It's OK if it doesn't have enough
          util.log('Stopping early. Fewer than %d results to render.',
                   num_top)
          break

        string, _, _, proportion, _, prop_low, prop_high = row

        # dygraphs has a weird format with semicolons:
        # value;lower;upper,value;lower;upper.
        # http://dygraphs.com/data.html#csv

        # Arbitrarily use 4 digits after decimal point (for dygraphs, not
        # directly displayed)
        dygraph_triple = '%.4f;%.4f;%.4f' % (
            float(prop_low), float(proportion), float(prop_high))

        var_cols[string][date] = dygraph_triple

  # Now print CSV on stdout.
  cols = sorted(var_cols.keys())  # sort columns alphabetically
  c_out.writerow(['date'] + cols)

  dates.sort()
  for date in dates:
    row = [date]
    for col in cols:
      cell = var_cols[col].get(date)  # None means there is no row
      row.append(cell)
    c_out.writerow(row)

  #util.log("Number of dynamic cols: %d", len(var_cols))
def CombineAssocResults(stdin, c_out, num_top):
  """Placeholder for the 'assoc' metric: emits a stub header row only.

  The stdin and num_top arguments are accepted for signature parity with
  CombineDistResults but are currently ignored.
  """
  c_out.writerow(('dummy',))
def main(argv):
  """Dispatch to the combiner named by argv[1] ('dist' or 'assoc').

  argv[2] is the number of top values to keep.  Raises RuntimeError for an
  unknown action (before argv[2] is touched, matching the original flow).
  """
  action = argv[1]
  if action not in ('dist', 'assoc'):
    raise RuntimeError('Invalid action %r' % action)

  num_top = int(argv[2])  # number of values to keep
  writer = csv.writer(sys.stdout)

  if action == 'dist':
    CombineDistResults(sys.stdin, writer, num_top)
  else:
    CombineAssocResults(sys.stdin, writer, num_top)
if __name__ == '__main__':
  try:
    main(sys.argv)
  # 'as e' (Python 2.6+) instead of the Python-2-only 'except E, e' comma
  # syntax; sys.stderr.write replaces the Python-2-only 'print >>' form.
  # Output is byte-identical on Python 2, and the file now parses on 3.
  except RuntimeError as e:
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)