| # |
| # Copyright (C) 2013 The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| """Histogram generation tools.""" |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| |
| from collections import defaultdict |
| |
| from update_payload import format_utils |
| |
| |
| class Histogram(object): |
| """A histogram generating object. |
| |
| This object serves the sole purpose of formatting (key, val) pairs as an |
| ASCII histogram, including bars and percentage markers, and taking care of |
| label alignment, scaling, etc. In addition to the standard __init__ |
| interface, two static methods are provided for conveniently converting data |
| in different formats into a histogram. Histogram generation is exported via |
| its __str__ method, and looks as follows: |
| |
| Yes |################ | 5 (83.3%) |
| No |### | 1 (16.6%) |
| |
| TODO(garnold) we may want to add actual methods for adding data or tweaking |
| the output layout and formatting. For now, though, this is fine. |
| |
| """ |
| |
| def __init__(self, data, scale=20, formatter=None): |
| """Initialize a histogram object. |
| |
| Args: |
| data: list of (key, count) pairs constituting the histogram |
| scale: number of characters used to indicate 100% |
| formatter: function used for formatting raw histogram values |
| |
| """ |
| self.data = data |
| self.scale = scale |
| self.formatter = formatter or str |
| self.max_key_len = max([len(str(key)) for key, count in self.data]) |
| self.total = sum([count for key, count in self.data]) |
| |
| @staticmethod |
| def FromCountDict(count_dict, scale=20, formatter=None, key_names=None): |
| """Takes a dictionary of counts and returns a histogram object. |
| |
| This simply converts a mapping from names to counts into a list of (key, |
| count) pairs, optionally translating keys into name strings, then |
| generating and returning a histogram for them. This is a useful convenience |
| call for clients that update a dictionary of counters as they (say) scan a |
| data stream. |
| |
| Args: |
| count_dict: dictionary mapping keys to occurrence counts |
| scale: number of characters used to indicate 100% |
| formatter: function used for formatting raw histogram values |
| key_names: dictionary mapping keys to name strings |
| Returns: |
| A histogram object based on the given data. |
| |
| """ |
| namer = None |
| if key_names: |
| namer = lambda key: key_names[key] |
| else: |
| namer = lambda key: key |
| |
| hist = [(namer(key), count) for key, count in count_dict.items()] |
| return Histogram(hist, scale, formatter) |
| |
| @staticmethod |
| def FromKeyList(key_list, scale=20, formatter=None, key_names=None): |
| """Takes a list of (possibly recurring) keys and returns a histogram object. |
| |
| This converts the list into a dictionary of counters, then uses |
| FromCountDict() to generate the actual histogram. For example: |
| |
| ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ... |
| |
| Args: |
| key_list: list of (possibly recurring) keys |
| scale: number of characters used to indicate 100% |
| formatter: function used for formatting raw histogram values |
| key_names: dictionary mapping keys to name strings |
| Returns: |
| A histogram object based on the given data. |
| |
| """ |
| count_dict = defaultdict(int) # Unset items default to zero |
| for key in key_list: |
| count_dict[key] += 1 |
| return Histogram.FromCountDict(count_dict, scale, formatter, key_names) |
| |
| def __str__(self): |
| hist_lines = [] |
| hist_bar = '|' |
| for key, count in self.data: |
| if self.total: |
| bar_len = count * self.scale // self.total |
| hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale) |
| |
| line = '%s %s %s' % ( |
| str(key).ljust(self.max_key_len), |
| hist_bar, |
| self.formatter(count)) |
| percent_str = format_utils.NumToPercent(count, self.total) |
| if percent_str: |
| line += ' (%s)' % percent_str |
| hist_lines.append(line) |
| |
| return '\n'.join(hist_lines) |
| |
| def GetKeys(self): |
| """Returns the keys of the histogram.""" |
| return [key for key, _ in self.data] |