| |
def iter_clean_lines(lines):
    """Yield each line stripped of surrounding whitespace.

    Lines that start with ``# XXX`` (after stripping) are dropped.
    Blank lines and all other comment lines are passed through.
    """
    for raw in lines:
        cleaned = raw.strip()
        if not cleaned.startswith('# XXX'):
            yield cleaned
| |
| |
def parse_table_lines(lines):
    """Yield ``(kind, value)`` tokens parsed from the lines of a table file.

    The tokens are:

    * ``(0, title)`` for a ``####`` divider followed by a ``# title`` line
    * ``(1, title)`` for a ``#----`` divider followed by a ``# title`` line
    * ``(2, label)`` for the last ``# label`` comment in a run of comment
      lines that is directly followed (no blank line) by a row
    * ``('row', line)`` for every other non-blank line

    Raises NotImplementedError if a divider is not followed by a line
    starting with ``# ``.
    """
    cleaned = iter_clean_lines(lines)

    for line in cleaned:
        if line.startswith(('####', '#----')):
            # The second character distinguishes the two divider styles.
            kind = 1 if line[1] != '#' else 0
            # The title is expected on the line right after the divider;
            # running out of input is treated like an empty line.
            title = next(cleaned, '').strip()
            if not title.startswith('# '):
                raise NotImplementedError(title)
            yield kind, title[2:].lstrip()
            continue

        # Consume a run of comment lines, remembering the text of the
        # most recent "# ..." one as a candidate label.
        label = None
        while line.startswith('#'):
            if line.startswith('# '):
                label = line[2:].lstrip()
            try:
                line = next(cleaned).strip()
            except StopIteration:
                return
            if not line:
                # A blank line ends the comment run; it labels nothing.
                break
        else:
            # Reached only when the loop ended without hitting a blank
            # line, i.e. "line" is not a comment (it may be empty if
            # the loop never ran).
            if not line:
                continue
            if label:
                yield 2, label
            yield 'row', line
| |
| |
def iter_sections(lines):
    """Yield ``(section, row)`` pairs for each row of the table.

    *section* is a tuple of the section names in effect for the row.
    A single row seen before any section marker is taken as the table
    header and is not yielded.

    Raises NotImplementedError for any other row found outside a section.
    """
    header = None
    path = []
    for kind, value in parse_table_lines(lines):
        if kind != 'row':
            # A section marker: "kind" is the nesting level to replace.
            if header is None:
                # No header row came before the first section.
                header = False
            path[kind:] = [value]
            continue

        if path:
            yield tuple(path), value
        elif header is None:
            header = value
        else:
            raise NotImplementedError(value)
| |
| |
def collect_sections(lines):
    """Return a dict mapping each section tuple to the list of its rows.

    Sections and rows keep the order in which they were first seen.
    """
    grouped = {}
    for section, row in iter_sections(lines):
        grouped.setdefault(section, []).append(row)
    return grouped
| |
| |
def collate_sections(lines):
    """Return the sections arranged as a nested tree.

    Each level of the tree is a dict that maps a section name to a
    ``(children, rows, totalrows)`` tuple, where *children* is the dict
    for the next level, *rows* holds the rows that belong to exactly
    that section, and *totalrows* also includes the rows of every
    section nested below it.
    """
    tree = {}
    for section, rows in collect_sections(lines).items():
        level = tree
        last = len(section) - 1
        for depth, name in enumerate(section):
            if name not in level:
                level[name] = ({}, [], [])
            children, own_rows, all_rows = level[name]
            if depth == last:
                # Only the final name in the path owns the rows directly.
                own_rows.extend(rows)
            all_rows.extend(rows)
            level = children
    return tree
| |
| |
| ############################# |
| # the commands |
| |
def cmd_count_by_section(lines):
    """Yield the lines of a report with the row count of each section.

    Every section gets one line showing its cumulative count in
    parentheses (only when that differs from its own rows), its own
    count (blank when it has no rows), and its name indented by depth.
    A divider precedes each top-level section, and the report ends with
    a divider and the overall total.
    """
    divider = ' ' + '-' * 50
    grand_total = 0

    def render_tree(root, depth=0):
        nonlocal grand_total
        indent = ' ' * depth
        for name, (subroot, rows, totalrows) in root.items():
            if depth == 0:
                yield divider
            sectotal = '' if totalrows == rows else f'({len(totalrows)})'
            count = len(rows) or ''
            yield f'{sectotal:>7} {count:>4} {indent}{name}'
            yield from render_tree(subroot, depth + 1)
            grand_total += len(rows)

    yield from render_tree(collate_sections(lines))
    yield divider
    yield f'(total: {grand_total})'
| |
| |
| ############################# |
| # the script |
| |
def parse_args(argv=None, prog=None):
    """Parse the command line and return the options as a dict.

    The only argument is the positional ``filename``.  When *argv* is
    None, argparse falls back to ``sys.argv[1:]``.
    """
    import argparse

    cli = argparse.ArgumentParser(prog=prog)
    cli.add_argument('filename')
    return vars(cli.parse_args(argv))
| |
| |
def main(filename):
    """Print the per-section row counts for the table file *filename*."""
    with open(filename) as table_file:
        for rendered in cmd_count_by_section(table_file):
            print(rendered)
| |
| |
if __name__ == '__main__':
    # Parse the command line and hand the options to main() as keywords.
    main(**parse_args())