| #!/usr/bin/python |
| # Copyright 2017 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
This is a utility to build a summary of the given directory and save it to a
json file.
| |
usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all result summary files in the given path.
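
For example, to build the summary of a results directory and throttle it to
10 MB (the path is illustrative):

  utils.py -p /usr/local/autotest/results/123-debug -m 10240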
| |
The content of the json file looks like the following, where `/D` holds the
entries of a directory and `/S` is a size in bytes:
| {'default': {'/D': [{'control': {'/S': 734}}, |
| {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}}, |
| {'client.0.ERROR': {'/S': 254}}, |
| {'client.0.INFO': {'/S': 1020}}, |
| {'client.0.WARNING': {'/S': 242}}], |
| '/S': 7214}} |
| ], |
| '/S': 7948 |
| } |
| } |
| """ |
| |
| import argparse |
| import copy |
| import fnmatch |
| import glob |
| import json |
| import logging |
| import os |
| import random |
| import sys |
| import time |
| import traceback |
| |
| import dedupe_file_throttler |
| import delete_file_throttler |
| import result_info |
| import shrink_file_throttler |
| import throttler_lib |
| import utils_lib |
| import zip_file_throttler |
| |
| |
| # Do NOT import autotest_lib modules here. This module can be executed without |
| # dependency on other autotest modules. This is to keep the logic of result |
| # trimming on the server side, instead of depending on the autotest client |
| # module. |
| |
| DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json' |
| SUMMARY_FILE_PATTERN = 'dir_summary_*.json' |
| MERGED_SUMMARY_FILENAME = 'dir_summary_final.json' |
| |
# Minimum disk space that should remain available after saving the summary
# file.
| MIN_FREE_DISK_BYTES = 10 * 1024 * 1024 |
| |
# Autotest uses some state files to track process running state. These files
# are deleted from the test results, so they can be ignored in the summary.
| FILES_TO_IGNORE = set([ |
| 'control.autoserv.state' |
| ]) |
| |
| # Smallest file size to shrink to. |
| MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024 |
| |
| def get_unique_dir_summary_file(path): |
| """Get a unique file path to save the directory summary json string. |
| |
    @param path: The directory path to save the summary file to.
    @return: A unique file path under the given directory for the summary.
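
    Example (illustrative; the timestamp and any `_<n>` suffix depend on
    runtime state):
        get_unique_dir_summary_file('/tmp/results')
        # e.g. '/tmp/results/dir_summary_1510000000.json', or
        # '/tmp/results/dir_summary_1510000000_1.json' if that name is taken.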
| """ |
| summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time() |
| # Make sure the summary file name is unique. |
| file_name = os.path.join(path, summary_file) |
| if os.path.exists(file_name): |
| count = 1 |
| name, ext = os.path.splitext(summary_file) |
| while os.path.exists(file_name): |
| file_name = os.path.join(path, '%s_%s%s' % (name, count, ext)) |
| count += 1 |
| return file_name |
| |
| |
| def _preprocess_result_dir_path(path): |
| """Verify the result directory path is valid and make sure it ends with `/`. |
| |
| @param path: A path to the result directory. |
| @return: A verified and processed path to the result directory. |
| @raise IOError: If the path doesn't exist. |
| @raise ValueError: If the path is not a directory. |
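
    Example (assuming /tmp/results is an existing directory):
        _preprocess_result_dir_path('/tmp/results')  # -> '/tmp/results/'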
| """ |
| if not os.path.exists(path): |
| raise IOError('Path %s does not exist.' % path) |
| |
| if not os.path.isdir(path): |
| raise ValueError('The given path %s is a file. It must be a ' |
| 'directory.' % path) |
| |
    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
| if not path.endswith(os.sep): |
| path = path + os.sep |
| |
| return path |
| |
| |
| def _delete_missing_entries(summary_old, summary_new): |
| """Delete files/directories only exists in old summary. |
| |
| When the new summary is final, i.e., it's built from the final result |
| directory, files or directories missing are considered to be deleted and |
| trimmed to size 0. |
| |
| @param summary_old: Old directory summary. |
| @param summary_new: New directory summary. |
| """ |
| new_files = summary_new.get_file_names() |
| old_files = summary_old.get_file_names() |
| for name in old_files: |
| old_file = summary_old.get_file(name) |
| if name not in new_files: |
| if old_file.is_dir: |
| # Trim sub-directories. |
| with old_file.disable_updating_parent_size_info(): |
| _delete_missing_entries(old_file, result_info.EMPTY) |
| old_file.update_sizes() |
| elif name in FILES_TO_IGNORE: |
| # Remove the file from the summary as it can be ignored. |
| summary_old.remove_file(name) |
| else: |
| with old_file.disable_updating_parent_size_info(): |
| # Before setting the trimmed size to 0, update the collected |
| # size if it's not set yet. |
| if not old_file.is_collected_size_recorded: |
| old_file.collected_size = old_file.trimmed_size |
| old_file.trimmed_size = 0 |
| elif old_file.is_dir: |
| # If `name` is a directory in the old summary, but a file in the new |
| # summary, delete the entry in the old summary. |
| new_file = summary_new.get_file(name) |
| if not new_file.is_dir: |
| new_file = result_info.EMPTY |
| _delete_missing_entries(old_file, new_file) |
| |
| |
| def _relocate_summary(result_dir, summary_file, summary): |
| """Update the given summary with the path relative to the result_dir. |
| |
| @param result_dir: Path to the result directory. |
| @param summary_file: Path to the summary file. |
| @param summary: A directory summary inside the given result_dir or its |
| sub-directory. |
| @return: An updated summary with the path relative to the result_dir. |
| """ |
| sub_path = os.path.dirname(summary_file).replace( |
| result_dir.rstrip(os.sep), '') |
| if sub_path == '': |
| return summary |
| |
| folders = sub_path.split(os.sep) |
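    # For example (illustrative), if result_dir is '/tmp/results/' and
    # summary_file is '/tmp/results/debug/dir_summary_1.json', then sub_path
    # is '/debug' and folders is ['', 'debug'].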
| |
| # The first folder is always '' because of the leading `/` in sub_path. |
| parent = result_info.ResultInfo( |
| result_dir, utils_lib.ROOT_DIR, parent_result_info=None) |
| root = parent |
| |
    # Start from index 1 to skip the leading '' entry, so the root contains
    # only the single utils_lib.ROOT_DIR folder.
| for i in range(1, len(folders)): |
| child = result_info.ResultInfo( |
| parent.path, folders[i], parent_result_info=parent) |
| if i == len(folders) - 1: |
| # Add files in summary to child. |
| for info in summary.files: |
| child.files.append(info) |
| |
| parent.files.append(child) |
| parent = child |
| |
| parent.update_sizes() |
| return root |
| |
| |
| def merge_summaries(path): |
| """Merge all directory summaries in the given path. |
| |
    This function calculates the total size of result files collected from
    the test device and of the files generated on the drone. It also returns
    the merged directory summary.
| |
| @param path: A path to search for directory summaries. |
| @return a tuple of (client_collected_bytes, merged_summary, files): |
| client_collected_bytes: The total size of results collected from |
| the DUT. The number can be larger than the total file size of |
| the given path, as files can be overwritten or removed. |
| merged_summary: The merged directory summary of the given path. |
| files: All summary files in the given path, including |
| sub-directories. |
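
    Example (a sketch; the path is illustrative):
        client_bytes, merged_summary, files = merge_summaries('/tmp/results/')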
| """ |
| path = _preprocess_result_dir_path(path) |
    # Find all directory summary files and sort them by modification time.
| summary_files = [] |
| for root, _, filenames in os.walk(path): |
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
| summary_files.append(os.path.join(root, filename)) |
| summary_files = sorted(summary_files, key=os.path.getmtime) |
| |
| all_summaries = [] |
| for summary_file in summary_files: |
| try: |
| summary = result_info.load_summary_json_file(summary_file) |
| summary = _relocate_summary(path, summary_file, summary) |
| all_summaries.append(summary) |
| except (IOError, ValueError) as e: |
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
| (summary_file, e)) |
| |
| # Merge all summaries. |
    merged_summary = all_summaries[0] if all_summaries else None
| for summary in all_summaries[1:]: |
| merged_summary.merge(summary) |
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files being transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
| client_collected_bytes = 0 |
| if merged_summary: |
| client_collected_bytes = merged_summary.collected_size |
| |
    # Get the summary of the current directory.
| last_summary = result_info.ResultInfo.build_from_path(path) |
| |
| if merged_summary: |
| merged_summary.merge(last_summary, is_final=True) |
| _delete_missing_entries(merged_summary, last_summary) |
| else: |
| merged_summary = last_summary |
| |
| return client_collected_bytes, merged_summary, summary_files |
| |
| |
| def _throttle_results(summary, max_result_size_KB): |
| """Throttle the test results by limiting to the given maximum size. |
| |
| @param summary: A ResultInfo object containing result summary. |
| @param max_result_size_KB: Maximum test result size in KB. |
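
    Example (a sketch; `summary` is a ResultInfo and 10240 KB = 10 MB):
        _throttle_results(summary, max_result_size_KB=10240)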
| """ |
| if throttler_lib.check_throttle_limit(summary, max_result_size_KB): |
| utils_lib.LOG( |
| 'Result size is %s, which is less than %d KB. No need to ' |
| 'throttle.' % |
| (utils_lib.get_size_string(summary.trimmed_size), |
| max_result_size_KB)) |
| return |
| |
| args = {'summary': summary, |
| 'max_result_size_KB': max_result_size_KB} |
| args_skip_autotest_log = copy.copy(args) |
| args_skip_autotest_log['skip_autotest_log'] = True |
    # Apply the throttlers in the following order. The first two passes skip
    # autotest logs; later passes will process those logs only if the result
    # size is still over the limit.
| throttlers = [ |
| (shrink_file_throttler, copy.copy(args_skip_autotest_log)), |
| (zip_file_throttler, copy.copy(args_skip_autotest_log)), |
| (shrink_file_throttler, copy.copy(args)), |
| (dedupe_file_throttler, copy.copy(args)), |
| (zip_file_throttler, copy.copy(args)), |
| ] |
| |
| # Add another zip_file_throttler to compress the files being shrunk. |
| # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of |
| # shrink_file_throttler. |
| new_args = copy.copy(args) |
| new_args['file_size_threshold_byte'] = 50 * 1024 |
| throttlers.append((zip_file_throttler, new_args)) |
| |
    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds,
    # starting at 5MB and lowering to 100KB.
| delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024] |
| # Try to keep tgz files first. |
    exclude_file_patterns = [r'.*\.tgz']
| for threshold in delete_file_thresholds: |
| new_args = copy.copy(args) |
| new_args.update({'file_size_threshold_byte': threshold, |
| 'exclude_file_patterns': exclude_file_patterns}) |
| throttlers.append((delete_file_throttler, new_args)) |
    # Add one more delete_file_throttler that does not skip tgz files.
| new_args = copy.copy(args) |
| new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]}) |
| throttlers.append((delete_file_throttler, new_args)) |
| |
| # Run the throttlers in order until result size is under max_result_size_KB. |
| old_size = summary.trimmed_size |
    for throttler, throttler_args in throttlers:
        try:
            args_without_summary = copy.copy(throttler_args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**throttler_args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler.__name__, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' %
                              utils_lib.get_size_string(old_size))
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                old_size = new_size
| |
| |
| def _setup_logging(): |
| """Set up logging to direct logs to stdout.""" |
| # Direct logging to stdout |
| logger = logging.getLogger() |
| logger.setLevel(logging.DEBUG) |
| handler = logging.StreamHandler(sys.stdout) |
| handler.setLevel(logging.DEBUG) |
| formatter = logging.Formatter('%(asctime)s %(message)s') |
| handler.setFormatter(formatter) |
| logger.handlers = [] |
| logger.addHandler(handler) |
| |
| |
| def _parse_options(): |
| """Options for the main script. |
| |
    @return: An options object containing the parsed arg values.
| """ |
| parser = argparse.ArgumentParser() |
| parser.add_argument('-p', type=str, dest='path', |
| help='Path to build directory summary.') |
| parser.add_argument('-m', type=int, dest='max_size_KB', default=0, |
| help='Maximum result size in KB. Set to 0 to disable ' |
| 'result throttling.') |
| parser.add_argument('-d', action='store_true', dest='delete_summaries', |
| default=False, |
                        help='Delete all result summary files in the given '
                             'path.')
| return parser.parse_args() |
| |
| |
| def execute(path, max_size_KB): |
| """Execute the script with given arguments. |
| |
| @param path: Path to build directory summary. |
| @param max_size_KB: Maximum result size in KB. |
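
    Example (illustrative values):
        execute('/tmp/results/', max_size_KB=10240)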
| """ |
| utils_lib.LOG('Running result_tools/utils on path: %s' % path) |
| if max_size_KB > 0: |
        utils_lib.LOG('Throttle result size to: %s' %
| utils_lib.get_size_string(max_size_KB * 1024)) |
| |
| result_dir = path |
| if not os.path.isdir(result_dir): |
| result_dir = os.path.dirname(result_dir) |
| summary = result_info.ResultInfo.build_from_path(path) |
| summary_json = json.dumps(summary) |
| summary_file = get_unique_dir_summary_file(result_dir) |
| |
    # Make sure there is enough free disk space left to write the file.
| stat = os.statvfs(path) |
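    # f_frsize is the filesystem fragment size and f_bavail the number of
    # free blocks available to unprivileged users, so their product is the
    # usable free space in bytes.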
| free_space = stat.f_frsize * stat.f_bavail |
| if free_space - len(summary_json) < MIN_FREE_DISK_BYTES: |
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space would be left after saving the '
                'summary file. Available free disk: %s bytes. Summary file '
                'size: %s bytes.' % (free_space, len(summary_json)))
| |
| with open(summary_file, 'w') as f: |
| f.write(summary_json) |
| utils_lib.LOG('Directory summary of %s is saved to file %s.' % |
| (path, summary_file)) |
| |
| if max_size_KB > 0 and summary.trimmed_size > 0: |
| old_size = summary.trimmed_size |
| throttle_probability = float(max_size_KB * 1024) / old_size |
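        # The skip probability is the ratio of the size limit to the actual
        # size, so results far above the limit are almost never skipped.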
| if random.random() < throttle_probability: |
| utils_lib.LOG( |
| 'Skip throttling %s: size=%s, throttle_probability=%s' % |
| (path, old_size, throttle_probability)) |
| else: |
| _throttle_results(summary, max_size_KB) |
| if summary.trimmed_size < old_size: |
| # Files are throttled, save the updated summary file. |
| utils_lib.LOG('Overwrite the summary file: %s' % summary_file) |
| result_info.save_summary(summary, summary_file) |
| |
| |
| def _delete_summaries(path): |
| """Delete all directory summary files in the given directory. |
| |
    This is to clean up the directory so that no summary files are left
    behind to affect later tests.
| |
| @param path: Path to cleanup directory summary. |
| """ |
    # Only summary files directly under `path` need to be cleaned up.
| summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN)) |
| for summary in summary_files: |
| try: |
| os.remove(summary) |
        except (IOError, OSError) as e:
| utils_lib.LOG('Failed to delete summary: %s. Error: %s' % |
| (summary, e)) |
| |
| |
| def main(): |
| """main script. """ |
| _setup_logging() |
| options = _parse_options() |
| if options.delete_summaries: |
| _delete_summaries(options.path) |
| else: |
| execute(options.path, options.max_size_KB) |
| |
| |
| if __name__ == '__main__': |
| main() |