Brenden Blanco | 581fc7d | 2016-04-19 11:42:49 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
Brenden Blanco | b3abbfc | 2016-04-19 11:13:40 -0700 | [diff] [blame] | 2 | # |
| 3 | #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===# |
| 4 | # |
| 5 | # The LLVM Compiler Infrastructure |
| 6 | # |
| 7 | # This file is distributed under the University of Illinois Open Source |
| 8 | # License. See LICENSE.TXT for details. |
| 9 | # |
| 10 | #===------------------------------------------------------------------------===# |
| 11 | |
| 12 | r""" |
| 13 | clang-format git integration |
| 14 | ============================ |
| 15 | |
| 16 | This file provides a clang-format integration for git. Put it somewhere in your |
| 17 | path and ensure that it is executable. Then, "git clang-format" will invoke |
| 18 | clang-format on the changes in current files or a specific commit. |
| 19 | |
| 20 | For further details, run: |
| 21 | git clang-format -h |
| 22 | |
| 23 | Requires Python 2.7 |
| 24 | """ |
| 25 | |
| 26 | import argparse |
| 27 | import collections |
| 28 | import contextlib |
| 29 | import errno |
| 30 | import os |
| 31 | import re |
| 32 | import subprocess |
| 33 | import sys |
| 34 | |
# One-line synopsis shown by argparse in place of its generated usage string.
usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

# Long description printed verbatim by `git clang-format -h`. The setting
# names listed here must match the keys that main() looks up in the git
# configuration (git lower-cases section and variable names, so
# `clangFormat.extensions` is read back as `clangformat.extensions`).
desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD.  Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of
# clang-format. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the 1-based first line and `count` is the
# number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
| 55 | |
| 56 | |
def main():
  """Parse the command line and run clang-format on the changed lines.

  Defaults for several options come from the git configuration. The changed
  line ranges between the working directory and the selected commit are
  computed, restricted to files with an allowed extension, and formatted into
  a new git tree. That tree is then either diffed against the unformatted
  tree (--diff) or applied to the working directory.

  Returns None. Helper functions terminate the process with exit status 2 on
  failure.
  """
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'js',  # JavaScript
      'ts',  # TypeScript
      ])

  p = argparse.ArgumentParser(
      usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
      description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level remains.
  opts.verbose -= opts.quiet
  del opts.quiet

  commit, files = interpret_args(opts.args, dash_dash, opts.commit)
  changed_lines = compute_diff_and_extract_lines(commit, files)
  if opts.verbose >= 1:
    # Remember every changed file so the ones dropped by the extension filter
    # can be reported below.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('  %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('  %s' % filename)
  # Stop early regardless of verbosity: with nothing to format there is no
  # point in building two empty trees just to discover that they are equal.
  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return
  # The filenames extracted from the diff are relative to the top level of
  # the repository, so we must cd there before accessing those files.
  cd_to_toplevel()
  old_tree = create_tree_from_workdir(changed_lines)
  new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                               binary=opts.binary,
                                               style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    # In patch mode git already showed every hunk interactively, so the
    # summary is only repeated when explicitly asked for with -v.
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('  %s' % filename)
| 163 | |
| 164 | |
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless listed in
  `non_string_options`, which is a dictionary mapping option name (in lower
  case) to either "--bool" or "--int". For those options the value is
  re-read through `git config` with the given type flag.

  A variable that git lists without a value is stored as the string 'true'.
  """
  if non_string_options is None:
    non_string_options = {}
  out = {}
  # With --null every entry is terminated by NUL and the key is separated
  # from its value by a newline.
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    if '\n' in entry:
      name, value = entry.split('\n', 1)
    else:
      # A variable written without '=' in the config file is listed without
      # the newline separator; git treats such a variable as boolean true.
      name, value = entry, 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
| 181 | |
| 182 | |
def interpret_args(args, dash_dash, default_commit):
  """Split "[commit] [--] [files...]" into a (commit, files) pair.

  `args` holds the positional arguments found before any "--"; `dash_dash`
  holds the "--" itself plus everything after it, or is empty when no "--"
  was given.

  Without "--", the first positional argument is probed to decide whether it
  names a revision or a file. With "--", at most one positional argument is
  allowed and it must name a commit or tag. Whenever no commit is supplied,
  `default_commit` is used.
  """
  if not dash_dash:
    # No separator: guess whether the first word is a revision.
    if not args:
      return default_commit, []
    if disambiguate_revision(args[0]):
      return args[0], args[1:]
    return default_commit, args

  given = len(args)
  if given == 0:
    commit = default_commit
  elif given > 1:
    die('at most one commit allowed; %d given' % given)
  else:
    commit = args[0]
    object_type = get_object_type(commit)
    if object_type is None:
      die("'%s' is not a commit" % commit)
    elif object_type not in ('commit', 'tag'):
      die("'%s' is a %s, but a commit was expected" % (commit, object_type))
  # Everything after the "--" itself is a filename.
  return commit, dash_dash[1:]
| 218 | |
| 219 | |
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Exits the process if `value` is ambiguous, or if it names a git object
  that is neither a commit nor a tag.
  """
  # `git rev-parse` fails when `value` is neither a revision nor a file, and
  # run() terminates the process when the command fails.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
| 232 | |
| 233 | |
def get_object_type(value):
  """Return the output of `git cat-file -t` for `value`, stripped.

  Returns None when the command exits with a non-zero status, i.e. when
  `value` does not name a valid git object.
  """
  probe = subprocess.Popen(['git', 'cat-file', '-t', value],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  type_name, _ = probe.communicate()
  return type_name.strip() if probe.returncode == 0 else None
| 243 | |
| 244 | |
def compute_diff_and_extract_lines(commit, files):
  """Run compute_diff() and feed its output to extract_lines().

  Returns the mapping produced by extract_lines(). Exits with status 2 if
  the diff process reports failure.
  """
  differ = compute_diff(commit, files)
  line_ranges = extract_lines(differ.stdout)
  differ.stdout.close()
  if differ.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return line_ranges
| 255 | |
| 256 | |
def compute_diff(commit, files):
  """Return a subprocess object producing the diff from `commit`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and `commit`, filtered on `files`
  (if non-empty).  Zero context lines are used in the patch.

  The child's stdin is closed before returning. The caller is responsible
  for reading and closing `stdout` and for waiting on the process.
  """
  cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  p.stdin.close()
  return p
| 268 | |
| 269 | |
def extract_lines(patch_file):
  """Collect the changed line ranges from a patch.

  `patch_file` is an iterable of patch lines. The patch must have been
  produced with ``-U0``, meaning unidiff format with zero lines of context.

  Returns a dict mapping filename to a list of `Range`s, one per hunk that
  leaves at least one line in the new version of the file.
  """
  ranges_by_file = {}
  for line in patch_file:
    header = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if header:
      # Hunks that follow belong to this file until the next header.
      filename = header.group(1).rstrip('\r\n')
    hunk = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if not hunk:
      continue
    start_line = int(hunk.group(1))
    # The ",count" part is omitted from a hunk header when it covers one line.
    count_text = hunk.group(3)
    line_count = int(count_text) if count_text else 1
    if line_count > 0:
      ranges_by_file.setdefault(filename, []).append(
          Range(start_line, line_count))
  return ranges_by_file
| 293 | |
| 294 | |
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period. Keys are compared case-insensitively; a key without
  any period is always deleted. `dictionary` is modified in place and nothing
  is returned.
  """
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a snapshot of the keys: entries are deleted inside the loop,
  # which is only safe when we are not iterating over the live dictionary.
  for filename in list(dictionary):
    base_ext = filename.rsplit('.', 1)
    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
      del dictionary[filename]
| 305 | |
| 306 | |
def cd_to_toplevel():
  """Make the top-level directory of the git repository the current one."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
| 311 | |
| 312 | |
def create_tree_from_workdir(filenames):
  """Build a git tree from the working-directory copies of `filenames`.

  Returns the object ID (SHA-1) of the created tree.
  """
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
| 318 | |
| 319 | |
def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps filename to the line ranges to format. `binary` and
  `style` are forwarded to clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree.
  """
  def index_entry(path, ranges):
    # One "<mode> <sha1>\t<path>" record; the mode is taken from the file on
    # disk and the blob holds the formatted contents.
    file_mode = oct(os.stat(path).st_mode)
    blob = clang_format_to_blob(path, ranges, binary=binary, style=style)
    return '%s %s\t%s' % (file_mode, blob, path)

  # Lazily evaluated, so each file is formatted only as create_tree() asks
  # for the next record.
  entries = (index_entry(path, ranges)
             for path, ranges in changed_lines.iteritems())
  return create_tree(entries, '--index-info')
| 332 | |
| 333 | |
def create_tree(input_lines, mode):
  """Write a tree object built from `input_lines` and return its ID.

  `mode` selects how `git update-index` interprets the input and must be one
  of:
    '--stdin'       `input_lines` is an iterable of filenames.
    '--index-info'  `input_lines` is an iterable of records suitable for
                    "git update-index --index-info", such as
                    "<mode> <SP> <sha1> <TAB> <filename>".
  Any other mode is invalid.

  The tree is assembled in a temporary index file. Exits the process if
  `git update-index` fails.
  """
  assert mode in ('--stdin', '--index-info')
  update_cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(update_cmd, stdin=subprocess.PIPE)
    # -z makes update-index expect NUL-terminated records.
    for record in input_lines:
      updater.stdin.write('%s\0' % record)
    updater.stdin.close()
    status = updater.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(update_cmd))
    return run('git', 'write-tree')
| 352 | |
| 353 | |
def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
  """Format `filename` with clang-format and store the output as a git blob.

  Only the lines covered by `line_ranges` (pairs of start line and line
  count) are passed to clang-format for formatting. `style`, if given, is
  forwarded as `-style=`.

  Returns the object ID (SHA-1) of the created blob. Exits the process if
  the clang-format executable cannot be found or if either command fails.
  """
  format_cmd = [binary, filename]
  if style:
    format_cmd.append('-style=' + style)
  for first, count in line_ranges:
    # clang-format takes an inclusive first:last pair.
    format_cmd.append('-lines=%s:%s' % (first, first + count - 1))
  try:
    formatter = subprocess.Popen(format_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    die('cannot find executable "%s"' % binary)
  formatter.stdin.close()
  hash_cmd = ['git', 'hash-object', '-w', '--path=' + filename, '--stdin']
  # Pipe clang-format's output straight into hash-object.
  hasher = subprocess.Popen(hash_cmd, stdin=formatter.stdout,
                            stdout=subprocess.PIPE)
  # Drop our copy of the pipe's read end; hash-object holds its own.
  formatter.stdout.close()
  blob_id = hasher.communicate()[0]
  if hasher.returncode != 0:
    die('`%s` failed' % ' '.join(hash_cmd))
  if formatter.wait() != 0:
    die('`%s` failed' % ' '.join(format_cmd))
  return blob_id.rstrip('\r\n')
| 384 | |
| 385 | |
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The index is created from `tree` (see create_temporary_index()). On exit
  the previous value of GIT_INDEX_FILE is restored, or the variable is
  removed if it was not set before, and the temporary file is deleted.
  """
  index_path = create_temporary_index(tree)
  saved_value = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved_value is not None:
      os.environ['GIT_INDEX_FILE'] = saved_value
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
| 401 | |
| 402 | |
def create_temporary_index(tree=None):
  """Create the temporary index file and return its path.

  The file is placed in the git directory under `temp_index_basename`. It is
  populated from `tree` when one is given; otherwise an empty index is
  written.
  """
  index_path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                            temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
| 414 | |
| 415 | |
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout.

  Raises subprocess.CalledProcessError if `git diff` exits with a non-zero
  status.
  """
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person is expected to read the output, and only the porcelain command
  # does nice things like color and pagination.
  diff_cmd = ['git', 'diff', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
| 422 | |
| 423 | |
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if there are unstaged changes in the affected files
  and not `force`.  If `patch_mode`, runs `git checkout --patch` to select
  hunks interactively.

  Returns the list of filenames that differ between `old_tree` and
  `new_tree`.
  """
  changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
                      new_tree).rstrip('\0').split('\0')
  if not force:
    # The '--' keeps a filename that starts with '-' from being parsed as an
    # option.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      sys.stderr.write('The following files would be modified but '
                       'have unstaged changes:\n')
      sys.stderr.write(unstaged_files + '\n')
      sys.stderr.write('Please commit, stage, or stash them first.\n')
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
| 453 | |
| 454 | |
def run(*args, **kwargs):
  """Run the command `args` and return what it wrote to stdout.

  Keyword arguments:
    stdin    text fed to the command's standard input (default '').
    verbose  if true (the default), prefix forwarded stderr output with a
             line naming the command, and report a non-zero exit status.
    strip    if true (the default), strip trailing CR/LF from the result.

  Anything the command writes to stderr is forwarded to our stderr. If the
  command exits with a non-zero status, the process exits with status 2.

  Raises TypeError for any other keyword argument.
  """
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  child = subprocess.Popen(args, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, stdin=subprocess.PIPE)
  output, errors = child.communicate(input=stdin)

  if child.returncode != 0:
    if verbose:
      sys.stderr.write('`%s` returned %s\n' %
                       (' '.join(args), child.returncode))
    if errors:
      sys.stderr.write(errors.rstrip() + '\n')
    sys.exit(2)

  if errors:
    if verbose:
      sys.stderr.write('`%s` printed to stderr:\n' % ' '.join(args))
    sys.stderr.write(errors.rstrip() + '\n')
  return output.rstrip('\r\n') if strip else output
| 477 | |
| 478 | |
def die(message):
  """Write "error: <message>" to stderr and exit with status 2."""
  sys.stderr.write('error: %s\n' % (message,))
  sys.exit(2)
| 482 | |
| 483 | |
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()