blob: 600124cdec9d69e7b4ab25e8383658e948e4ddcf [file] [log] [blame]
#!/usr/bin/env python3
from concurrent.futures import ThreadPoolExecutor, Future
from pathlib import Path
from subprocess import CalledProcessError, SubprocessError, DEVNULL
from typing import NoReturn
import argparse
import functools
import os
import shlex
import subprocess
import sys
# In contrast to 'repo rebase', this script:
#
# Rebases merge commits and preserves the 2nd parent.
#
# Detects when m/studio-main is already an ancestor and bails early if so.
#
# Does the rebase in-memory and only updates the working tree once the entire rebase succeeds
# (thereby minimizing disk I/O and keeping the working tree clean if the rebase fails).
#
# Does a "dry run" to anticipate results and merge conflicts before committing.
#
# Example usage: ./repo-smart-rebase.py --sync --apply --skip-up-to-date my-large-branch
def main():
    """Parse the command line, optionally sync, then rebase every listed project in parallel.

    Successful per-project results go to stdout and per-project errors to stderr.
    The process exits with status 0 if no project job raised, and 1 otherwise.
    """
    cli = argparse.ArgumentParser(description='A better version of repo-rebase that handles merge commits and more')
    cli.add_argument('branch', help='which local branch to rebase')
    cli.add_argument('projects', type=Path, nargs='*', help='optionally update only specific projects')
    cli.add_argument('-s', '--sync', action='store_true', help='run repo sync beforehand')
    cli.add_argument('-f', '--apply', action='store_true', help='update HEAD and working tree')
    cli.add_argument('--skip-up-to-date', action='store_true', help='skip assertions for projects already up-to-date')
    opts = cli.parse_args()

    if not opts.apply:
        print('Dry run. Use -f to move HEAD and update the working tree.')

    # Optionally refresh the checkout before doing anything else.
    if opts.sync:
        print('Running repo sync')
        print('===')
        subprocess.check_call(['repo', 'sync', '-cvnj16', *opts.projects])
        print('===')

    # Resolve the project list from the current directory first, then move to the
    # repo root for the rest of the run.
    listing = run('repo', 'list', '--path-only', *opts.projects)
    projects = [Path(line) for line in listing.splitlines()]
    os.chdir(find_repo_root(Path(os.getcwd())))

    # Fan the rebases out over a thread pool, remembering which job belongs to which project.
    submitted: list[tuple[Path, Future]] = []
    with ThreadPoolExecutor() as pool:
        for project in projects:
            job = pool.submit(rebase_project, project, opts.branch, opts.apply, opts.skip_up_to_date)
            submitted.append((project, job))
            job.add_done_callback(functools.partial(report_done, project))

    # Leaving the `with` block waits for every job, so results can be read without blocking.
    print('===')
    for project, job in submitted:
        if job.exception() is not None:
            continue
        outcome = job.result()
        if outcome != '<quiet>':
            print(f'{project}: {outcome}')

    # Errors are listed after all successes.
    failed = False
    for project, job in submitted:
        err = job.exception()
        if err is None:
            continue
        failed = True
        if isinstance(err, CalledProcessError):
            print(f'ERROR: {project}: command failed: {shlex.join(err.cmd)}', file=sys.stderr)
        else:
            print(f'ERROR: {project}: {err}', file=sys.stderr)

    print('===')
    print('Some projects aborted due to errors.' if failed else 'Finished successfully.')
    sys.exit(1 if failed else 0)
def rebase_project(project: Path, branch: str, apply: bool, skip_up_to_date: bool) -> str:
    """Rebase `branch` of one project onto m/studio-main and describe the outcome.

    Args:
        project: project directory, used as the working directory of every git command.
        branch: local branch to rebase; projects checked out elsewhere are not rebased.
        apply: when true, move the branch/HEAD and update the working tree;
            otherwise only compute what would happen.
        skip_up_to_date: when true, return early (without the no-op assertion)
            if m/studio-main is already an ancestor of `branch`.

    Returns:
        A one-line summary for the user, or '<quiet>' when nothing is worth reporting.

    Raises:
        Exception: if studio-main gained a merge commit, or a cherry-pick fails.
        CalledProcessError: if one of the git commands run here exits non-zero.
    """
    def git(*argv: str) -> str:
        # All git invocations for this project run inside its directory.
        return run('git', *argv, cwd=project)

    # Not on the target branch? Just detach to m/studio-main (only when applying).
    checked_out = git('rev-parse', '--abbrev-ref', 'HEAD')
    if checked_out != branch:
        if apply:
            git('checkout', '-q', 'm/studio-main')
        if checked_out == 'HEAD':
            return '<quiet>'
        return f'detached from {checked_out}'

    # Is m/studio-main already an ancestor of the branch?
    is_ancestor = check('git', 'merge-base', '--is-ancestor', 'm/studio-main', branch, cwd=project)
    if skip_up_to_date and is_ancestor:
        return 'already up-to-date'

    # Refuse to rebase across new upstream merge commits, since those may require special care.
    incoming = git('rev-list', '--first-parent', f'{branch}..m/studio-main').splitlines()
    for sha in incoming:
        if len(query_parents(project, sha)) > 1:
            fail(f'Cannot rebase on top of new merge commit in studio-main: {sha:.10}')

    # Replay the branch's first-parent history, oldest first, on top of m/studio-main.
    to_replay = git('rev-list', '--first-parent', '--reverse', f'm/studio-main..{branch}').splitlines()
    print(f'Progress: {project}: starting rebase')
    old_tip = git('rev-parse', branch)
    new_tip = git('rev-parse', 'm/studio-main')
    for sha in to_replay:
        if query_parents(project, sha):
            new_tip = smart_cherry_pick(project, new_tip, sha)
        else:
            # No parents? Must be a grafted commit in a prebuilts project. We can generally
            # assume that grafted commits are ancestors of studio-main.
            print(f'Warning: {project}: skipping grafted commit {sha:.10}')

    # If m/studio-main was already an ancestor, the rebase must have been a no-op;
    # `git diff --quiet` exits non-zero (and run() raises) otherwise.
    if is_ancestor:
        git('diff', '--quiet', branch, new_tip)
        return 'already up-to-date'

    # Keep a backup ref of the old branch, then move HEAD and update the working tree.
    if apply:
        git('branch', '-f', 'tmp-repo-smart-rebase-backup', branch)
        print(f'Progress: {project}: checking out {new_tip:.10}')
        git('checkout', '-q', '-B', branch, new_tip)

    count = git('rev-list', '--count', '--first-parent', f'm/studio-main..{new_tip}')
    action = 'moved' if apply else 'would move'
    return f'{action} {count} from {old_tip:.10} to {new_tip:.10}'
def smart_cherry_pick(project: Path, head: str, commit: str) -> str:
    """Like git-cherry-pick but preserves merge commits and does not touch the working tree.

    Args:
        project: project directory, used as the working directory of every git command.
        head: commit to replay onto; it becomes the first parent of the new commit.
        commit: commit to replay. It must have at least one parent.

    Returns:
        The id of the newly created commit, or `head` unchanged when replaying
        `commit` onto `head` produces no tree difference.

    Raises:
        Exception: if git-merge-tree fails (assumed to mean merge conflicts), or if
            the new commit touches a different set of files than the original.
        AssertionError: if either commit reports no changed files.
        CalledProcessError: if one of the other git commands exits non-zero.
    """
    # Compute parents: the new commit sits on `head` and keeps every parent of the
    # original commit after the first one.
    _, *extra_parents = query_parents(project, commit)
    parent_args = ['-p', head]
    for extra_parent in extra_parents:
        parent_args += ['-p', extra_parent]
    # Compute the new tree (or throw an exception if there are merge conflicts).
    try:
        new_tree, *_ = run('git', 'merge-tree', f'--merge-base={commit}^1', head, commit, cwd=project).splitlines()
    except SubprocessError:
        # Assume git-merge-tree failed due to merge conflicts.
        fail(f'aborting due to merge conflicts with {commit:.10}')
    if check('git', 'diff', '--quiet', head, new_tree, cwd=project):
        print(f'Warning: {project}: skipping empty commit {commit:.10}')
        return head
    # Preserve authorship; see https://git-scm.com/book/en/v2/Git-Internals-Environment-Variables.
    author, email, date, *msg = run('git', 'log', '-1', '--format=%an%n%ae%n%ad%n%B', commit, cwd=project).splitlines()
    msg_args = ['-m', '\n'.join(msg)]
    # Merge with a dict literal, inherited environment first, so the original commit's
    # authorship wins. dict(GIT_AUTHOR_NAME=..., **os.environ) would raise TypeError
    # whenever the caller's environment already defines one of these variables.
    env = {
        **os.environ,
        'GIT_AUTHOR_NAME': author,
        'GIT_AUTHOR_EMAIL': email,
        'GIT_AUTHOR_DATE': date,
    }
    cherry_pick = run('git', 'commit-tree', *parent_args, *msg_args, new_tree, cwd=project, env=env)
    # Double-check that the cherry-pick looks reasonably similar to the original commit.
    # Note: we use git-diff-tree because for some reason it is way faster than git-show.
    tree_diff = ['git', 'diff-tree', '-r', '--name-only', '--diff-merges=1', '--no-commit-id']
    old_changed_files = run(*tree_diff, commit, cwd=project).splitlines()
    new_changed_files = run(*tree_diff, cherry_pick, cwd=project).splitlines()
    assert old_changed_files and new_changed_files, 'Expected a nonzero number of changed files'
    if new_changed_files != old_changed_files:
        fail(f'Cherry-pick {cherry_pick:.10} modifies different files than its original commit {commit:.10}')
    return cherry_pick
def query_parents(project: Path, commit: str) -> list[str]:
    """Return the parent ids of `commit` as printed by `git log --format=%P`; empty if none."""
    output = run('git', 'log', '-1', '--format=%P', commit, cwd=project)
    if not output:
        return []
    return output.split(' ')
def report_done(project: Path, f: Future[str]):
    """Done-callback that prints a progress line for a finished project job.

    Nothing is printed when the job raised or when its result is '<quiet>'.
    """
    if f.exception() is not None:
        return
    if f.result() == '<quiet>':
        return
    print(f'Progress: {project}: done')
def find_repo_root(cd: Path) -> Path:
    """Return the nearest directory at or above `cd` that contains a `.repo` directory.

    Args:
        cd: directory to start searching from.

    Returns:
        `cd` itself or the closest ancestor holding a `.repo` directory.

    Raises:
        Exception: if neither `cd` nor any of its ancestors contains `.repo`.
    """
    # Walk a finite list of ancestors so the search always terminates. A Path never
    # compares equal to the string Path.root, so a `cd != cd.root` loop guard would
    # spin forever once it reached the filesystem root.
    for candidate in (cd, *cd.parents):
        if candidate.joinpath('.repo').is_dir():
            return candidate
    fail('Failed to find repo root')
# Run a command and return its stdout, decoded and with surrounding whitespace stripped.
# Raises CalledProcessError if the command exits non-zero; kwargs go to subprocess.check_output.
def run(*args: str, **kwargs) -> str:
    raw_output = subprocess.check_output(args, **kwargs)
    text = raw_output.decode()
    return text.strip()
# Run a command and report whether it exited with status 0.
# Never raises on a non-zero exit; kwargs go to subprocess.run.
def check(*args: str, **kwargs) -> bool:
    completed = subprocess.run(args, **kwargs)
    return not completed.returncode
def fail(msg: str = 'unreachable') -> NoReturn:
    """Abort the current operation by raising a plain Exception carrying `msg`."""
    error = Exception(msg)
    raise error
# Run the rebase only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()