| #!/usr/bin/env python3 |
| # Copyright 2019 The ChromiumOS Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Maps LLVM git SHAs to synthetic revision numbers and back. |
| |
Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(main, r1234)'.
| """ |
| |
| import argparse |
| from pathlib import Path |
| import re |
| import subprocess |
| import sys |
| from typing import IO, Iterable, List, NamedTuple, Optional, Tuple, Union |
| |
| |
MAIN_BRANCH = "main"

# Commits that come after base_llvm_sha follow no particular message
# convention, so a perfectly reasonable commit can still trip us up. For
# example:
#
#   Revert foo
#
#   This reverts foo, which had the commit message:
#
#   bar
#   llvm-svn: 375505
#
# That's unfortunate, but it's something we should make a reasonable attempt
# at handling.
base_llvm_revision = 375505
base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b"

# (revision, SHA) checkpoints, sorted by ascending revision.
# The first entry is the earliest commit that exists without an `llvm-svn`
# line. The remaining entries don't change any results; they're here purely
# because they make this script far faster: `git` can take quite a while to
# walk more than 100K commits.
known_llvm_rev_sha_pairs = (
    (base_llvm_revision, base_llvm_sha),
    (425000, "af870e11aed7a5c475ae41a72e3015c4c88597d1"),
    (450000, "906ebd5830e6053b50c52bf098e3586b567e8499"),
    (475000, "530d14a99611a71f8f3eb811920fd7b5c4d4e1f8"),
    (500000, "173855f9b0bdfe45d71272596b510650bfc1ca33"),
)
| |
class LLVMConfig(NamedTuple):
    """Represents an LLVM git checkout."""

    # Name of the LLVM remote. Generally it's "origin".
    remote: str
    # Directory of the LLVM checkout.
    dir: Union[Path, str]
| |
| |
class Rev(NamedTuple("Rev", (("branch", str), ("number", int)))):
    """An LLVM 'revision': shorthand that identifies an LLVM commit.

    Fields:
        branch: name of the branch the revision is on.
        number: revision number on that branch.
    """

    @staticmethod
    def parse(rev: str) -> "Rev":
        """Parses a Rev from the given string.

        Two spellings are accepted, ignoring surrounding whitespace:
          - '(${branch_name}, r${number})'
          - 'r${number}', which is shorthand for '(main, r${number})'.

        ${number} must consist solely of digits; signs, underscores and
        embedded whitespace are rejected.

        Raises a ValueError on a failed parse.
        """
        text = rev.strip()

        # Validate the digits ourselves rather than handing an arbitrary
        # string to int(), which would happily accept things like "-5" or
        # "1_0".
        shorthand = re.fullmatch(r"r(\d+)", text)
        if shorthand:
            return Rev(branch=MAIN_BRANCH, number=int(shorthand.group(1)))

        # fullmatch (not match) so that trailing garbage is an error.
        match = re.fullmatch(r"\((.+), r(\d+)\)", text)
        if not match:
            raise ValueError("%r isn't a valid revision" % rev)

        branch_name, rev_string = match.groups()
        return Rev(branch=branch_name, number=int(rev_string))

    def __str__(self) -> str:
        """Formats this Rev so that `Rev.parse` can read it back."""
        branch_name, number = self
        if branch_name == MAIN_BRANCH:
            return "r%d" % number
        return "(%s, r%d)" % (branch_name, number)
| |
| |
def is_git_sha(xs: str) -> bool:
    """Returns whether the given string looks like a valid git commit SHA.

    A string qualifies if it is between 7 and 40 characters long (inclusive)
    and consists solely of ASCII hexadecimal digits, in either case.
    """
    # An explicit ASCII class is used instead of `str.isdigit`, since the
    # latter is also true for non-ASCII digits, which can't appear in a SHA.
    return re.fullmatch(r"[0-9a-fA-F]{7,40}", xs) is not None
| |
| |
def check_output(command: List[str], cwd: Union[Path, str]) -> str:
    """Runs |command| inside |cwd| and returns its stdout as UTF-8 text.

    The command's stdin is subprocess.DEVNULL. A nonzero exit status raises
    subprocess.CalledProcessError.
    """
    return subprocess.check_output(
        command,
        cwd=cwd,
        stdin=subprocess.DEVNULL,
        encoding="utf-8",
    )
| |
| |
def translate_prebase_sha_to_rev_number(
    llvm_config: LLVMConfig, sha: str
) -> int:
    """Reads the revision number out of |sha|'s `llvm-svn: 1234` line.

    The caller is expected to pass a SHA that's an ancestor of
    |base_llvm_sha|.

    Raises a ValueError if the final line of the commit message isn't of the
    form `llvm-svn: ${number}`.
    """
    git_log = ["git", "log", "-n1", "--format=%B", sha, "--"]
    message_lines = (
        check_output(git_log, cwd=llvm_config.dir).strip().splitlines()
    )

    # Only the very last line of the message is considered.
    trailer = re.match(r"^llvm-svn: (\d+)$", message_lines[-1])
    if trailer:
        return int(trailer.group(1))

    raise ValueError(
        f"No llvm-svn line found for {sha}, which... shouldn't happen?"
    )
| |
| |
def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
    """Converts a git SHA, or a ref that git can resolve, into a Rev.

    A commit that descends from an entry of |known_llvm_rev_sha_pairs| is
    reported on the main branch, numbered by its first-parent distance from
    that entry. Otherwise numbering starts from the `llvm-svn` revision of the
    merge-base with |base_llvm_sha|, and the branch is picked from the remote
    branches that contain the commit.

    Raises a ValueError if the branch can't be determined unambiguously from
    the branches of |llvm_config.remote|.
    """
    git_dir = llvm_config.dir

    def git_stdout(*args: str) -> str:
        """Runs `git ${args}` in the checkout; returns stripped stdout."""
        return check_output(["git", *args], cwd=git_dir).strip()

    def commits_between(ancestor: str, descendant: str) -> int:
        """Counts the first-parent commits in `ancestor..descendant`."""
        return int(
            git_stdout(
                "rev-list",
                "--count",
                "--first-parent",
                f"{ancestor}..{descendant}",
                "--",
            )
        )

    sha = sha_or_ref
    if not is_git_sha(sha_or_ref):
        sha = git_stdout("rev-parse", "--revs-only", sha_or_ref, "--")

    # Newest checkpoint first, so the walk from checkpoint to |sha| is as
    # short as it can be.
    for known_rev, known_sha in reversed(known_llvm_rev_sha_pairs):
        merge_base = git_stdout("merge-base", known_sha, sha, "--")
        if merge_base == known_sha:
            return Rev(
                branch=MAIN_BRANCH,
                number=commits_between(known_sha, sha) + known_rev,
            )

    # No checkpoint is an ancestor of |sha|. |merge_base| is left over from
    # the final loop iteration, i.e. the merge-base with the oldest
    # checkpoint. So either:
    #   - |merge_base| is |sha| (|sha| has a guaranteed llvm-svn number)
    #   - |merge_base| is something else (|merge_base| has a guaranteed
    #     llvm-svn number, but |sha| doesn't)
    merge_base_number = translate_prebase_sha_to_rev_number(
        llvm_config, merge_base
    )
    if merge_base == sha:
        return Rev(branch=MAIN_BRANCH, number=merge_base_number)

    revision_number = merge_base_number + commits_between(merge_base, sha)

    branch_listing = check_output(
        ["git", "branch", "-r", "--contains", sha],
        cwd=git_dir,
    )
    remote_prefix = llvm_config.remote + "/"
    listed_names = (line.strip() for line in branch_listing.splitlines())
    candidates = [
        name[len(remote_prefix) :]
        for name in listed_names
        if name.startswith(remote_prefix)
    ]

    if not candidates:
        raise ValueError(
            f"No viable branches found from {llvm_config.remote} with {sha}"
        )

    # Some `origin/release/.*` branches appear to have
    # `origin/upstream/release/.*` twins. When both are present, keep only the
    # `upstream/` one, since it seems to carry the newer commits. This is
    # quadratic, but there should be tens of candidates at the very most.
    candidates = [x for x in candidates if f"upstream/{x}" not in candidates]
    if len(candidates) != 1:
        raise ValueError(
            f"Ambiguity: multiple branches from {llvm_config.remote} have "
            f"{sha}: {sorted(candidates)}"
        )

    return Rev(branch=candidates[0], number=revision_number)
| |
| |
def parse_git_commit_messages(
    stream: Union[Iterable[str], IO[str]], separator: str
) -> Iterable[Tuple[str, str]]:
    """Splits a stream of git log output into (sha, message) pairs.

    The stream is consumed line by line, and is expected to look like:

    40 character sha
    commit
    message
    body
    separator
    40 character sha
    commit
    message
    body
    separator

    Each yielded message is the concatenation of the lines between a SHA line
    and the next line that equals |separator| once stripped.
    """
    remaining = iter(stream)
    for sha_line in remaining:
        sha = sha_line.strip()
        assert is_git_sha(sha), f"Invalid git SHA: {sha}"

        body = []
        # This deliberately pulls from the same iterator as the outer loop, so
        # the outer loop resumes on the line following the separator.
        for body_line in remaining:
            if body_line.strip() == separator:
                break
            body.append(body_line)

        yield sha, "".join(body)
| |
| |
def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Finds the SHA of the commit whose `llvm-svn` number is |rev|'s.

    Only history reachable from |base_llvm_sha| is searched, so |rev| is
    expected to refer to an ancestor of that commit.

    Raises a ValueError if no commit message ends with the wanted revision
    line, and a subprocess.CalledProcessError if `git log` exits with a
    nonzero status.
    """
    # A revert may quote the message of the commit it reverts, so taking the
    # first `--grep` hit (i.e., |-n1|) isn't good enough. Each hit has its
    # final line checked instead.
    record_end = ">!" * 80
    wanted_trailer = f"llvm-svn: {rev.number}"

    git_command = [
        "git",
        "log",
        "--grep",
        f"^{wanted_trailer}$",
        f"--format=%H%n%B{record_end}",
        base_llvm_sha,
    ]

    with subprocess.Popen(
        git_command,
        cwd=llvm_config.dir,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    ) as git_log:
        assert git_log.stdout is not None
        hits = (
            sha
            for sha, message in parse_git_commit_messages(
                git_log.stdout, record_end
            )
            if message.splitlines()[-1].strip() == wanted_trailer
        )
        found = next(hits, None)
        if found is not None:
            # We have our answer; no need to let git keep going.
            git_log.terminate()
            return found
        if git_log.wait() != 0:
            raise subprocess.CalledProcessError(
                git_log.returncode, git_command
            )

    raise ValueError(f"No commit with revision {rev} found")
| |
| |
def translate_rev_to_sha_from_baseline(
    llvm_config: LLVMConfig,
    parent_sha: str,
    parent_rev: int,
    child_sha: str,
    child_rev: Optional[int],
    want_rev: int,
    branch_name: str,
) -> str:
    """Translates a revision number between a parent & child to a SHA.

    Args:
        llvm_config: LLVM config to use.
        parent_sha: SHA of the parent that the revision number is a child of.
        parent_rev: Revision number of `parent_sha`.
        child_sha: A child of `parent_sha` to find a rev on.
        child_rev: Optional note of what the child's revision number is. If
            None, it's computed by counting the first-parent commits between
            `parent_sha` and `child_sha`.
        want_rev: The desired revision number between child and parent.
        branch_name: Name of the branch to refer to if a ValueError is raised.

    Returns:
        The SHA that git resolves for `want_rev`: the baseline itself if it
        already has that revision number, otherwise the baseline's Nth
        ancestor (`${baseline}~N`).

    Raises:
        ValueError if the given child isn't far enough away from the parent to
        find `want_rev`.
    """
    # As a convenience, have a fast path for want_rev < parent_rev. In
    # particular, branches can hit this case.
    if want_rev < parent_rev:
        baseline_git_sha = parent_sha
        commits_behind_baseline = parent_rev - want_rev
    else:
        if child_rev is None:
            commits_between_parent_and_child = check_output(
                [
                    "git",
                    "rev-list",
                    "--count",
                    "--first-parent",
                    f"{parent_sha}..{child_sha}",
                    "--",
                ],
                cwd=llvm_config.dir,
            )
            child_rev = parent_rev + int(
                commits_between_parent_and_child.strip()
            )
        if child_rev < want_rev:
            # Both pieces must be f-strings, or the revision number won't be
            # interpolated into the message.
            raise ValueError(
                f"Revision {want_rev} is past "
                f"{llvm_config.remote}/{branch_name}. Try updating your tree?"
            )
        baseline_git_sha = child_sha
        commits_behind_baseline = child_rev - want_rev

    # The baseline is exactly the commit we want; no need to ask git.
    if not commits_behind_baseline:
        return baseline_git_sha

    result = check_output(
        [
            "git",
            "rev-parse",
            "--revs-only",
            f"{baseline_git_sha}~{commits_behind_baseline}",
        ],
        cwd=llvm_config.dir,
    )
    return result.strip()
| |
| |
def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Translates a Rev to a SHA.

    Revs on a branch other than main are resolved relative to that branch's
    merge-base with main. Revs on main below |base_llvm_revision| are looked
    up by their `llvm-svn` line, and all other main revs are resolved
    relative to the entries of |known_llvm_rev_sha_pairs|.

    Raises a ValueError if the given Rev doesn't exist in the given config.
    """
    branch, number = rev
    remote_branch = f"{llvm_config.remote}/{branch}"

    branch_tip = check_output(
        ["git", "rev-parse", "--revs-only", remote_branch],
        cwd=llvm_config.dir,
    ).strip()

    def require_branch_tip() -> str:
        """Returns |branch_tip|, raising if the branch didn't resolve."""
        # `git rev-parse --revs-only` can exit successfully while printing
        # nothing when handed a name it can't resolve. An empty SHA would
        # make git ranges like `A..` silently mean `A..HEAD` later on, so
        # refuse to continue with one.
        if not branch_tip:
            raise ValueError(
                f"{remote_branch} doesn't name a commit. Is the remote or "
                "branch name wrong?"
            )
        return branch_tip

    if branch != MAIN_BRANCH:
        main_merge_point = check_output(
            [
                "git",
                "merge-base",
                f"{llvm_config.remote}/{MAIN_BRANCH}",
                require_branch_tip(),
            ],
            cwd=llvm_config.dir,
        )
        main_merge_point = main_merge_point.strip()
        main_rev = translate_sha_to_rev(llvm_config, main_merge_point)
        return translate_rev_to_sha_from_baseline(
            llvm_config,
            parent_sha=main_merge_point,
            parent_rev=main_rev.number,
            child_sha=branch_tip,
            child_rev=None,
            want_rev=number,
            branch_name=branch,
        )

    if number < base_llvm_revision:
        return translate_prebase_rev_to_sha(llvm_config, rev)

    # Technically this could be a binary search, but the list has fewer than 10
    # elems, and won't grow fast. Linear is easier.
    last_cached_rev = None
    last_cached_sha = branch_tip
    for cached_rev, cached_sha in reversed(known_llvm_rev_sha_pairs):
        if cached_rev == number:
            return cached_sha

        if cached_rev < number:
            if last_cached_rev is None:
                # No newer cached pair exists, so the child we're about to
                # use is the branch tip itself; make sure it's real.
                require_branch_tip()
            return translate_rev_to_sha_from_baseline(
                llvm_config,
                parent_sha=cached_sha,
                parent_rev=cached_rev,
                child_sha=last_cached_sha,
                child_rev=last_cached_rev,
                want_rev=number,
                branch_name=branch,
            )

        last_cached_rev = cached_rev
        last_cached_sha = cached_sha

    # This is only hit if `number >= base_llvm_revision` _and_ there's no
    # coverage for `number` in `known_llvm_rev_sha_pairs`, which contains
    # `base_llvm_revision`. Raised explicitly (rather than `assert False`) so
    # the check survives `python -O`.
    raise AssertionError("Couldn't find a base SHA for a rev on main?")
| |
| |
def find_root_llvm_dir(root_dir: str = ".") -> str:
    """Returns the top-level directory of the git checkout at |root_dir|.

    This is whatever `git rev-parse --show-toplevel` prints when run from
    |root_dir|, with surrounding whitespace removed.

    Raises a subprocess.CalledProcessError if no git directory is found.
    """
    toplevel_query = ["git", "rev-parse", "--show-toplevel"]
    return check_output(toplevel_query, cwd=root_dir).strip()
| |
| |
def main(argv: List[str]) -> None:
    """Command-line entry point: converts one SHA to a rev, or vice versa.

    Args:
        argv: Command-line arguments, without the program name. Exactly one
            of --sha and --rev must be given.

    The result is printed to stdout. Argument errors, including failure to
    autodetect an LLVM tree, are reported through `parser.error`, which exits
    the process.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--llvm_dir",
        help="LLVM directory to consult for git history, etc. Autodetected "
        "if cwd is inside of an LLVM tree",
    )
    parser.add_argument(
        "--upstream",
        default="origin",
        help="LLVM upstream's remote name. Defaults to %(default)s.",
    )
    sha_or_rev = parser.add_mutually_exclusive_group(required=True)
    sha_or_rev.add_argument(
        "--sha", help="A git SHA (or ref) to convert to a rev"
    )
    sha_or_rev.add_argument("--rev", help="A rev to convert into a sha")
    opts = parser.parse_args(argv)

    # An empty --llvm_dir is treated the same as an absent one. The directory
    # is detected exactly once, so a detection failure always becomes a usage
    # error rather than a traceback.
    llvm_dir = opts.llvm_dir
    if not llvm_dir:
        try:
            llvm_dir = find_root_llvm_dir()
        except subprocess.CalledProcessError:
            parser.error(
                "Couldn't autodetect an LLVM tree; please use --llvm_dir"
            )

    config = LLVMConfig(remote=opts.upstream, dir=llvm_dir)

    if opts.sha:
        rev = translate_sha_to_rev(config, opts.sha)
        print(rev)
    else:
        sha = translate_rev_to_sha(config, Rev.parse(opts.rev))
        print(sha)


if __name__ == "__main__":
    main(sys.argv[1:])