blob: 14db9a06a614a401834d59b243bad7905ebc564e [file] [log] [blame] [edit]
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pathlib
import re
import shutil
import subprocess
from typing import Dict, Optional, Tuple, Union
import synthtool
import synthtool.preconfig
from synthtool.log import logger
from synthtool import _tracked_paths, cache, metadata, shell
# Pattern for a GitHub repository reference. The host prefix (an https or
# ssh-style github.com prefix) is optional, so a bare "owner/name" string
# matches too. The named groups "owner" and "name" capture the two path
# components; "name" may still carry a trailing ".git".
REPO_REGEX = r"(((https:\/\/)|(git@))github.com(:|\/))?(?P<owner>[^\/]+)\/(?P<name>[^\/]+)"

# Raw value of the AUTOSYNTH_USE_SSH environment variable, or False when it is
# unset. It is only ever tested for truthiness, so any non-empty value
# (including "0" or "false") selects ssh clone URLs.
USE_SSH = os.environ.get("AUTOSYNTH_USE_SSH", False)
def make_repo_clone_url(repo: str) -> str:
    """Returns a fully-qualified repo URL on GitHub from a string containing
    "owner/repo".

    This returns an https URL by default, but will return an ssh URL if
    AUTOSYNTH_USE_SSH is set.

    Arguments:
        repo {str} -- Repository in "owner/repo" form.

    Returns:
        str -- The clone URL for the repository, ending in ".git".
    """
    if USE_SSH:
        # scp-style ssh remote: user "git" on host github.com, then the path.
        return f"git@github.com:{repo}.git"
    return f"https://github.com/{repo}.git"
def _local_default_branch(path: pathlib.Path) -> Union[str, None]:
    """Helper method to infer the default branch of a local clone.

    Lists the local branches ordered by committer date, most recent first,
    and returns the first one named "master" or "main". When both exist, the
    one with the more recent commit therefore wins; the order of branches
    that are tied by committer date is undefined.

    Arguments:
        path {pathlib.Path} -- Path to the local git clone.

    Returns:
        string -- The inferred default branch, or None when the clone has
            neither a "master" nor a "main" branch.
    """
    listing = subprocess.check_output(
        ["git", "branch", "--sort=-committerdate", "--format=%(refname:short)"],
        cwd=str(path),
    )
    branch_names = listing.decode("utf-8").splitlines()
    well_known = ("master", "main")
    # First hit in newest-first order, or None when there is no hit at all.
    return next((name for name in branch_names if name in well_known), None)
def clone(
    url: str,
    dest: Optional[pathlib.Path] = None,
    committish: Optional[str] = None,
    force: bool = False,
) -> pathlib.Path:
    """Clones a remote git repo.

    Will not actually clone the repo if it's already local via two ways:

    1. It's in the cache (the default destination).
    2. It was supplied via the preconfig file.

    A repo that is already in the destination folder is updated instead: the
    inferred default branch is checked out (when one can be inferred) and
    ``git pull`` is run. A repo supplied via the preconfig file is used as
    it is, apart from the optional reset to ``committish``.

    The resulting directory is registered as a tracked path and recorded as
    a git source in the synth metadata.

    Arguments:
        url {str} -- Url pointing to remote git repo.

    Keyword Arguments:
        dest {pathlib.Path} -- Local folder where repo should be cloned; the
            repo itself lands in a subfolder named after the stem of the
            url. Defaults to the cache directory. (default: {None})
        committish {str} -- The commit hash to check out. (default: {None})
        force {bool} -- Wipe out and reclone if it already exists in the
            cache. (default: {False})

    Returns:
        pathlib.Path -- Local directory where the repo was cloned.
    """
    preclone = get_preclone(url)

    if preclone:
        logger.debug(f"Using precloned repo {preclone}")
        dest = pathlib.Path(preclone)
    else:
        if dest is None:
            dest = cache.get_cache_dir()

        dest = dest / pathlib.Path(url).stem

        if force and dest.exists():
            shutil.rmtree(dest)

        default_branch = None
        if not dest.exists():
            cmd = [
                "git",
                "clone",
                "--recurse-submodules",
                "--single-branch",
                url,
                str(dest),
            ]
            shell.run(cmd, check=True)
        else:
            default_branch = _local_default_branch(dest)
            # The helper returns None when the clone has neither a "master"
            # nor a "main" branch. None is not a valid command argument, so
            # in that case stay on the current branch and just update it.
            if default_branch is not None:
                shell.run(
                    ["git", "checkout", default_branch], cwd=str(dest), check=True
                )
            shell.run(["git", "pull"], cwd=str(dest), check=True)
        # An explicit committish wins; otherwise pin to the default branch of
        # an updated repo. After a fresh clone both may be None (no reset).
        committish = committish or default_branch

    if committish:
        shell.run(["git", "reset", "--hard", committish], cwd=str(dest))

    # track all git repositories
    _tracked_paths.add(dest)

    # add repo to metadata
    sha, message = get_latest_commit(dest)
    commit_metadata = extract_commit_message_metadata(message)

    metadata.add_git_source(
        name=dest.name,
        remote=url,
        sha=sha,
        internal_ref=commit_metadata.get("PiperOrigin-RevId"),
        local_path=str(dest),
    )

    return dest
def parse_repo_url(url: str) -> Dict[str, str]:
    """
    Parses a GitHub url and returns a dict with:
        owner - Owner of the repository
        name  - Name of the repository, without a trailing ".git"

    The following are matchable:
        googleapis/nodejs-vision(.git)?
        git@github.com:GoogleCloudPlatform/google-cloud-python.git
        https://github.com/GoogleCloudPlatform/google-cloud-python.git

    Arguments:
        url {str} -- The repository reference to parse.

    Returns:
        dict -- Mapping with the keys "owner" and "name".

    Raises:
        RuntimeError -- If url does not match REPO_REGEX.
    """
    parsed = re.search(REPO_REGEX, url)
    if parsed is None:
        raise RuntimeError("repository url is not a properly formatted git string.")

    repo_name = parsed.group("name")
    git_suffix = ".git"
    if repo_name.endswith(git_suffix):
        # Drop the suffix so callers always get the bare repository name.
        repo_name = repo_name[: -len(git_suffix)]

    return {"owner": parsed.group("owner"), "name": repo_name}
def get_latest_commit(repo: pathlib.Path = None) -> Tuple[str, str]:
    """Return the sha and commit message of the latest commit.

    Arguments:
        repo {pathlib.Path} -- Directory in which to run git; when None the
            command runs in the current working directory. (default: {None})

    Returns:
        tuple -- The first line of the ``git log`` output (the commit hash)
            and everything after it (the commit message).
    """
    log_bytes = subprocess.check_output(
        ["git", "log", "-1", "--pretty=%H%n%B"], cwd=repo
    )
    # The pretty format puts the hash on the first line and the message after
    # it, so split once on the first newline only.
    sha, body = log_bytes.decode("utf-8").split("\n", 1)
    return sha, body
def extract_commit_message_metadata(message: str) -> Dict[str, str]:
    """Extract extended metadata stored in the Git commit message.

    Every line that contains a colon contributes one entry: the text before
    the first colon is the key (kept verbatim) and the text after it, with
    surrounding whitespace stripped, is the value. Lines without a colon are
    ignored, and a later line overrides an earlier one with the same key.

    For example, a commit that looks like this::

        Do the thing!

        Piper-Changelog: 1234567

    Will return::

        {"Piper-Changelog": "1234567"}
    """
    # partition() yields an empty separator for lines that have no colon.
    pieces = (line.partition(":") for line in message.splitlines())
    return {key: value.strip() for key, colon, value in pieces if colon}
def get_preclone(url: str) -> Optional[str]:
    """Finds a pre-cloned git repo in the preclone map.

    Arguments:
        url {str} -- Remote url of the repo to look up.

    Returns:
        The entry stored for url in the loaded preconfig's precloned_repos
        mapping, or None when there is no entry for it.
    """
    precloned_repos = synthtool.preconfig.load().precloned_repos
    return precloned_repos.get(url)