from collections import namedtuple
from pathlib import Path
import json
import locale
import os
import re
import subprocess

import requests

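# Categories used to classify each commit / PR for the release notes.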
categories = [
    'Uncategorized',
    'distributed',
    'lazy',
    'hub',
    'mobile',
    'jit',
    'visualization',
    'onnx',
    'caffe2',
    'quantization',
    'amd',
    'rocm',
    'cuda',
    'cudnn',
    'benchmark',
    'profiler',
    'performance_as_product',
    'package',
    'dispatcher',
    'releng',
    'fx',
    'code_coverage',
    'vulkan',
    'skip',
    'composability',
    'meta_frontend',
    'nn_frontend',
    'linalg_frontend',
    'cpp_frontend',
    'python_frontend',
    'complex_frontend',
    'vmap_frontend',
    'autograd_frontend',
    'build_frontend',
    'memory_format_frontend',
    'foreach_frontend',
    'dataloader_frontend',
    'sparse_frontend',
]

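# Topics used to group entries within each category.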
topics = [
    'bc_breaking',
    'deprecations',
    'new_features',
    'improvements',
    'bug_fixes',
    'performance',
    'docs',
    'devs',
    'Untopiced',
    'not user facing',
    'security',
]


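# Per-commit metadata collected from git and the GitHub API.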
Features = namedtuple('Features', [
    'title',
    'body',
    'pr_number',
    'files_changed',
    'labels',
    'author',
    'accepters',
])


def dict_to_features(dct):
    return Features(
        title=dct['title'],
        body=dct['body'],
        pr_number=dct['pr_number'],
        files_changed=dct['files_changed'],
        labels=dct['labels'],
        author=dct['author'],
        accepters=tuple(dct['accepters']))


def features_to_dict(features):
    return dict(features._asdict())


def run(command):
    """Runs `command` in a shell and returns (return_code, stdout, stderr)."""
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=True)
    output, err = p.communicate()
    rc = p.returncode
    enc = locale.getpreferredencoding()
    output = output.decode(enc)
    err = err.decode(enc)
    return rc, output.strip(), err.strip()


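# git helpers: each shells out to `git` and returns None if the command fails.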
def commit_body(commit_hash):
    cmd = f'git log -n 1 --pretty=format:%b {commit_hash}'
    ret, out, err = run(cmd)
    return out if ret == 0 else None


def commit_title(commit_hash):
    cmd = f'git log -n 1 --pretty=format:%s {commit_hash}'
    ret, out, err = run(cmd)
    return out if ret == 0 else None


def commit_files_changed(commit_hash):
    cmd = f'git diff-tree --no-commit-id --name-only -r {commit_hash}'
    ret, out, err = run(cmd)
    return out.split('\n') if ret == 0 else None


def parse_pr_number(body, commit_hash, title):
    regex = r'Pull Request resolved: https://github.com/pytorch/pytorch/pull/([0-9]+)'
    matches = re.findall(regex, body)
    if len(matches) == 0:
        if 'revert' not in title.lower() and 'updating submodules' not in title.lower():
            print(f'[{commit_hash}: {title}] Could not parse PR number, ignoring PR')
        return None
    if len(matches) > 1:
        print(f'[{commit_hash}: {title}] Found multiple PR numbers, using the first one')
    return matches[0]


def get_ghstack_token():
    """Reads the github_oauth token from ~/.ghstackrc."""
    pattern = 'github_oauth = (.*)'
    with open(Path('~/.ghstackrc').expanduser(), 'r') as f:
        config = f.read()
    matches = re.findall(pattern, config)
    if len(matches) == 0:
        raise RuntimeError("Can't find a github oauth token")
    return matches[0]


token = get_ghstack_token()
headers = {"Authorization": f"token {token}"}

def run_query(query):
    """Sends a GraphQL query to the GitHub API and returns the parsed JSON response."""
    request = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers)
    if request.status_code == 200:
        return request.json()
    else:
        raise Exception(f"Query failed with status code {request.status_code}: {query}")


def github_data(pr_number):
    """Returns (labels, author, accepters) for the given PR via the GitHub GraphQL API."""
    query = """
    {
      repository(owner: "pytorch", name: "pytorch") {
        pullRequest(number: %s) {
          author {
            login
          }
          reviews(last: 5, states: APPROVED) {
            nodes {
              author {
                login
              }
            }
          }
          labels(first: 10) {
            edges {
              node {
                name
              }
            }
          }
        }
      }
    }
    """ % pr_number
    query = run_query(query)

    edges = query['data']['repository']['pullRequest']['labels']['edges']
    labels = [edge['node']['name'] for edge in edges]
    author = query['data']['repository']['pullRequest']['author']['login']
    nodes = query['data']['repository']['pullRequest']['reviews']['nodes']

    # Use a set to dedupe multiple approvals from the same reviewer.
    accepters = {node["author"]["login"] for node in nodes}
    accepters = tuple(sorted(accepters))

    return labels, author, accepters


def get_features(commit_hash):
    """Collects the title, body, changed files, and GitHub metadata for a commit."""
    title, body, files_changed = (
        commit_title(commit_hash),
        commit_body(commit_hash),
        commit_files_changed(commit_hash))
    pr_number = parse_pr_number(body, commit_hash, title)
    labels = []
    author = ""
    accepters = tuple()
    if pr_number is not None:
        labels, author, accepters = github_data(pr_number)
    result = Features(title, body, pr_number, files_changed, labels, author, accepters)
    return result


_commit_data_cache = None


def get_commit_data_cache(path='results/data.json'):
    """Returns the process-wide _CommitDataCache, creating it on first use."""
    global _commit_data_cache
    if _commit_data_cache is None:
        _commit_data_cache = _CommitDataCache(path)
    return _commit_data_cache

class _CommitDataCache:
    """Disk-backed cache mapping commit hashes to their Features, stored as JSON."""

    def __init__(self, path):
        self.path = path
        self.data = {}
        if os.path.exists(path):
            self.data = self.read_from_disk()
        else:
            os.makedirs(Path(path).parent, exist_ok=True)

    def get(self, commit):
        if commit not in self.data:
            # Fetch and cache the data
            self.data[commit] = get_features(commit)
            self.write_to_disk()
        return self.data[commit]

    def read_from_disk(self):
        with open(self.path, 'r') as f:
            data = json.load(f)
            data = {commit: dict_to_features(dct)
                    for commit, dct in data.items()}
        return data

    def write_to_disk(self):
        data = {commit: features._asdict() for commit, features in self.data.items()}
        with open(self.path, 'w') as f:
            json.dump(data, f)
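

# Example usage (sketch; assumes a checkout of pytorch/pytorch and a valid
# github_oauth token in ~/.ghstackrc):
#
#     cache = get_commit_data_cache()
#     features = cache.get('<commit hash>')
#     print(features.title, features.pr_number, features.labels)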