#!/usr/bin/env python3

import argparse
import json
import re
import subprocess
from bisect import bisect_right
from collections import defaultdict
from typing import (
    Callable,
    DefaultDict,
    Generic,
    List,
    Optional,
    Pattern,
    Sequence,
    TypeVar,
    cast,
)

from typing_extensions import TypedDict


class Hunk(TypedDict):
    old_start: int
    old_count: int
    new_start: int
    new_count: int


class Diff(TypedDict):
    old_filename: Optional[str]
    hunks: List[Hunk]


# @@ -start,count +start,count @@
hunk_pattern = r"^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@"
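# illustrative example (not taken from the original source): a hunk header like
#   @@ -10,3 +12,4 @@ def foo():
# matches with groups ("10", "3", "12", "4"); when a count is omitted, as in
#   @@ -7 +7 @@
# the corresponding group is None and parse_diff below defaults it to 1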


def parse_diff(diff: str) -> Diff:
    name = None
    name_found = False
    hunks: List[Hunk] = []
    for line in diff.splitlines():
        hunk_match = re.match(hunk_pattern, line)
        if name_found:
            if hunk_match:
                old_start, old_count, new_start, new_count = hunk_match.groups()
                hunks.append(
                    {
                        "old_start": int(old_start),
                        "old_count": int(old_count or "1"),
                        "new_start": int(new_start),
                        "new_count": int(new_count or "1"),
                    }
                )
        else:
            assert not hunk_match
            name_match = re.match(r"^--- (?:(?:/dev/null)|(?:a/(.*)))$", line)
            if name_match:
                name_found = True
                (name,) = name_match.groups()
    return {
        "old_filename": name,
        "hunks": hunks,
    }

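# a rough sketch of the shape parse_diff returns (example values, not taken
# from a real run): a diff beginning with
#   --- a/foo.py
#   +++ b/foo.py
#   @@ -2,3 +4,5 @@
# would parse to
#   {"old_filename": "foo.py",
#    "hunks": [{"old_start": 2, "old_count": 3, "new_start": 4, "new_count": 5}]}
# while a diff against a newly created file ("--- /dev/null") parses with
# old_filename set to None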

T = TypeVar("T")
U = TypeVar("U")


# we want to use bisect.bisect_right to find the hunk closest to a given
# line number, but the bisect module doesn't accept a key function until
# Python 3.10 (https://github.com/python/cpython/pull/20556), so we wrap
# the list of hunks in an O(1) adapter that makes it look like a plain
# list of line numbers:
# https://gist.github.com/ericremoreynolds/2d80300dabc70eebc790
class KeyifyList(Generic[T, U]):
    def __init__(self, inner: List[T], key: Callable[[T], U]) -> None:
        self.inner = inner
        self.key = key

    def __len__(self) -> int:
        return len(self.inner)

    def __getitem__(self, k: int) -> U:
        return self.key(self.inner[k])

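# a minimal sketch of roughly how KeyifyList is used below (hypothetical values):
#   hunks = [{"old_start": 1, "old_count": 2, "new_start": 1, "new_count": 3}]
#   keyified = KeyifyList(hunks, lambda hunk: hunk["new_start"])
#   keyified[0] == 1 and len(keyified) == 1
# so bisect_right can binary-search the hunks by their keyed line numbers
# without materializing a separate list of keys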

def translate(diff: Diff, line_number: int) -> Optional[int]:
    if line_number < 1:
        return None

    hunks = diff["hunks"]
    if not hunks:
        return line_number

    keyified = KeyifyList(
        hunks, lambda hunk: hunk["new_start"] + (0 if hunk["new_count"] > 0 else 1)
    )
    i = bisect_right(cast(Sequence[int], keyified), line_number)
    if i < 1:
        return line_number

    hunk = hunks[i - 1]
    d = line_number - (hunk["new_start"] + (hunk["new_count"] or 1))
    return None if d < 0 else hunk["old_start"] + (hunk["old_count"] or 1) + d

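# worked example (illustrative numbers only): for a single hunk
# @@ -5,2 +5,3 @@, i.e.
#   {"old_start": 5, "old_count": 2, "new_start": 5, "new_count": 3}
# translate maps new line 4 to old line 4 (before the hunk, unchanged),
# returns None for new line 7 (inside the rewritten region, so there is
# no corresponding old line), and maps new line 8 to old line 7
# (8 - (5 + 3) = 0 lines past the hunk, so 5 + 2 + 0 = 7)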

# we use camelCase here because this will be output as JSON and so the
# field names need to match the group names from here:
# https://github.com/pytorch/add-annotations-github-action/blob/3ab7d7345209f5299d53303f7aaca7d3bc09e250/action.yml#L23
class Annotation(TypedDict):
    filename: str
    lineNumber: int
    columnNumber: int
    errorCode: str
    errorDesc: str


def parse_annotation(regex: Pattern[str], line: str) -> Optional[Annotation]:
    m = re.match(regex, line)
    if m:
        try:
            line_number = int(m.group("lineNumber"))
            column_number = int(m.group("columnNumber"))
        except ValueError:
            return None
        return {
            "filename": m.group("filename"),
            "lineNumber": line_number,
            "columnNumber": column_number,
            "errorCode": m.group("errorCode"),
            "errorDesc": m.group("errorDesc"),
        }
    else:
        return None

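# a hedged example of the kind of regex this expects (the actual pattern is
# supplied on the command line, not defined here): a flake8-style line such as
#   foo/bar.py:42:9: F401 'os' imported but unused
# could be parsed with a pattern along the lines of
#   r"^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): "
#   r"(?P<errorCode>\w+) (?P<errorDesc>.*)"
# as long as it defines all five named groups read above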

def translate_all(
    *, lines: List[str], regex: Pattern[str], commit: str
) -> List[Annotation]:
    ann_dict: DefaultDict[str, List[Annotation]] = defaultdict(list)
    for line in lines:
        annotation = parse_annotation(regex, line)
        if annotation is not None:
            ann_dict[annotation["filename"]].append(annotation)
    ann_list = []
    for filename, annotations in ann_dict.items():
        raw_diff = subprocess.check_output(
            ["git", "diff-index", "--unified=0", commit, filename],
            encoding="utf-8",
        )
        diff = parse_diff(raw_diff) if raw_diff.strip() else None
        # if there is a diff but it doesn't list an old filename, that
        # means the file is absent in the commit we're targeting, so we
        # skip it
        if not (diff and not diff["old_filename"]):
            for annotation in annotations:
                line_number: Optional[int] = annotation["lineNumber"]
                if diff:
                    annotation["filename"] = cast(str, diff["old_filename"])
                    line_number = translate(diff, cast(int, line_number))
                if line_number:
                    annotation["lineNumber"] = line_number
                    ann_list.append(annotation)
    return ann_list

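# illustration of the overall flow (made-up values): given an annotation on
# working-tree line 8 of foo.py, and a `git diff-index --unified=0 <commit>
# foo.py` patch whose only hunk is @@ -5,2 +5,3 @@, the annotation is kept,
# its filename is taken from the patch's "--- a/" side, and its lineNumber
# is translated to 7, the corresponding line in <commit>; annotations whose
# lines fall inside a rewritten hunk translate to None and are dropped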

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", required=True)
    parser.add_argument("--regex", required=True)
    parser.add_argument("--commit", required=True)
    args = parser.parse_args()
    with open(args.file, "r") as f:
        lines = f.readlines()
    # compile the pattern once up front; parse_annotation expects a Pattern[str]
    print(
        json.dumps(
            translate_all(
                lines=lines,
                regex=re.compile(args.regex),
                commit=args.commit,
            )
        )
    )

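# example invocation (the script name, input file, and regex below are
# placeholders, not taken from anywhere in this repo):
#   python3 translate_annotations.py \
#     --file flake8-output.txt \
#     --regex '^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorCode>\w+) (?P<errorDesc>.*)' \
#     --commit HEAD~1
# this prints a JSON list of Annotation objects on stdout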

if __name__ == "__main__":
    main()