llvm_tools/werror_logs.py - platform/external/toolchain-utils - Git at Google

 #!/usr/bin/env python3
 # Copyright 2024 The ChromiumOS Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Helps reason about -Werror logs emitted by the compiler wrapper.

 Specifically, this works with the -Werror reports produced by the compiler
 wrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these
 reports, so devs can run roughly the following commands:

 $ apply_force_disable_werror  # (There's no actual script to do this today.)
 $ build_packages --board=foo --nousepkg
 $ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos

 And see a full aggregation of all warnings that were suppressed in that
 `build_packages` invocation.

 It can also be used to fetch warnings reports from CQ runs, for instance,
 $ ./werror_logs.py fetch-cq --cq-orchestrator-id=123456

 In this case, it downloads _all -Werror logs_ from children of the given
 cq-orchestrator, and prints the parent directory of all of these reports. If
 you run `aggregate` on this directory, it's highly recommended to use the
 `--canonicalize-board-roots` flag.
 """

 import argparse
 import collections
 import dataclasses
 import json
 import logging
 import multiprocessing.pool
 import os
 from pathlib import Path
 import re
 import shutil
 import subprocess
 import sys
 import tempfile
 import threading
 from typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional

 import cros_cls


 _DEFAULT_FETCH_DIRECTORY = Path("/tmp/werror_logs")


 def canonicalize_file_path_board_root(file_path: str) -> str:
     # Get rid of double slashes, unnecessary directory traversal
     # (foo/../bar/..), etc. Easier to read this way.
     file_path = os.path.normpath(file_path)
     if file_path.startswith("/build/"):
         i = file_path.find("/", len("/build/"))
         if i != -1:
             return f"/build/{{board}}/{file_path[i+1:]}"
     return file_path


 @dataclasses.dataclass(frozen=True, eq=True, order=True)
 class ClangWarningLocation:
     """Represents a location at which a Clang warning was emitted."""

     file: str
     line: int
     column: int

     @classmethod
     def parse(
         cls, location: str, canonicalize_board_root: bool = False
     ) -> "ClangWarningLocation":
         split = location.rsplit(":", 2)
         if len(split) == 3:
             file = split[0]
             if canonicalize_board_root:
                 file = canonicalize_file_path_board_root(file)
             return cls(file=file, line=int(split[1]), column=int(split[2]))
         raise ValueError(f"Invalid location: {location!r}")


 @dataclasses.dataclass(frozen=True, eq=True)
 class ClangWarning:
     """Represents a Clang warning at a specific location (if applicable)."""

     # The name of the warning, e.g., -Wunused-variable
     name: str
     # The message of the warning, e.g., "'allocate' is deprecated."
     message: str
     # The location of this warning. Not present for frontend diagnostics.
     location: Optional[ClangWarningLocation]

     # This parses two kinds of errors:
     # 1. `clang-17: error: foo [-W...]`
     # 2. `/file/path:123:45: error: foo [-W...]"
     _WARNING_RE = re.compile(
         # Capture the location on its own, since `clang-\d+` is unused below.
         r"^(?:([^:]*:\d+:\d+)|clang-\d+)"
         r": error: "
         # Capture the message
         r"(.*?)\s+"
         r"\[(-W[^\][]+)]\s*$"
     )

     @classmethod
     def try_parse_line(
         cls, line: str, canonicalize_board_root: bool = False
     ) -> Optional["ClangWarning"]:
         # Fast path: we can expect "error: " in interesting lines. Break early
         # if that's not present.
         if "error: " not in line:
             return None

         m = cls._WARNING_RE.fullmatch(line)
         if not m:
             return None

         location, message, warning_flags = m.groups()
         individual_warning_flags = [
             x for x in warning_flags.split(",") if x != "-Werror"
         ]

         # This isn't impossible to handle in theory, just unexpected. Complain
         # about it.
         if len(individual_warning_flags) != 1:
             raise ValueError(
                 f"Weird: parsed warnings {individual_warning_flags} out "
                 f"of {line}"
             )

         if location is None:
             parsed_location = None
         else:
             parsed_location = ClangWarningLocation.parse(
                 location, canonicalize_board_root
             )
         return cls(
             name=individual_warning_flags[0],
             message=message,
             location=parsed_location,
         )


 @dataclasses.dataclass(frozen=True, eq=True)
 class WarningInfo:
     """Carries information about a ClangWarning."""

     packages: DefaultDict[str, int] = dataclasses.field(
         default_factory=lambda: collections.defaultdict(int)
     )


 class UnknownPackageNameError(ValueError):
     """Raised when a package name can't be determined from a warning report."""


 @dataclasses.dataclass
 class AggregatedWarnings:
     """Aggregates warning reports incrementally."""

     num_reports: int = 0
     # Mapping of warning -> list of packages that emitted it. Warnings in
     # headers may be referred to by multiple packages.
     warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field(
         default_factory=lambda: collections.defaultdict(WarningInfo)
     )

     _CWD_PACKAGE_RE = re.compile(
         r"^(?:/build/[^/]+)?/var/(?:cache|tmp)/portage/([^/]+/[^/]+)/"
     )

     @classmethod
     def _guess_package_name(cls, report: Dict[str, Any]) -> str:
         """Tries to guess what package `report` is from.

         Raises:
             UnknownPackageNameError if the package's name couldn't be
             determined.
         """
         m = cls._CWD_PACKAGE_RE.match(report.get("cwd", ""))
         if not m:
             raise UnknownPackageNameError()
         return m.group(1)

     def add_report_json(
         self, report_json: Dict[str, Any], canonicalize_board_root: bool = False
     ) -> int:
         """Adds the given report, returning the number of warnings parsed.

         Raises:
             UnknownPackageNameError if the package's name couldn't be
             determined.
         """
         self.num_reports += 1
         package_name = self._guess_package_name(report_json)

         num_warnings = 0
         for line in report_json.get("stdout", "").splitlines():
             if parsed := ClangWarning.try_parse_line(
                 line, canonicalize_board_root
             ):
                 self.warnings[parsed].packages[package_name] += 1
                 num_warnings += 1

         return num_warnings

     def add_report(
         self, report_file: Path, canonicalize_board_root: bool = False
     ) -> None:
         with report_file.open(encoding="utf-8") as f:
             report = json.load(f)

         try:
             n = self.add_report_json(report, canonicalize_board_root)
         except UnknownPackageNameError:
             logging.warning(
                 "Failed guessing package name for report at %r; ignoring file",
                 report_file,
             )
             return

         if not n:
             logging.warning(
                 "Report at %r had no parseable warnings", report_file
             )


 def print_aligned_counts(
     name_count_map: Dict[str, int], file: Optional[IO[str]] = None
 ) -> None:
     assert name_count_map
     # Sort on value, highest first. Name breaks ties.
     summary = sorted(name_count_map.items(), key=lambda x: (-x[1], x[0]))
     num_col_width = len(f"{summary[0][1]:,}")
     name_col_width = max(len(x) for x in name_count_map)
     for name, count in summary:
         fmt_name = name.rjust(name_col_width)
         fmt_count = f"{count:,}".rjust(num_col_width)
         print(f"\t{fmt_name}: {fmt_count}", file=file)


 def summarize_per_package_warnings(
     warning_infos: Iterable[WarningInfo],
     file: Optional[IO[str]] = None,
 ) -> None:
     warnings_per_package: DefaultDict[str, int] = collections.defaultdict(int)
     for info in warning_infos:
         for package_name, warning_count in info.packages.items():
             warnings_per_package[package_name] += warning_count

     if not warnings_per_package:
         return

     print("## Per-package warning counts:", file=file)
     print_aligned_counts(warnings_per_package, file=file)


 def summarize_warnings_by_flag(
     warnings: Dict[ClangWarning, WarningInfo],
     file: Optional[IO[str]] = None,
 ) -> None:
     if not warnings:
         return

     warnings_per_flag: Counter[str] = collections.Counter()
     for warning, info in warnings.items():
         warnings_per_flag[warning.name] += sum(info.packages.values())

     print("## Instances of each fatal warning:", file=file)
     print_aligned_counts(warnings_per_flag, file=file)


 def aggregate_reports(opts: argparse.Namespace) -> None:
     directory = opts.directory
     aggregated = AggregatedWarnings()
     for report in directory.glob("**/warnings_report*.json"):
         logging.debug("Discovered report %s", report)
         aggregated.add_report(report, opts.canonicalize_board_roots)

     if not aggregated.num_reports:
         raise ValueError(f"Found no warnings report under {directory}")

     logging.info("Discovered %d report files in total", aggregated.num_reports)
     summarize_per_package_warnings(aggregated.warnings.values())
     summarize_warnings_by_flag(aggregated.warnings)


 def fetch_werror_tarball_links(
     child_builders: Dict[str, cros_cls.BuildID]
 ) -> List[str]:
     outputs = cros_cls.CQBoardBuilderOutput.fetch_many(child_builders.values())
     artifacts_links = []
     for builder_name, out in zip(child_builders, outputs):
         if out.artifacts_link:
             artifacts_links.append(out.artifacts_link)
         else:
             logging.info("%s had no output artifacts; ignoring", builder_name)

     gsutil_stdout = subprocess.run(
         ["gsutil", "-m", "ls"] + artifacts_links,
         check=True,
         encoding="utf-8",
         stdin=subprocess.DEVNULL,
         stdout=subprocess.PIPE,
     ).stdout

     return [
         x
         for x in gsutil_stdout.splitlines()
         if x.endswith(".fatal_clang_warnings.tar.xz")
     ]


 def cq_builder_name_from_werror_logs_path(werror_logs: str) -> str:
     """Returns the CQ builder given a -Werror logs path.

     >>> cq_builder_name_from_werror_logs_path(
             "gs://chromeos-image-archive/staryu-cq/"
             "R123-15771.0.0-94466-8756713501925941617/"
             "staryu.20240207.fatal_clang_warnings.tar.xz"
         )
     "staryu-cq"
     """
     return os.path.basename(os.path.dirname(os.path.dirname(werror_logs)))


 def download_and_unpack_werror_tarballs(
     unpack_dir: Path, download_dir: Path, gs_urls: List[str]
 ):
     # This is necessary below when we're untarring files. It should trivially
     # always be the case, and assuming it makes testing easier.
     assert download_dir.is_absolute(), download_dir

     unpack_dir.mkdir()
     download_dir.mkdir()

     logging.info(
         "Fetching and unpacking %d -Werror reports; this may take a bit",
         len(gs_urls),
     )
     # Run the download in a threadpool since we can have >100 logs, and all of
     # this is heavily I/O-bound.
     # Max 8 downloads at a time is arbitrary, but should minimize the chance of
     # rate-limiting. Don't limit `tar xaf`, since those should be short-lived.
     download_limiter = threading.BoundedSemaphore(8)

     def download_one_url(
         unpack_dir: Path, download_dir: Path, gs_url: str
     ) -> Optional[subprocess.CalledProcessError]:
         """Downloads and unpacks -Werror logs from the given gs_url.

         Leaves the tarball in `download_dir`, and the unpacked version in
         `unpack_dir`.

         Returns:
             None if all went well; otherwise, returns the command that failed.
             All commands have stderr data piped in.
         """
         file_targ = download_dir / os.path.basename(gs_url)
         try:
             with download_limiter:
                 subprocess.run(
                     ["gsutil", "cp", gs_url, file_targ],
                     check=True,
                     stdin=subprocess.DEVNULL,
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.PIPE,
                     encoding="utf-8",
                     errors="replace",
                 )

             # N.B., file_targ is absolute, so running with `file_targ` while
             # changing `cwd` is safe.
             subprocess.run(
                 ["tar", "xaf", file_targ],
                 check=True,
                 cwd=unpack_dir,
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.PIPE,
                 encoding="utf-8",
                 errors="replace",
             )
         except subprocess.CalledProcessError as e:
             return e
         return None

     with multiprocessing.pool.ThreadPool() as thread_pool:
         download_futures = []
         for gs_url in gs_urls:
             name = cq_builder_name_from_werror_logs_path(gs_url)
             unpack_to = unpack_dir / name
             unpack_to.mkdir()
             download_to = download_dir / name
             download_to.mkdir()
             download_futures.append(
                 (
                     name,
                     thread_pool.apply_async(
                         download_one_url, (unpack_to, download_to, gs_url)
                     ),
                 )
             )

         num_failures = 0
         for name, future in download_futures:
             result = future.get()
             if not result:
                 continue

             num_failures += 1
             logging.error(
                 "Downloading %s failed: running %r. Stderr: %r",
                 name,
                 result.cmd,
                 result.stderr,
             )
     if num_failures:
         raise ValueError(f"{num_failures} download(s) failed.")


 def fetch_cq_reports(opts: argparse.Namespace) -> None:
     if opts.cl:
         logging.info(
             "Fetching most recent completed CQ orchestrator from %s", opts.cl
         )
         all_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
         if not all_ids:
             raise ValueError(
                 f"No CQ orchestrators found under {opts.cl}. See --help for "
                 "how to pass a build ID directly."
             )
         # Note that these cq-orchestrator runs are returned in oldest-to-newest
         # order. The user probably wants the newest run.
         cq_orchestrator_id = all_ids[-1]
         cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)
         logging.info("Checking CQ run %s", cq_orchestrator_url)
     else:
         cq_orchestrator_id = opts.cq_orchestrator_id
         cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)

     # This is the earliest point at which we can compute this directory with
     # certainty. Figure it out now and fail early if it exists.
     output_directory = opts.directory
     if not output_directory:
         output_directory = _DEFAULT_FETCH_DIRECTORY / str(cq_orchestrator_id)

     if output_directory.exists():
         if not opts.force:
             sys.exit(
                 f"Directory at {output_directory} exists; not overwriting. "
                 "Pass --force to overwrite."
             )
         # Actually _remove_ it when we have all logs unpacked and are able to
         # create the output directory with confidence.

     logging.info("Fetching info on child builders of %s", cq_orchestrator_url)
     child_builders = cros_cls.CQOrchestratorOutput.fetch(
         cq_orchestrator_id
     ).child_builders
     if not child_builders:
         raise ValueError(f"No child builders found for {cq_orchestrator_url}")

     logging.info(
         "%d child builders found; finding associated tarball links",
         len(child_builders),
     )
     werror_links = fetch_werror_tarball_links(child_builders)
     if not werror_links:
         raise ValueError(
             f"No -Werror logs found in children of {cq_orchestrator_url}"
         )

     logging.info("%d -Werror logs found", len(werror_links))
     with tempfile.TemporaryDirectory("werror_logs_fetch_cq") as t:
         tempdir = Path(t)
         unpack_dir = tempdir / "unpacked"
         download_and_unpack_werror_tarballs(
             unpack_dir=unpack_dir,
             download_dir=tempdir / "tarballs",
             gs_urls=werror_links,
         )

         if output_directory.exists():
             logging.info("Removing output directory at %s", output_directory)
             shutil.rmtree(output_directory)
         output_directory.parent.mkdir(parents=True, exist_ok=True)
         # (Convert these to strs to keep mypy happy.)
         shutil.move(str(unpack_dir), str(output_directory))
         logging.info(
             "CQ logs from %s stored in %s",
             cq_orchestrator_url,
             output_directory,
         )


 def main(argv: List[str]) -> None:
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
     parser.add_argument(
         "--debug", action="store_true", help="Enable debug logging"
     )
     subparsers = parser.add_subparsers(required=True)
     # b/318833638: While there's only one subparser here for the moment, more
     # are expected to come (specifically, one to download logs from a CQ run).
     aggregate = subparsers.add_parser(
         "aggregate",
         help="""
         Aggregate all -Werror reports beneath a directory. Note that this will
         traverse all children of the directory, so can be used either on
         unpacked -Werror reports from CQ builders, or can be used on e.g.,
         /build/cherry/var/lib/chromeos.
         """,
     )
     aggregate.set_defaults(func=aggregate_reports)
     aggregate.add_argument(
         "--canonicalize-board-roots",
         action="store_true",
         help="""
         Converts warnings paths starting with a board root (e.g., /build/atlas)
         to a form consistent across many boards.
         """,
     )
     aggregate.add_argument(
         "--directory", type=Path, required=True, help="Directory to inspect."
     )

     fetch_cq = subparsers.add_parser(
         "fetch-cq",
         help="Fetch all -Werror reports for a CQ run.",
     )
     fetch_cq.set_defaults(func=fetch_cq_reports)
     cl_or_cq_orchestrator = fetch_cq.add_mutually_exclusive_group(required=True)
     cl_or_cq_orchestrator.add_argument(
         "--cl",
         type=cros_cls.ChangeListURL.parse_with_patch_set,
         help="Link to a CL to get the most recent cq-orchestrator from",
     )
     cl_or_cq_orchestrator.add_argument(
         "--cq-orchestrator-id",
         type=cros_cls.BuildID,
         help="""
         Build number for a cq-orchestrator run. Builders invoked by this are
         examined for -Werror logs.
         """,
     )
     fetch_cq.add_argument(
         "--directory",
         type=Path,
         help=f"""
         Directory to put downloaded -Werror logs in. Default is a subdirectory
         of {_DEFAULT_FETCH_DIRECTORY}.
         """,
     )
     fetch_cq.add_argument(
         "-f",
         "--force",
         action="store_true",
         help="Remove the directory at `--directory` if it exists",
     )

     opts = parser.parse_args(argv)

     logging.basicConfig(
         format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
         "%(message)s",
         level=logging.DEBUG if opts.debug else logging.INFO,
     )

     assert getattr(opts, "func", None), "Unknown subcommand?"
     opts.func(opts)


 if __name__ == "__main__":
     main(sys.argv[1:])
	#!/usr/bin/env python3
	# Copyright 2024 The ChromiumOS Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Helps reason about -Werror logs emitted by the compiler wrapper.

	Specifically, this works with the -Werror reports produced by the compiler
	wrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these
	reports, so devs can run roughly the following commands:

	$ apply_force_disable_werror # (There's no actual script to do this today.)
	$ build_packages --board=foo --nousepkg
	$ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos

	And see a full aggregation of all warnings that were suppressed in that
	`build_packages` invocation.

	It can also be used to fetch warnings reports from CQ runs, for instance,
	$ ./werror_logs.py fetch-cq --cq-orchestrator-id=123456

	In this case, it downloads _all -Werror logs_ from children of the given
	cq-orchestrator, and prints the parent directory of all of these reports. If
	you run `aggregate` on this directory, it's highly recommended to use the
	`--canonicalize-board-roots` flag.
	"""

	import argparse
	import collections
	import dataclasses
	import json
	import logging
	import multiprocessing.pool
	import os
	from pathlib import Path
	import re
	import shutil
	import subprocess
	import sys
	import tempfile
	import threading
	from typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional

	import cros_cls


	_DEFAULT_FETCH_DIRECTORY = Path("/tmp/werror_logs")


	def canonicalize_file_path_board_root(file_path: str) -> str:
	# Get rid of double slashes, unnecessary directory traversal
	# (foo/../bar/..), etc. Easier to read this way.
	file_path = os.path.normpath(file_path)
	if file_path.startswith("/build/"):
	i = file_path.find("/", len("/build/"))
	if i != -1:
	return f"/build/{{board}}/{file_path[i+1:]}"
	return file_path


	@dataclasses.dataclass(frozen=True, eq=True, order=True)
	class ClangWarningLocation:
	"""Represents a location at which a Clang warning was emitted."""

	file: str
	line: int
	column: int

	@classmethod
	def parse(
	cls, location: str, canonicalize_board_root: bool = False
	) -> "ClangWarningLocation":
	split = location.rsplit(":", 2)
	if len(split) == 3:
	file = split[0]
	if canonicalize_board_root:
	file = canonicalize_file_path_board_root(file)
	return cls(file=file, line=int(split[1]), column=int(split[2]))
	raise ValueError(f"Invalid location: {location!r}")


	@dataclasses.dataclass(frozen=True, eq=True)
	class ClangWarning:
	"""Represents a Clang warning at a specific location (if applicable)."""

	# The name of the warning, e.g., -Wunused-variable
	name: str
	# The message of the warning, e.g., "'allocate' is deprecated."
	message: str
	# The location of this warning. Not present for frontend diagnostics.
	location: Optional[ClangWarningLocation]

	# This parses two kinds of errors:
	# 1. `clang-17: error: foo [-W...]`
	# 2. `/file/path:123:45: error: foo [-W...]"
	_WARNING_RE = re.compile(
	# Capture the location on its own, since `clang-\d+` is unused below.
	r"^(?:([^:]*:\d+:\d+)\|clang-\d+)"
	r": error: "
	# Capture the message
	r"(.*?)\s+"
	r"\[(-W[^\][]+)]\s*$"
	)

	@classmethod
	def try_parse_line(
	cls, line: str, canonicalize_board_root: bool = False
	) -> Optional["ClangWarning"]:
	# Fast path: we can expect "error: " in interesting lines. Break early
	# if that's not present.
	if "error: " not in line:
	return None

	m = cls._WARNING_RE.fullmatch(line)
	if not m:
	return None

	location, message, warning_flags = m.groups()
	individual_warning_flags = [
	x for x in warning_flags.split(",") if x != "-Werror"
	]

	# This isn't impossible to handle in theory, just unexpected. Complain
	# about it.
	if len(individual_warning_flags) != 1:
	raise ValueError(
	f"Weird: parsed warnings {individual_warning_flags} out "
	f"of {line}"
	)

	if location is None:
	parsed_location = None
	else:
	parsed_location = ClangWarningLocation.parse(
	location, canonicalize_board_root
	)
	return cls(
	name=individual_warning_flags[0],
	message=message,
	location=parsed_location,
	)


	@dataclasses.dataclass(frozen=True, eq=True)
	class WarningInfo:
	"""Carries information about a ClangWarning."""

	packages: DefaultDict[str, int] = dataclasses.field(
	default_factory=lambda: collections.defaultdict(int)
	)


	class UnknownPackageNameError(ValueError):
	"""Raised when a package name can't be determined from a warning report."""


	@dataclasses.dataclass
	class AggregatedWarnings:
	"""Aggregates warning reports incrementally."""

	num_reports: int = 0
	# Mapping of warning -> list of packages that emitted it. Warnings in
	# headers may be referred to by multiple packages.
	warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field(
	default_factory=lambda: collections.defaultdict(WarningInfo)
	)

	_CWD_PACKAGE_RE = re.compile(
	r"^(?:/build/[^/]+)?/var/(?:cache\|tmp)/portage/([^/]+/[^/]+)/"
	)

	@classmethod
	def _guess_package_name(cls, report: Dict[str, Any]) -> str:
	"""Tries to guess what package `report` is from.

	Raises:
	UnknownPackageNameError if the package's name couldn't be
	determined.
	"""
	m = cls._CWD_PACKAGE_RE.match(report.get("cwd", ""))
	if not m:
	raise UnknownPackageNameError()
	return m.group(1)

	def add_report_json(
	self, report_json: Dict[str, Any], canonicalize_board_root: bool = False
	) -> int:
	"""Adds the given report, returning the number of warnings parsed.

	Raises:
	UnknownPackageNameError if the package's name couldn't be
	determined.
	"""
	self.num_reports += 1
	package_name = self._guess_package_name(report_json)

	num_warnings = 0
	for line in report_json.get("stdout", "").splitlines():
	if parsed := ClangWarning.try_parse_line(
	line, canonicalize_board_root
	):
	self.warnings[parsed].packages[package_name] += 1
	num_warnings += 1

	return num_warnings

	def add_report(
	self, report_file: Path, canonicalize_board_root: bool = False
	) -> None:
	with report_file.open(encoding="utf-8") as f:
	report = json.load(f)

	try:
	n = self.add_report_json(report, canonicalize_board_root)
	except UnknownPackageNameError:
	logging.warning(
	"Failed guessing package name for report at %r; ignoring file",
	report_file,
	)
	return

	if not n:
	logging.warning(
	"Report at %r had no parseable warnings", report_file
	)


	def print_aligned_counts(
	name_count_map: Dict[str, int], file: Optional[IO[str]] = None
	) -> None:
	assert name_count_map
	# Sort on value, highest first. Name breaks ties.
	summary = sorted(name_count_map.items(), key=lambda x: (-x[1], x[0]))
	num_col_width = len(f"{summary[0][1]:,}")
	name_col_width = max(len(x) for x in name_count_map)
	for name, count in summary:
	fmt_name = name.rjust(name_col_width)
	fmt_count = f"{count:,}".rjust(num_col_width)
	print(f"\t{fmt_name}: {fmt_count}", file=file)


	def summarize_per_package_warnings(
	warning_infos: Iterable[WarningInfo],
	file: Optional[IO[str]] = None,
	) -> None:
	warnings_per_package: DefaultDict[str, int] = collections.defaultdict(int)
	for info in warning_infos:
	for package_name, warning_count in info.packages.items():
	warnings_per_package[package_name] += warning_count

	if not warnings_per_package:
	return

	print("## Per-package warning counts:", file=file)
	print_aligned_counts(warnings_per_package, file=file)


	def summarize_warnings_by_flag(
	warnings: Dict[ClangWarning, WarningInfo],
	file: Optional[IO[str]] = None,
	) -> None:
	if not warnings:
	return

	warnings_per_flag: Counter[str] = collections.Counter()
	for warning, info in warnings.items():
	warnings_per_flag[warning.name] += sum(info.packages.values())

	print("## Instances of each fatal warning:", file=file)
	print_aligned_counts(warnings_per_flag, file=file)


	def aggregate_reports(opts: argparse.Namespace) -> None:
	directory = opts.directory
	aggregated = AggregatedWarnings()
	for report in directory.glob("*/warnings_report.json"):
	logging.debug("Discovered report %s", report)
	aggregated.add_report(report, opts.canonicalize_board_roots)

	if not aggregated.num_reports:
	raise ValueError(f"Found no warnings report under {directory}")

	logging.info("Discovered %d report files in total", aggregated.num_reports)
	summarize_per_package_warnings(aggregated.warnings.values())
	summarize_warnings_by_flag(aggregated.warnings)


	def fetch_werror_tarball_links(
	child_builders: Dict[str, cros_cls.BuildID]
	) -> List[str]:
	outputs = cros_cls.CQBoardBuilderOutput.fetch_many(child_builders.values())
	artifacts_links = []
	for builder_name, out in zip(child_builders, outputs):
	if out.artifacts_link:
	artifacts_links.append(out.artifacts_link)
	else:
	logging.info("%s had no output artifacts; ignoring", builder_name)

	gsutil_stdout = subprocess.run(
	["gsutil", "-m", "ls"] + artifacts_links,
	check=True,
	encoding="utf-8",
	stdin=subprocess.DEVNULL,
	stdout=subprocess.PIPE,
	).stdout

	return [
	x
	for x in gsutil_stdout.splitlines()
	if x.endswith(".fatal_clang_warnings.tar.xz")
	]


	def cq_builder_name_from_werror_logs_path(werror_logs: str) -> str:
	"""Returns the CQ builder given a -Werror logs path.

	>>> cq_builder_name_from_werror_logs_path(
	"gs://chromeos-image-archive/staryu-cq/"
	"R123-15771.0.0-94466-8756713501925941617/"
	"staryu.20240207.fatal_clang_warnings.tar.xz"
	)
	"staryu-cq"
	"""
	return os.path.basename(os.path.dirname(os.path.dirname(werror_logs)))


	def download_and_unpack_werror_tarballs(
	unpack_dir: Path, download_dir: Path, gs_urls: List[str]
	):
	# This is necessary below when we're untarring files. It should trivially
	# always be the case, and assuming it makes testing easier.
	assert download_dir.is_absolute(), download_dir

	unpack_dir.mkdir()
	download_dir.mkdir()

	logging.info(
	"Fetching and unpacking %d -Werror reports; this may take a bit",
	len(gs_urls),
	)
	# Run the download in a threadpool since we can have >100 logs, and all of
	# this is heavily I/O-bound.
	# Max 8 downloads at a time is arbitrary, but should minimize the chance of
	# rate-limiting. Don't limit `tar xaf`, since those should be short-lived.
	download_limiter = threading.BoundedSemaphore(8)

	def download_one_url(
	unpack_dir: Path, download_dir: Path, gs_url: str
	) -> Optional[subprocess.CalledProcessError]:
	"""Downloads and unpacks -Werror logs from the given gs_url.

	Leaves the tarball in `download_dir`, and the unpacked version in
	`unpack_dir`.

	Returns:
	None if all went well; otherwise, returns the command that failed.
	All commands have stderr data piped in.
	"""
	file_targ = download_dir / os.path.basename(gs_url)
	try:
	with download_limiter:
	subprocess.run(
	["gsutil", "cp", gs_url, file_targ],
	check=True,
	stdin=subprocess.DEVNULL,
	stdout=subprocess.DEVNULL,
	stderr=subprocess.PIPE,
	encoding="utf-8",
	errors="replace",
	)

	# N.B., file_targ is absolute, so running with `file_targ` while
	# changing `cwd` is safe.
	subprocess.run(
	["tar", "xaf", file_targ],
	check=True,
	cwd=unpack_dir,
	stdin=subprocess.DEVNULL,
	stdout=subprocess.DEVNULL,
	stderr=subprocess.PIPE,
	encoding="utf-8",
	errors="replace",
	)
	except subprocess.CalledProcessError as e:
	return e
	return None

	with multiprocessing.pool.ThreadPool() as thread_pool:
	download_futures = []
	for gs_url in gs_urls:
	name = cq_builder_name_from_werror_logs_path(gs_url)
	unpack_to = unpack_dir / name
	unpack_to.mkdir()
	download_to = download_dir / name
	download_to.mkdir()
	download_futures.append(
	(
	name,
	thread_pool.apply_async(
	download_one_url, (unpack_to, download_to, gs_url)
	),
	)
	)

	num_failures = 0
	for name, future in download_futures:
	result = future.get()
	if not result:
	continue

	num_failures += 1
	logging.error(
	"Downloading %s failed: running %r. Stderr: %r",
	name,
	result.cmd,
	result.stderr,
	)
	if num_failures:
	raise ValueError(f"{num_failures} download(s) failed.")


	def fetch_cq_reports(opts: argparse.Namespace) -> None:
	if opts.cl:
	logging.info(
	"Fetching most recent completed CQ orchestrator from %s", opts.cl
	)
	all_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
	if not all_ids:
	raise ValueError(
	f"No CQ orchestrators found under {opts.cl}. See --help for "
	"how to pass a build ID directly."
	)
	# Note that these cq-orchestrator runs are returned in oldest-to-newest
	# order. The user probably wants the newest run.
	cq_orchestrator_id = all_ids[-1]
	cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)
	logging.info("Checking CQ run %s", cq_orchestrator_url)
	else:
	cq_orchestrator_id = opts.cq_orchestrator_id
	cq_orchestrator_url = cros_cls.builder_url(cq_orchestrator_id)

	# This is the earliest point at which we can compute this directory with
	# certainty. Figure it out now and fail early if it exists.
	output_directory = opts.directory
	if not output_directory:
	output_directory = _DEFAULT_FETCH_DIRECTORY / str(cq_orchestrator_id)

	if output_directory.exists():
	if not opts.force:
	sys.exit(
	f"Directory at {output_directory} exists; not overwriting. "
	"Pass --force to overwrite."
	)
	# Actually _remove_ it when we have all logs unpacked and are able to
	# create the output directory with confidence.

	logging.info("Fetching info on child builders of %s", cq_orchestrator_url)
	child_builders = cros_cls.CQOrchestratorOutput.fetch(
	cq_orchestrator_id
	).child_builders
	if not child_builders:
	raise ValueError(f"No child builders found for {cq_orchestrator_url}")

	logging.info(
	"%d child builders found; finding associated tarball links",
	len(child_builders),
	)
	werror_links = fetch_werror_tarball_links(child_builders)
	if not werror_links:
	raise ValueError(
	f"No -Werror logs found in children of {cq_orchestrator_url}"
	)

	logging.info("%d -Werror logs found", len(werror_links))
	with tempfile.TemporaryDirectory("werror_logs_fetch_cq") as t:
	tempdir = Path(t)
	unpack_dir = tempdir / "unpacked"
	download_and_unpack_werror_tarballs(
	unpack_dir=unpack_dir,
	download_dir=tempdir / "tarballs",
	gs_urls=werror_links,
	)

	if output_directory.exists():
	logging.info("Removing output directory at %s", output_directory)
	shutil.rmtree(output_directory)
	output_directory.parent.mkdir(parents=True, exist_ok=True)
	# (Convert these to strs to keep mypy happy.)
	shutil.move(str(unpack_dir), str(output_directory))
	logging.info(
	"CQ logs from %s stored in %s",
	cq_orchestrator_url,
	output_directory,
	)


	def main(argv: List[str]) -> None:
	parser = argparse.ArgumentParser(
	description=__doc__,
	formatter_class=argparse.RawDescriptionHelpFormatter,
	)
	parser.add_argument(
	"--debug", action="store_true", help="Enable debug logging"
	)
	subparsers = parser.add_subparsers(required=True)
	# b/318833638: While there's only one subparser here for the moment, more
	# are expected to come (specifically, one to download logs from a CQ run).
	aggregate = subparsers.add_parser(
	"aggregate",
	help="""
	Aggregate all -Werror reports beneath a directory. Note that this will
	traverse all children of the directory, so can be used either on
	unpacked -Werror reports from CQ builders, or can be used on e.g.,
	/build/cherry/var/lib/chromeos.
	""",
	)
	aggregate.set_defaults(func=aggregate_reports)
	aggregate.add_argument(
	"--canonicalize-board-roots",
	action="store_true",
	help="""
	Converts warnings paths starting with a board root (e.g., /build/atlas)
	to a form consistent across many boards.
	""",
	)
	aggregate.add_argument(
	"--directory", type=Path, required=True, help="Directory to inspect."
	)

	fetch_cq = subparsers.add_parser(
	"fetch-cq",
	help="Fetch all -Werror reports for a CQ run.",
	)
	fetch_cq.set_defaults(func=fetch_cq_reports)
	cl_or_cq_orchestrator = fetch_cq.add_mutually_exclusive_group(required=True)
	cl_or_cq_orchestrator.add_argument(
	"--cl",
	type=cros_cls.ChangeListURL.parse_with_patch_set,
	help="Link to a CL to get the most recent cq-orchestrator from",
	)
	cl_or_cq_orchestrator.add_argument(
	"--cq-orchestrator-id",
	type=cros_cls.BuildID,
	help="""
	Build number for a cq-orchestrator run. Builders invoked by this are
	examined for -Werror logs.
	""",
	)
	fetch_cq.add_argument(
	"--directory",
	type=Path,
	help=f"""
	Directory to put downloaded -Werror logs in. Default is a subdirectory
	of {_DEFAULT_FETCH_DIRECTORY}.
	""",
	)
	fetch_cq.add_argument(
	"-f",
	"--force",
	action="store_true",
	help="Remove the directory at `--directory` if it exists",
	)

	opts = parser.parse_args(argv)

	logging.basicConfig(
	format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
	"%(message)s",
	level=logging.DEBUG if opts.debug else logging.INFO,
	)

	assert getattr(opts, "func", None), "Unknown subcommand?"
	opts.func(opts)


	if __name__ == "__main__":
	main(sys.argv[1:])