infra/base-images/base-builder/bisect_clang.py - platform/external/oss-fuzz - Git at Google

 #!/usr/bin/env python3
 # Copyright 2019 Google Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 ################################################################################
 """Use git bisect to find the Clang/LLVM commit causing a regression."""

 import logging
 import os
 import re
 import shutil
 import subprocess
 import sys


 def execute(command, *args, expect_zero=True, **kwargs):
   """Execute |command| and return the returncode, stdout and stderr.

   Args:
     command: The command to run, passed to subprocess.Popen unchanged.
     *args: Extra positional arguments forwarded to subprocess.Popen.
     expect_zero: If True, raise when the command exits with a nonzero code.
     **kwargs: Extra keyword arguments forwarded to subprocess.Popen. Any
       'stdout' or 'stderr' entries are overwritten so that output can be
       captured.

   Returns:
     A (returncode, stdout, stderr) tuple where stdout and stderr are str.

   Raises:
     subprocess.CalledProcessError: If |expect_zero| is True and the command
       exits with a nonzero code. The captured stdout and stderr are attached
       to the exception.
   """
   kwargs['stdout'] = subprocess.PIPE
   kwargs['stderr'] = subprocess.PIPE
   logging.debug('Running command: "%s"', str(command))
   process = subprocess.Popen(command, *args, **kwargs)
   stdout, stderr = process.communicate()
   # Command output is not guaranteed to be valid UTF-8. Replace undecodable
   # bytes instead of failing with a UnicodeDecodeError.
   stdout = stdout.decode('utf-8', errors='replace')
   stderr = stderr.decode('utf-8', errors='replace')
   retcode = process.returncode
   logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
                str(command), retcode, stdout, stderr)
   if expect_zero and retcode != 0:
     # Attach the captured output so that it is available to whoever handles
     # the exception.
     raise subprocess.CalledProcessError(retcode,
                                         command,
                                         output=stdout,
                                         stderr=stderr)
   return retcode, stdout, stderr


 def search_bisect_output(output):
   """Search |output| for a message indicating the culprit commit has been
   found.

   Args:
     output: Text (str) to search, e.g. the stdout of a git bisect command.

   Returns:
     The 40 character commit id named in the message, or None if |output|
     contains no such message.
   """
   # TODO(metzman): Is it necessary to look for "good"?
   # The message is accepted at the start of any line, not only at the very
   # start of |output|, so text printed before it cannot hide the result.
   culprit_regex = re.compile(
       r'^([a-z0-9]{40}) is the first (good|bad) commit', re.MULTILINE)
   match = culprit_regex.search(output)
   return match.group(1) if match is not None else None


 class GitRepo:
   """Runs git and git bisect commands against a single repository."""

   def __init__(self, repo_dir):
     # Directory passed to 'git -C' by every command run through this object.
     self.repo_dir = repo_dir

   def do_command(self, git_subcommand):
     """Run git in this repo with the arguments in |git_subcommand| (a list of
     strings) and return the result of execute()."""
     return execute(['git', '-C', self.repo_dir] + git_subcommand)

   def test_commit(self, test_command):
     """Build LLVM at the currently checked out commit, then run |test_command|
     in a shell. Run 'git bisect good' if it exits with 0, otherwise run
     'git bisect bad'. Return the culprit commit if the bisect output names
     one, otherwise return None."""
     build_clang(self.repo_dir)
     exit_code, _, _ = execute(test_command, shell=True, expect_zero=False)
     verdict = 'good' if exit_code == 0 else 'bad'
     _, bisect_stdout, _ = self.do_bisect_command(verdict)
     return search_bisect_output(bisect_stdout)

   def bisect(self, good_commit, bad_commit, test_command):
     """Bisect between |good_commit| and |bad_commit| using |test_command| as
     the oracle. Keep testing commits until a culprit is reported and return
     it."""
     self.bisect_start(good_commit, bad_commit, test_command)
     while True:
       culprit = self.test_commit(test_command)
       if culprit is not None:
         return culprit

   def bisect_start(self, good_commit, bad_commit, test_command):
     """Start a git bisect session and verify and mark both end points."""
     self.do_bisect_command('start')
     # Do bad commit first since it is more likely to be recent.
     endpoints = ((bad_commit, 'bad'), (good_commit, 'good'))
     for commit, label in endpoints:
       self.test_start_commit(commit, label, test_command)

   def do_bisect_command(self, subcommand):
     """Run 'git bisect |subcommand|' (a string) and return the result."""
     bisect_args = ['bisect', subcommand]
     return self.do_command(bisect_args)

   def test_start_commit(self, commit, label, test_command):
     """Check out |commit|, build it and run |test_command| to confirm that it
     behaves as |label| ('good' or 'bad') claims, then mark it with
     'git bisect |label|'. Raise BisectError if the result contradicts
     |label|."""
     assert label in ('good', 'bad'), label
     self.do_command(['checkout', commit])
     build_clang(self.repo_dir)
     exit_code, _, _ = execute(test_command, shell=True, expect_zero=False)
     test_failed = exit_code != 0
     if label == 'good' and test_failed:
       raise BisectError('Test command "%s" returns %d on first good commit %s' %
                         (test_command, exit_code, commit))
     if label == 'bad' and not test_failed:
       raise BisectError('Test command "%s" returns %d on first bad commit %s' %
                         (test_command, exit_code, commit))

     self.do_bisect_command(label)


 class BisectError(Exception):
   """Raised when a problem is encountered while bisecting."""


 def get_clang_build_env():
   """Return a copy of the current environment with CXXFLAGS and CFLAGS
   removed, for use when building Clang."""
   build_env = os.environ.copy()
   for flags_variable in ('CXXFLAGS', 'CFLAGS'):
     # pop() with a default does nothing when the variable is not set.
     build_env.pop(flags_variable, None)
   return build_env


 def install_clang_build_deps():
   """Install the packages needed to build clang using apt-get."""
   packages = [
       'build-essential', 'make', 'cmake', 'ninja-build', 'git', 'subversion',
       'g++-multilib'
   ]
   execute(['apt-get', 'install', '-y'] + packages)


 def clone_with_retries(repo, local_path, num_retries=10):
   """Clone |repo| to |local_path| unless that directory already exists. Make
   up to |num_retries| attempts and raise an Exception if none of them
   succeeds."""
   if os.path.isdir(local_path):
     return
   clone_command = ['git', 'clone', repo, local_path]
   for _ in range(num_retries):
     # Remove anything a previous failed attempt left behind.
     if os.path.isdir(local_path):
       shutil.rmtree(local_path)
     exit_code = execute(clone_command, expect_zero=False)[0]
     if exit_code == 0:
       return
   raise Exception('Could not checkout %s.' % repo)


 def get_clang_target_arch():
   """Return the LLVM target name to build clang for, based on the output of
   'uname -m'. Raise an Exception if the machine is not recognized."""
   _, arch, _ = execute(['uname', '-m'])
   # (substring of the uname output, LLVM target) pairs, checked in order.
   known_targets = (('x86_64', 'X86'), ('aarch64', 'AArch64'))
   for machine, llvm_target in known_targets:
     if machine in arch:
       return llvm_target
   raise Exception('Unsupported target: %s.' % arch)


 def prepare_build(llvm_project_path):
   """Prepare to build clang by running cmake in the build directory.

   Args:
     llvm_project_path: Path to the llvm-project checkout; its 'llvm'
       subdirectory is given to cmake as the source directory.

   Returns:
     The path of the build directory, 'llvm-build' inside the directory named
     by the WORK environment variable.

   Raises:
     TypeError: If the WORK environment variable is not set.
     subprocess.CalledProcessError: If a command run through execute() fails.
   """
   llvm_build_dir = os.path.join(os.getenv('WORK'), 'llvm-build')
   # makedirs(exist_ok=True) also creates missing parent directories and does
   # not fail if the directory appears between a check and the creation.
   os.makedirs(llvm_build_dir, exist_ok=True)
   execute([
       'cmake', '-G', 'Ninja', '-DLIBCXX_ENABLE_SHARED=OFF',
       '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON', '-DLIBCXXABI_ENABLE_SHARED=OFF',
       '-DCMAKE_BUILD_TYPE=Release',
       '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
       '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
       os.path.join(llvm_project_path, 'llvm')
   ],
           env=get_clang_build_env(),
           cwd=llvm_build_dir)
   return llvm_build_dir


 def build_clang(llvm_project_path):
   """Configure a build of the LLVM checkout at |llvm_project_path|, then build
   and install it with ninja."""
   # TODO(metzman): Merge Python checkout and build code with
   # checkout_build_install_llvm.sh.
   # TODO(metzman): Look into speeding this process using ccache.
   # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
   # regressions.
   build_dir = prepare_build(llvm_project_path)
   install_command = ['ninja', '-C', build_dir, 'install']
   execute(install_command, env=get_clang_build_env())


 def find_culprit_commit(test_command, good_commit, bad_commit):
   """Find, print and return the LLVM commit that introduced the bug revealed
   by |test_command|. Clones llvm-project into the directory named by the SRC
   environment variable if it is not there yet, then bisects it, treating
   |good_commit| as known good and |bad_commit| as known bad."""
   checkout_dir = os.path.join(os.getenv('SRC'), 'llvm-project')
   clone_with_retries('https://github.com/llvm/llvm-project.git', checkout_dir)
   llvm_repo = GitRepo(checkout_dir)
   culprit = llvm_repo.bisect(good_commit, bad_commit, test_command)
   print('Culprit commit', culprit)
   return culprit


 def main():
   # pylint: disable=line-too-long
   """Finds the culprit LLVM commit that introduced a clang regression.
   Can be tested using this command in a libsodium shell:
   python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
                           f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
                           8288453f6aac05080b751b680455349e09d49825

   Reads the test command, the good commit and the bad commit from the first
   three command line arguments. Returns 0 when bisection completes, or 1
   after printing a usage message when fewer than three arguments are given.
   """
   # pylint: enable=line-too-long
   # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
   # TODO(metzman): Allow test_command to be optional and for just build.sh to be
   # used instead.
   # TODO(metzman): Add in more automation so that the script can automatically
   # determine the commits used in last Clang roll.
   if len(sys.argv) < 4:
     # Report missing arguments instead of failing with an IndexError.
     print('Usage: %s <test_command> <good_commit> <bad_commit>' % sys.argv[0],
           file=sys.stderr)
     return 1
   test_command, good_commit, bad_commit = sys.argv[1:4]
   # TODO(metzman): Make verbosity configurable.
   logging.getLogger().setLevel(logging.DEBUG)
   install_clang_build_deps()
   find_culprit_commit(test_command, good_commit, bad_commit)
   return 0


 if __name__ == '__main__':
   # Use the value returned by main() as the process exit status.
   raise SystemExit(main())
	#!/usr/bin/env python3
	# Copyright 2019 Google Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	################################################################################
	"""Use git bisect to find the Clang/LLVM commit causing a regression."""

	import logging
	import os
	import re
	import shutil
	import subprocess
	import sys


	def execute(command, *args, expect_zero=True, **kwargs):
	  """Execute |command| and return the returncode, stdout and stderr.

	  Args:
	    command: The command to run, passed to subprocess.Popen unchanged.
	    *args: Extra positional arguments forwarded to subprocess.Popen.
	    expect_zero: If True, raise when the command exits with a nonzero code.
	    **kwargs: Extra keyword arguments forwarded to subprocess.Popen. Any
	      'stdout' or 'stderr' entries are overwritten so that output can be
	      captured.

	  Returns:
	    A (returncode, stdout, stderr) tuple where stdout and stderr are str.

	  Raises:
	    subprocess.CalledProcessError: If |expect_zero| is True and the command
	      exits with a nonzero code. The captured stdout and stderr are attached
	      to the exception.
	  """
	  kwargs['stdout'] = subprocess.PIPE
	  kwargs['stderr'] = subprocess.PIPE
	  logging.debug('Running command: "%s"', str(command))
	  process = subprocess.Popen(command, *args, **kwargs)
	  stdout, stderr = process.communicate()
	  # Command output is not guaranteed to be valid UTF-8. Replace undecodable
	  # bytes instead of failing with a UnicodeDecodeError.
	  stdout = stdout.decode('utf-8', errors='replace')
	  stderr = stderr.decode('utf-8', errors='replace')
	  retcode = process.returncode
	  logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
	               str(command), retcode, stdout, stderr)
	  if expect_zero and retcode != 0:
	    # Attach the captured output so that it is available to whoever handles
	    # the exception.
	    raise subprocess.CalledProcessError(retcode,
	                                        command,
	                                        output=stdout,
	                                        stderr=stderr)
	  return retcode, stdout, stderr


	def search_bisect_output(output):
	  """Search |output| for a message indicating the culprit commit has been
	  found.

	  Args:
	    output: Text (str) to search, e.g. the stdout of a git bisect command.

	  Returns:
	    The 40 character commit id named in the message, or None if |output|
	    contains no such message.
	  """
	  # TODO(metzman): Is it necessary to look for "good"?
	  # The message is accepted at the start of any line, not only at the very
	  # start of |output|, so text printed before it cannot hide the result.
	  culprit_regex = re.compile(
	      r'^([a-z0-9]{40}) is the first (good|bad) commit', re.MULTILINE)
	  match = culprit_regex.search(output)
	  return match.group(1) if match is not None else None


	class GitRepo:
	  """Runs git and git bisect commands against a single repository."""

	  def __init__(self, repo_dir):
	    # Directory passed to 'git -C' by every command run through this object.
	    self.repo_dir = repo_dir

	  def do_command(self, git_subcommand):
	    """Run git in this repo with the arguments in |git_subcommand| (a list of
	    strings) and return the result of execute()."""
	    return execute(['git', '-C', self.repo_dir] + git_subcommand)

	  def test_commit(self, test_command):
	    """Build LLVM at the currently checked out commit, then run |test_command|
	    in a shell. Run 'git bisect good' if it exits with 0, otherwise run
	    'git bisect bad'. Return the culprit commit if the bisect output names
	    one, otherwise return None."""
	    build_clang(self.repo_dir)
	    exit_code, _, _ = execute(test_command, shell=True, expect_zero=False)
	    verdict = 'good' if exit_code == 0 else 'bad'
	    _, bisect_stdout, _ = self.do_bisect_command(verdict)
	    return search_bisect_output(bisect_stdout)

	  def bisect(self, good_commit, bad_commit, test_command):
	    """Bisect between |good_commit| and |bad_commit| using |test_command| as
	    the oracle. Keep testing commits until a culprit is reported and return
	    it."""
	    self.bisect_start(good_commit, bad_commit, test_command)
	    while True:
	      culprit = self.test_commit(test_command)
	      if culprit is not None:
	        return culprit

	  def bisect_start(self, good_commit, bad_commit, test_command):
	    """Start a git bisect session and verify and mark both end points."""
	    self.do_bisect_command('start')
	    # Do bad commit first since it is more likely to be recent.
	    endpoints = ((bad_commit, 'bad'), (good_commit, 'good'))
	    for commit, label in endpoints:
	      self.test_start_commit(commit, label, test_command)

	  def do_bisect_command(self, subcommand):
	    """Run 'git bisect |subcommand|' (a string) and return the result."""
	    bisect_args = ['bisect', subcommand]
	    return self.do_command(bisect_args)

	  def test_start_commit(self, commit, label, test_command):
	    """Check out |commit|, build it and run |test_command| to confirm that it
	    behaves as |label| ('good' or 'bad') claims, then mark it with
	    'git bisect |label|'. Raise BisectError if the result contradicts
	    |label|."""
	    assert label in ('good', 'bad'), label
	    self.do_command(['checkout', commit])
	    build_clang(self.repo_dir)
	    exit_code, _, _ = execute(test_command, shell=True, expect_zero=False)
	    test_failed = exit_code != 0
	    if label == 'good' and test_failed:
	      raise BisectError('Test command "%s" returns %d on first good commit %s' %
	                        (test_command, exit_code, commit))
	    if label == 'bad' and not test_failed:
	      raise BisectError('Test command "%s" returns %d on first bad commit %s' %
	                        (test_command, exit_code, commit))

	    self.do_bisect_command(label)


	class BisectError(Exception):
	  """Raised when a problem is encountered while bisecting."""


	def get_clang_build_env():
	  """Return a copy of the current environment with CXXFLAGS and CFLAGS
	  removed, for use when building Clang."""
	  build_env = os.environ.copy()
	  for flags_variable in ('CXXFLAGS', 'CFLAGS'):
	    # pop() with a default does nothing when the variable is not set.
	    build_env.pop(flags_variable, None)
	  return build_env


	def install_clang_build_deps():
	  """Install the packages needed to build clang using apt-get."""
	  packages = [
	      'build-essential', 'make', 'cmake', 'ninja-build', 'git', 'subversion',
	      'g++-multilib'
	  ]
	  execute(['apt-get', 'install', '-y'] + packages)


	def clone_with_retries(repo, local_path, num_retries=10):
	  """Clone |repo| to |local_path| unless that directory already exists. Make
	  up to |num_retries| attempts and raise an Exception if none of them
	  succeeds."""
	  if os.path.isdir(local_path):
	    return
	  clone_command = ['git', 'clone', repo, local_path]
	  for _ in range(num_retries):
	    # Remove anything a previous failed attempt left behind.
	    if os.path.isdir(local_path):
	      shutil.rmtree(local_path)
	    exit_code = execute(clone_command, expect_zero=False)[0]
	    if exit_code == 0:
	      return
	  raise Exception('Could not checkout %s.' % repo)


	def get_clang_target_arch():
	  """Return the LLVM target name to build clang for, based on the output of
	  'uname -m'. Raise an Exception if the machine is not recognized."""
	  _, arch, _ = execute(['uname', '-m'])
	  # (substring of the uname output, LLVM target) pairs, checked in order.
	  known_targets = (('x86_64', 'X86'), ('aarch64', 'AArch64'))
	  for machine, llvm_target in known_targets:
	    if machine in arch:
	      return llvm_target
	  raise Exception('Unsupported target: %s.' % arch)


	def prepare_build(llvm_project_path):
	  """Prepare to build clang by running cmake in the build directory.

	  Args:
	    llvm_project_path: Path to the llvm-project checkout; its 'llvm'
	      subdirectory is given to cmake as the source directory.

	  Returns:
	    The path of the build directory, 'llvm-build' inside the directory named
	    by the WORK environment variable.

	  Raises:
	    TypeError: If the WORK environment variable is not set.
	    subprocess.CalledProcessError: If a command run through execute() fails.
	  """
	  llvm_build_dir = os.path.join(os.getenv('WORK'), 'llvm-build')
	  # makedirs(exist_ok=True) also creates missing parent directories and does
	  # not fail if the directory appears between a check and the creation.
	  os.makedirs(llvm_build_dir, exist_ok=True)
	  execute([
	      'cmake', '-G', 'Ninja', '-DLIBCXX_ENABLE_SHARED=OFF',
	      '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON', '-DLIBCXXABI_ENABLE_SHARED=OFF',
	      '-DCMAKE_BUILD_TYPE=Release',
	      '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
	      '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
	      os.path.join(llvm_project_path, 'llvm')
	  ],
	          env=get_clang_build_env(),
	          cwd=llvm_build_dir)
	  return llvm_build_dir


	def build_clang(llvm_project_path):
	  """Configure a build of the LLVM checkout at |llvm_project_path|, then build
	  and install it with ninja."""
	  # TODO(metzman): Merge Python checkout and build code with
	  # checkout_build_install_llvm.sh.
	  # TODO(metzman): Look into speeding this process using ccache.
	  # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
	  # regressions.
	  build_dir = prepare_build(llvm_project_path)
	  install_command = ['ninja', '-C', build_dir, 'install']
	  execute(install_command, env=get_clang_build_env())


	def find_culprit_commit(test_command, good_commit, bad_commit):
	  """Find, print and return the LLVM commit that introduced the bug revealed
	  by |test_command|. Clones llvm-project into the directory named by the SRC
	  environment variable if it is not there yet, then bisects it, treating
	  |good_commit| as known good and |bad_commit| as known bad."""
	  checkout_dir = os.path.join(os.getenv('SRC'), 'llvm-project')
	  clone_with_retries('https://github.com/llvm/llvm-project.git', checkout_dir)
	  llvm_repo = GitRepo(checkout_dir)
	  culprit = llvm_repo.bisect(good_commit, bad_commit, test_command)
	  print('Culprit commit', culprit)
	  return culprit


	def main():
	  # pylint: disable=line-too-long
	  """Finds the culprit LLVM commit that introduced a clang regression.
	  Can be tested using this command in a libsodium shell:
	  python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
	                          f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
	                          8288453f6aac05080b751b680455349e09d49825

	  Reads the test command, the good commit and the bad commit from the first
	  three command line arguments. Returns 0 when bisection completes, or 1
	  after printing a usage message when fewer than three arguments are given.
	  """
	  # pylint: enable=line-too-long
	  # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
	  # TODO(metzman): Allow test_command to be optional and for just build.sh to be
	  # used instead.
	  # TODO(metzman): Add in more automation so that the script can automatically
	  # determine the commits used in last Clang roll.
	  if len(sys.argv) < 4:
	    # Report missing arguments instead of failing with an IndexError.
	    print('Usage: %s <test_command> <good_commit> <bad_commit>' % sys.argv[0],
	          file=sys.stderr)
	    return 1
	  test_command, good_commit, bad_commit = sys.argv[1:4]
	  # TODO(metzman): Make verbosity configurable.
	  logging.getLogger().setLevel(logging.DEBUG)
	  install_clang_build_deps()
	  find_culprit_commit(test_command, good_commit, bad_commit)
	  return 0


	if __name__ == '__main__':
	  # Use the value returned by main() as the process exit status.
	  sys.exit(main())